import streamlit as st
import pinecone
import os  # To access environment variables
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

# Step 1: Get the Pinecone API key from the environment variable (Hugging Face secret)
pinecone_api_key = os.getenv('PINECONE_API_KEY')  # Fetch Pinecone API key from Hugging Face secrets
if not pinecone_api_key:
    st.error("Pinecone API key not found! Make sure to set the secret in Hugging Face settings.")
    st.stop()

# Initialize the Pinecone client using the API key
# (this uses the pinecone-client 2.x interface; newer SDK versions replace
# pinecone.init with the Pinecone class)
pinecone.init(api_key=pinecone_api_key, environment="us-west1-gcp")  # Change the environment if needed

# Connect to your Pinecone index
index_name = "legal-docs-index-dji2ip8"  # Your Pinecone index name
index = pinecone.Index(index_name)

# Step 2: Load the sentence-transformers model for embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")

# Step 3: Load the dataset (kept for reference in the app)
dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
law_texts = [item['text'] for item in dataset if 'text' in item]

# Step 4: Function to search the Pinecone index
def search_pinecone(query, top_k=5):
    # Create an embedding for the user's query (a plain list of floats, as Pinecone expects)
    query_embedding = model.encode(query).tolist()
    # Query the Pinecone index for similar documents
    results = index.query(vector=query_embedding, top_k=top_k, include_metadata=True)
    # Extract the text of the top-k results
    # (assumes each vector was upserted with a 'text' metadata field)
    return [match['metadata']['text'] for match in results['matches']]

# Step 5: Streamlit UI
st.title("🔍 Legal AI Assistant (US-LegalKit)")

query = st.text_input("📌 Enter your legal query:")

if query:
    # Get the top results from Pinecone
    results = search_pinecone(query)

    st.write("### 📄 Relevant Legal Documents:")
    for i, doc in enumerate(results, 1):
        st.write(f"**{i}.** {doc[:500]}...")  # Show a preview of the document
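
# ---------------------------------------------------------------------------
# Optional: one-time indexing sketch (never called by the app above).
# The app assumes the "legal-docs-index-dji2ip8" index already contains
# embeddings of the US-LegalKit texts, each stored with a 'text' metadata
# field. build_index() below is a minimal, hypothetical example of how such
# an index could be populated with the same pinecone-client 2.x API and
# MiniLM model; the batch size, ID scheme, and metadata truncation are
# illustrative assumptions, not taken from the original app.
# ---------------------------------------------------------------------------
def build_index(batch_size=100):
    for start in range(0, len(law_texts), batch_size):
        batch = law_texts[start:start + batch_size]
        # Encode the batch and convert to plain lists of floats for Pinecone
        embeddings = model.encode(batch).tolist()
        vectors = [
            # (id, values, metadata) tuples; the metadata text is truncated to
            # stay well under Pinecone's per-vector metadata size limit
            (f"doc-{start + i}", emb, {"text": text[:2000]})
            for i, (emb, text) in enumerate(zip(embeddings, batch))
        ]
        index.upsert(vectors=vectors)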