Spaces:
Sleeping
Sleeping
import streamlit as st | |
import os | |
from pinecone import Pinecone, ServerlessSpec | |
from sentence_transformers import SentenceTransformer | |
import numpy as np | |
from datasets import load_dataset | |
# Step 1: Fetch the Pinecone API key from Hugging Face secrets.
if "PINECONE_API_KEY" not in st.secrets:
    st.error("π¨ Pinecone API key not found! Please set it in Hugging Face secrets.")
    st.stop()  # ends this script run, so nothing below executes without a key
pinecone_api_key = st.secrets["PINECONE_API_KEY"]


# Streamlit re-executes this script from the top on every widget interaction.
# The helpers below are wrapped in ``st.cache_resource`` so the client, the
# embedding model and the dataset are created once per server process and
# reused on later reruns instead of being rebuilt for every query.
@st.cache_resource
def _connect_pinecone(api_key, name):
    """Return ``(client, index_handle)`` for the Pinecone index called ``name``."""
    client = Pinecone(api_key=api_key)
    return client, client.Index(name)


@st.cache_resource
def _load_embedding_model(name):
    """Load the SentenceTransformer model identified by ``name``."""
    return SentenceTransformer(name)


@st.cache_resource
def _load_law_dataset():
    """Return ``(dataset, texts)`` for the train split of US-LegalKit.

    ``texts`` holds the ``"text"`` field of every record that has one.  The
    returned objects are shared between reruns, so treat them as read-only.
    """
    records = load_dataset("macadeliccc/US-LegalKit", split="train")
    return records, [item["text"] for item in records if "text" in item]


# Step 2 + 3: Initialize the Pinecone client and connect to the existing index.
index_name = "legal-docs-index"
pc, index = _connect_pinecone(pinecone_api_key, index_name)

# Step 4: Load the embedding model.
# NOTE(review): "text-embedding-ada-002" is the name of an OpenAI API embedding
# model; confirm that a SentenceTransformer checkpoint with this name exists and
# that its output dimension matches the vectors stored in the index.
model = _load_embedding_model("text-embedding-ada-002")

# Step 5: Load the dataset (for reference).
dataset, law_texts = _load_law_dataset()
# Step 6: Function to search the Pinecone index.
def search_pinecone(query, top_k=5):
    """Return the stored text of the index entries that best match ``query``.

    Args:
        query: Free-text search string; it is embedded with the module-level
            ``model`` before being sent to the index.
        top_k: Maximum number of matches to request from the index.

    Returns:
        A list containing ``metadata["text"]`` of every match, in the order
        the index returned them.

    Raises:
        KeyError: If a returned match has no ``"text"`` entry in its metadata.
    """
    # Encode the bare string rather than ``[query]``: a one-element list is
    # encoded to a 2-D array, and ``.tolist()`` would then yield a nested list
    # instead of the flat list of floats a query vector has to be.
    query_embedding = model.encode(query).tolist()

    # The ``Pinecone``-class client does not accept the vector positionally;
    # it must be passed through the ``vector`` keyword.
    results = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
    )
    return [match["metadata"]["text"] for match in results["matches"]]
# Step 7: Streamlit UI.
st.title("π Legal AI Assistant (US-LegalKit)")
query = st.text_input("π Enter your legal query:")

# Skip the search for empty or whitespace-only input.
if query and query.strip():
    results = search_pinecone(query)
    st.write("### π Relevant Legal Documents:")
    if not results:
        # Without this the heading would be followed by nothing at all.
        st.info("No matching documents were found for this query.")
    for i, doc in enumerate(results, 1):
        preview = doc[:500]
        # Only signal truncation when text was actually cut off.
        suffix = "..." if len(doc) > 500 else ""
        st.write(f"**{i}.** {preview}{suffix}")