Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,51 +1,8 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
import pinecone
|
3 |
-
import os # To access environment variables
|
4 |
-
from sentence_transformers import SentenceTransformer
|
5 |
-
import numpy as np
|
6 |
-
from datasets import load_dataset
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
# Initialize Pinecone client using the API key
|
16 |
-
pinecone.init(api_key=pinecone_api_key, environment="us-esst-1") # Change the environment if needed
|
17 |
-
|
18 |
-
# Connect to your Pinecone index
|
19 |
-
index_name = "legal-docs-index-dji2ip8" # Your Pinecone index name
|
20 |
-
index = pinecone.Index(index_name)
|
21 |
-
|
22 |
-
# Step 2: Load the sentence-transformers model for embeddings
|
23 |
-
model = SentenceTransformer("all-MiniLM-L6-v2")
|
24 |
-
|
25 |
-
# Step 3: Load dataset (for reference in your app)
|
26 |
-
dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
|
27 |
-
law_texts = [item['text'] for item in dataset if 'text' in item]
|
28 |
-
|
29 |
-
# Step 4: Function to search Pinecone index
|
30 |
-
def search_pinecone(query, top_k=5):
|
31 |
-
# Create an embedding for the user's query
|
32 |
-
query_embedding = model.encode([query])
|
33 |
-
|
34 |
-
# Query the Pinecone index for similar documents
|
35 |
-
results = index.query(query_embedding, top_k=top_k, include_metadata=True)
|
36 |
-
|
37 |
-
# Extract the text of the top-k results
|
38 |
-
return [match['metadata']['text'] for match in results['matches']]
|
39 |
-
|
40 |
-
# Step 5: Streamlit UI
|
41 |
-
st.title("π Legal AI Assistant (US-LegalKit)")
|
42 |
-
|
43 |
-
query = st.text_input("π Enter your legal query:")
|
44 |
-
|
45 |
-
if query:
|
46 |
-
# Get the top results from Pinecone
|
47 |
-
results = search_pinecone(query)
|
48 |
-
|
49 |
-
st.write("### π Relevant Legal Documents:")
|
50 |
-
for i, doc in enumerate(results, 1):
|
51 |
-
st.write(f"**{i}.** {doc[:500]}...") # Show preview of the document
|
|
|
1 |
+
from pinecone import Pinecone, ServerlessSpec
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
# Initialize Pinecone
|
4 |
+
pc = Pinecone(api_key=pinecone_api_key)
|
5 |
|
6 |
+
# Connect to your existing index
|
7 |
+
index_name = "legal-docs-index-dji2ip8"
|
8 |
+
index = pc.Index(index_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|