Update app.py
app.py CHANGED
@@ -1,45 +1,51 @@
 import streamlit as st
-import openai
-import …
+import pinecone
+import os  # To access environment variables
 from sentence_transformers import SentenceTransformer
-import …
+import numpy as np
 from datasets import load_dataset
 
-# …
-…
+# Step 1: Get the Pinecone API key from the environment variable (Hugging Face secret)
+pinecone_api_key = os.getenv('PINECONE_API_KEY')  # Fetch Pinecone API key from Hugging Face secrets
+
+if not pinecone_api_key:
+    st.error("Pinecone API key not found! Make sure to set the secret in Hugging Face settings.")
+    st.stop()
+
+# Initialize Pinecone client using the API key
+pinecone.init(api_key=pinecone_api_key, environment="us-west1-gcp")  # Change the environment if needed
 
-# …
+# Connect to your Pinecone index
+index_name = "legal-docs-index-dji2ip8"  # Your Pinecone index name
+index = pinecone.Index(index_name)
+
+# Step 2: Load the sentence-transformers model for embeddings
 model = SentenceTransformer("all-MiniLM-L6-v2")
 
-# Load dataset (…
+# Step 3: Load dataset (for reference in your app)
 dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
 law_texts = [item['text'] for item in dataset if 'text' in item]
 
-# …
-…
-…
-# Function to search relevant legal documents
-def search_legal_docs(query, top_k=5):
+# Step 4: Function to search Pinecone index
+def search_pinecone(query, top_k=5):
+    # Create an embedding for the user's query
     query_embedding = model.encode([query])
-…
-…
-…
-…
+
+    # Query the Pinecone index for similar documents
+    results = index.query(query_embedding, top_k=top_k, include_metadata=True)
+
+    # Extract the text of the top-k results
+    return [match['metadata']['text'] for match in results['matches']]
+
+# Step 5: Streamlit UI
 st.title("📚 Legal AI Assistant (US-LegalKit)")
 
 query = st.text_input("🔍 Enter your legal query:")
 
 if query:
-    results = search_legal_docs(query)
+    # Get the top results from Pinecone
+    results = search_pinecone(query)
+
     st.write("### 📄 Relevant Legal Documents:")
     for i, doc in enumerate(results, 1):
         st.write(f"**{i}.** {doc[:500]}...")  # Show preview of the document
-
-    # Generate AI-based legal response
-    response = openai.ChatCompletion.create(
-        model="gpt-4",
-        messages=[{"role": "system", "content": "You are a legal assistant."},
-                  {"role": "user", "content": query}]
-    )
-    st.write("### 🧑‍⚖️ AI Response:")
-    st.write(response['choices'][0]['message']['content'])
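Note: the new `search_pinecone` function assumes the `legal-docs-index-dji2ip8` index already holds embeddings of the US-LegalKit texts, with each document's text stored under the `metadata['text']` key. That indexing step is not part of this commit. Below is a minimal one-off sketch of how the index could be populated using the same pinecone-client style API the Space uses; the index name comes from the diff, while the batch size, string IDs, and cosine metric are assumptions.

import os
import pinecone
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

# Sketch of a one-off indexing script (batch size, IDs, and metric are assumptions)
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="us-west1-gcp")

index_name = "legal-docs-index-dji2ip8"
if index_name not in pinecone.list_indexes():
    # all-MiniLM-L6-v2 produces 384-dimensional embeddings
    pinecone.create_index(index_name, dimension=384, metric="cosine")
index = pinecone.Index(index_name)

model = SentenceTransformer("all-MiniLM-L6-v2")
dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
law_texts = [item['text'] for item in dataset if 'text' in item]

batch_size = 100
for start in range(0, len(law_texts), batch_size):
    batch = law_texts[start:start + batch_size]
    embeddings = model.encode(batch).tolist()
    # Each vector is (id, values, metadata); the app later reads metadata['text']
    vectors = [(str(start + j), embeddings[j], {"text": batch[j]}) for j in range(len(batch))]
    index.upsert(vectors=vectors)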
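One detail worth double-checking in `search_pinecone`: `model.encode([query])` returns a 2-D NumPy array, whereas the Pinecone client expects a single flat list of floats as the query vector. A hedged variant of the function that flattens the embedding before querying:

def search_pinecone(query, top_k=5):
    # Encode the single query string into a flat list of 384 floats
    query_vector = model.encode(query).tolist()

    # Query the Pinecone index for the nearest stored vectors, including metadata
    results = index.query(vector=query_vector, top_k=top_k, include_metadata=True)

    # Pull the original document text back out of each match's metadata
    return [match['metadata']['text'] for match in results['matches']]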