Hidayatmahar committed on
Commit 5a16bc2 · verified · 1 Parent(s): 4e8cbc3

Update app.py

Files changed (1)
  app.py +32 -26
app.py CHANGED
@@ -1,45 +1,51 @@
  import streamlit as st
- import faiss
- import numpy as np
+ import pinecone
+ import os  # To access environment variables
  from sentence_transformers import SentenceTransformer
- import openai
+ import numpy as np
  from datasets import load_dataset

- # Load FAISS index
- index = faiss.read_index("faiss_index.bin")
+ # Step 1: Get the Pinecone API key from the environment variable (Hugging Face secret)
+ pinecone_api_key = os.getenv('PINECONE_API_KEY')  # Fetch Pinecone API key from Hugging Face secrets
+
+ if not pinecone_api_key:
+     st.error("Pinecone API key not found! Make sure to set the secret in Hugging Face settings.")
+     st.stop()
+
+ # Initialize Pinecone client using the API key
+ pinecone.init(api_key=pinecone_api_key, environment="us-west1-gcp")  # Change the environment if needed

- # Load embedding model
+ # Connect to your Pinecone index
+ index_name = "legal-docs-index-dji2ip8"  # Your Pinecone index name
+ index = pinecone.Index(index_name)
+
+ # Step 2: Load the sentence-transformers model for embeddings
  model = SentenceTransformer("all-MiniLM-L6-v2")

- # Load dataset (only titles for reference)
+ # Step 3: Load dataset (for reference in your app)
  dataset = load_dataset("macadeliccc/US-LegalKit", split="train")
  law_texts = [item['text'] for item in dataset if 'text' in item]

- # OpenAI API Key (store it as a secret in Hugging Face)
- openai.api_key = st.secrets["GROQ_API_KEY"]
-
- # Function to search relevant legal documents
- def search_legal_docs(query, top_k=5):
+ # Step 4: Function to search Pinecone index
+ def search_pinecone(query, top_k=5):
+     # Create an embedding for the user's query
      query_embedding = model.encode([query])
-     _, idxs = index.search(query_embedding, top_k)
-     return [law_texts[i] for i in idxs[0]]  # Return matching legal documents
-
- # Streamlit UI
+
+     # Query the Pinecone index for similar documents
+     results = index.query(query_embedding, top_k=top_k, include_metadata=True)
+
+     # Extract the text of the top-k results
+     return [match['metadata']['text'] for match in results['matches']]
+
+ # Step 5: Streamlit UI
  st.title("🔍 Legal AI Assistant (US-LegalKit)")

  query = st.text_input("📌 Enter your legal query:")

  if query:
-     results = search_legal_docs(query)
+     # Get the top results from Pinecone
+     results = search_pinecone(query)
+
      st.write("### 📄 Relevant Legal Documents:")
      for i, doc in enumerate(results, 1):
          st.write(f"**{i}.** {doc[:500]}...")  # Show preview of the document
-
-     # Generate AI-based legal response
-     response = openai.ChatCompletion.create(
-         model="gpt-4",
-         messages=[{"role": "system", "content": "You are a legal assistant."},
-                   {"role": "user", "content": query}]
-     )
-     st.write("### 🧑‍⚖️ AI Response:")
-     st.write(response['choices'][0]['message']['content'])
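
A note on the added search_pinecone helper, offered as a hedged sketch rather than as part of the commit: model.encode([query]) returns a 2-D NumPy array, while the classic pinecone-client Index.query call (the pinecone.init / pinecone.Index style used in this version) expects the query vector as a flat list of floats passed via its vector parameter. Below is a minimal sketch of how that step could look, reusing the index name, environment, and "text" metadata field assumed by the committed code; query_vector is an illustrative name, and the .tolist() flattening and vector= keyword are my assumptions, not something the commit does.

    import os

    import pinecone
    from sentence_transformers import SentenceTransformer

    # Sketch only: classic (pre-3.x) pinecone-client API, same index/environment as the diff
    pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="us-west1-gcp")
    index = pinecone.Index("legal-docs-index-dji2ip8")
    model = SentenceTransformer("all-MiniLM-L6-v2")

    def search_pinecone(query, top_k=5):
        # encode() on a single string yields a 1-D array; tolist() turns it into the
        # flat list of floats that Index.query expects for its query vector
        query_vector = model.encode(query).tolist()
        results = index.query(vector=query_vector, top_k=top_k, include_metadata=True)
        # Assumes each upserted vector carries the document text in a "text" metadata field
        return [match["metadata"]["text"] for match in results["matches"]]

On pinecone-client 3.x and later, pinecone.init and module-level pinecone.Index no longer exist; the equivalent setup is pc = Pinecone(api_key=...) followed by pc.Index(index_name), so the client version installed in the Space determines which form applies.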