Ahmadkhan12 committed on
Commit
54146e4
·
verified ·
1 Parent(s): c921af8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -19
app.py CHANGED
@@ -3,43 +3,46 @@ import os
3
  import streamlit as st
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.vectorstores import FAISS
6
- from langchain_community.embeddings.groq import GroqEmbedding # Corrected import
7
 
8
- # Function to process PDF
9
  def process_pdf(file):
10
- # Save the uploaded file into a temporary file
11
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
12
- tmpfile.write(file.read()) # Write the uploaded file's content
13
- tmpfile_path = tmpfile.name # Get the file path
14
  return tmpfile_path
15
 
16
- # Main function to run the app
17
  def main():
18
  st.title("PDF Embedding and Query System")
19
-
 
20
  uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
21
-
22
  if uploaded_file is not None:
23
- # Process the uploaded PDF file
24
  tmp_file_path = process_pdf(uploaded_file)
25
-
26
- # Load the PDF content
27
  loader = PyPDFLoader(tmp_file_path)
28
  documents = loader.load()
29
-
30
- # Use Groq embeddings (assuming Groq API key is set correctly)
31
- embeddings = GroqEmbedding(api_key="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")
32
-
33
- # Create a vector database
34
  vector_db = FAISS.from_documents(documents, embeddings)
35
-
36
- # Perform search or other actions
37
  query = st.text_input("Enter a query to search:")
 
38
  if query:
 
39
  results = vector_db.similarity_search(query, k=5)
 
40
  for result in results:
41
  st.write(result["text"])
42
 
43
- # Run the app
44
  if __name__ == "__main__":
45
  main()
 
3
  import streamlit as st
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.vectorstores import FAISS
6
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings # Updated to HuggingFaceEmbeddings
7
 
8
# Save an uploaded PDF to disk so path-based loaders can open it
def process_pdf(file):
    """Copy the uploaded file's bytes into a temporary ``.pdf`` file.

    Args:
        file: A readable binary file-like object; only ``read()`` is called.

    Returns:
        The filesystem path of the temporary file that was written.

    Note:
        The file is created with ``delete=False``, so it stays on disk after
        this function returns. Removing it is left to the caller.
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as handle:
        handle.write(file.read())
    # The name attribute is still available after the handle has been closed.
    return handle.name
14
 
15
# Main function to run the Streamlit app
def main():
    """Run the Streamlit UI: upload a PDF, index it, and search it.

    The uploaded PDF is written to a temporary file by ``process_pdf``,
    loaded with ``PyPDFLoader``, embedded with ``HuggingFaceEmbeddings`` and
    stored in a FAISS vector store. When the user enters a non-empty query,
    the text of the five most similar documents is written to the page.
    """
    import os  # local import: only needed for the temp-file cleanup below

    st.title("PDF Embedding and Query System")

    # File uploader for the user to upload a PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        # Nothing to index until a file has been provided.
        return

    # Process the uploaded PDF and get its file path
    tmp_file_path = process_pdf(uploaded_file)
    try:
        # Load the PDF content using the PyPDFLoader
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # process_pdf creates the file with delete=False; remove it once the
        # loader is done so uploads do not pile up on disk.
        os.remove(tmp_file_path)

    # Initialize HuggingFace embeddings (replace this with your desired model)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Create a FAISS vector store using the loaded documents and embeddings
    vector_db = FAISS.from_documents(documents, embeddings)

    # Query input field for users to enter their search queries
    query = st.text_input("Enter a query to search:")
    if query:
        # Perform similarity search based on the query and display the hits
        for result in vector_db.similarity_search(query, k=5):
            # Results are LangChain Document objects, which are not
            # subscriptable; the text is exposed as .page_content.
            st.write(result.page_content)
45
 
46
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()