Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,43 +3,46 @@ import os
|
|
3 |
import streamlit as st
|
4 |
from langchain.document_loaders import PyPDFLoader
|
5 |
from langchain.vectorstores import FAISS
|
6 |
-
from
|
7 |
|
8 |
-
# Function to process PDF
|
9 |
def process_pdf(file):
|
10 |
-
# Save the uploaded file into a temporary file
|
11 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
|
12 |
-
tmpfile.write(file.read()) # Write the uploaded file's content
|
13 |
-
tmpfile_path = tmpfile.name # Get the file path
|
14 |
return tmpfile_path
|
15 |
|
16 |
-
# Main function to run the app
|
17 |
def main():
|
18 |
st.title("PDF Embedding and Query System")
|
19 |
-
|
|
|
20 |
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
|
21 |
-
|
22 |
if uploaded_file is not None:
|
23 |
-
# Process the uploaded PDF file
|
24 |
tmp_file_path = process_pdf(uploaded_file)
|
25 |
-
|
26 |
-
# Load the PDF content
|
27 |
loader = PyPDFLoader(tmp_file_path)
|
28 |
documents = loader.load()
|
29 |
-
|
30 |
-
#
|
31 |
-
embeddings =
|
32 |
-
|
33 |
-
# Create a vector
|
34 |
vector_db = FAISS.from_documents(documents, embeddings)
|
35 |
-
|
36 |
-
#
|
37 |
query = st.text_input("Enter a query to search:")
|
|
|
38 |
if query:
|
|
|
39 |
results = vector_db.similarity_search(query, k=5)
|
|
|
40 |
for result in results:
|
41 |
st.write(result["text"])
|
42 |
|
43 |
-
# Run the app
|
44 |
if __name__ == "__main__":
|
45 |
main()
|
|
|
3 |
import streamlit as st
|
4 |
from langchain.document_loaders import PyPDFLoader
|
5 |
from langchain.vectorstores import FAISS
|
6 |
+
from langchain.embeddings.huggingface import HuggingFaceEmbeddings # Updated to HuggingFaceEmbeddings
|
7 |
|
8 |
+
# Function to process the uploaded PDF and save it temporarily
|
9 |
def process_pdf(file):
    """Copy an uploaded PDF stream into a temporary ``.pdf`` file on disk.

    Args:
        file: A binary file-like object exposing ``read()`` (for example the
            object returned by ``st.file_uploader``).

    Returns:
        The filesystem path of the newly written temporary file. The file is
        created with ``delete=False``, so the caller is responsible for
        removing it once it is no longer needed.
    """
    # Rewind before copying: if the stream was already consumed (e.g. it was
    # read once earlier), read() would return b"" and an empty PDF would be
    # written. Non-seekable streams are read from their current position.
    seekable = getattr(file, "seekable", None)
    if callable(seekable) and seekable():
        file.seek(0)

    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmpfile:
        tmpfile.write(file.read())
        # The handle is closed (and flushed) when the with-block exits, which
        # happens before the caller receives the path.
        return tmpfile.name
|
14 |
|
15 |
+
# Main function to run the Streamlit app
|
16 |
def main():
    """Run the Streamlit page: upload a PDF, index it, and answer queries.

    Flow: the uploaded PDF is written to a temporary file, loaded with
    ``PyPDFLoader``, embedded with a HuggingFace sentence-transformer model,
    and stored in an in-memory FAISS index. When the user enters a query, the
    five most similar documents are written to the page.

    Returns:
        None. All output is rendered through Streamlit.
    """
    st.title("PDF Embedding and Query System")

    # File uploader for the user to upload a PDF
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    # PyPDFLoader is given a path, so the upload is first persisted to disk.
    tmp_file_path = process_pdf(uploaded_file)
    try:
        documents = PyPDFLoader(tmp_file_path).load()
    finally:
        # The temp file is created with delete=False, so nothing else removes
        # it; without this, every run with an upload leaves another copy behind.
        os.remove(tmp_file_path)

    # FAISS.from_documents cannot build an index from an empty list.
    if not documents:
        st.warning("No pages could be read from the uploaded PDF.")
        return

    # Initialize HuggingFace embeddings (replace this with your desired model)
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Create a FAISS vector store using the loaded documents and embeddings
    vector_db = FAISS.from_documents(documents, embeddings)

    # Query input field for users to enter their search queries
    query = st.text_input("Enter a query to search:")
    if not query:
        return

    # similarity_search returns Document objects, which are not subscriptable;
    # the matched text is exposed on the page_content attribute.
    for result in vector_db.similarity_search(query, k=5):
        st.write(result.page_content)
|
45 |
|
46 |
+
# Run the app if this script is executed directly
|
47 |
if __name__ == "__main__":
    # Guarded so that importing this module does not start the app.
    main()
|