Spaces:
Running
Running
Update app.py
Browse files
updated to handle no document in embeddings
app.py
CHANGED
@@ -36,13 +36,15 @@ def store_document(text):
|
|
36 |
|
37 |
def retrieve_document(query):
|
38 |
print(f"retrieving doc based on: \n{query}")
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
print(f"retrieved: \n{documents[closest_idx[0][0]]}")
|
44 |
-
|
45 |
-
return documents[closest_idx[0][0]]
|
46 |
|
47 |
|
48 |
def clean_text(text):
|
@@ -81,12 +83,9 @@ def chatbot(pdf_file, user_question):
|
|
81 |
if not text:
|
82 |
return "Could not extract any text from the PDF."
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
except Exception as e:
|
88 |
-
return f"Error retrieving document relevant to the query: {user_question} \n{e}"
|
89 |
-
|
90 |
if doc:
|
91 |
print("found doc")
|
92 |
# Split into smaller chunks
|
|
|
36 |
|
37 |
def retrieve_document(query):
|
38 |
print(f"retrieving doc based on: \n{query}")
|
39 |
+
|
40 |
+
if len(documents) >= 1:
|
41 |
+
query_embedding = embedding_model.encode([query])
|
42 |
+
_, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), 1)
|
43 |
+
|
44 |
+
print(f"retrieved: \n{documents[closest_idx[0][0]]}")
|
45 |
|
46 |
+
return documents[closest_idx[0][0]]
|
47 |
+
return None
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
def clean_text(text):
|
|
|
83 |
if not text:
|
84 |
return "Could not extract any text from the PDF."
|
85 |
|
86 |
+
# retrieve the document relevant to the query
|
87 |
+
doc = retrieve_document(user_question)
|
88 |
+
|
|
|
|
|
|
|
89 |
if doc:
|
90 |
print("found doc")
|
91 |
# Split into smaller chunks
|