Penality committed on
Commit
a6dfbcd
·
verified ·
1 Parent(s): c5c40e9

Update app.py

Browse files

Updated both store_document and retrieve_document to work with the Hugging Face dataset folder.

Files changed (1) hide show
  1. app.py +21 -0
app.py CHANGED
@@ -91,6 +91,27 @@ def store_document(text):
91
 
92
  return f"Document stored at: {filename}"
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
  def clean_text(text):
96
  """Cleans extracted text for better processing by the model."""
 
91
 
92
  return f"Document stored at: {filename}"
93
 
94
def retrieve_document(query):
    """Return the text of the stored document closest to ``query``.

    The query is embedded with the module-level ``embedding_model``, the
    module-level FAISS ``index`` is searched for its single nearest
    neighbour, and the matching file path is looked up in the module-level
    ``metadata`` mapping, which is keyed by the stringified index position.

    Args:
        query: Text to search for.

    Returns:
        The document's content as a string, or ``None`` when the search
        yields no usable match, the match has no ``metadata`` entry, or the
        recorded file is no longer present on disk.
    """
    print(f"Retrieving document based on:\n{query}")

    # Generate query embedding
    query_embedding = embedding_model.encode([query]).astype(np.float32)

    # Search for the closest document in FAISS index
    _, closest_idx = index.search(query_embedding, 1)

    # Extract the single result once instead of re-indexing on every use.
    nearest = closest_idx[0][0]
    key = str(nearest)

    # -1 is treated as "no result" (FAISS's placeholder for a missing
    # neighbour, e.g. on an empty index); an unknown key means the index and
    # metadata are out of sync.
    if nearest == -1 or key not in metadata:
        print("No relevant document found")
        return None

    # Retrieve the document file path
    filename = metadata[key]

    # Read and return the document content. A stale metadata entry whose
    # file has been removed is reported the same way as any other miss
    # rather than crashing the caller.
    try:
        with open(filename, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"Document file not found: {filename}")
        return None
114
+
115
 
116
  def clean_text(text):
117
  """Cleans extracted text for better processing by the model."""