Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -295,8 +295,8 @@ def query_recipes_embeddings(query_embedding, embeddings_data=None, n_results=5)
|
|
295 |
print("No embeddings data available.")
|
296 |
return []
|
297 |
try:
|
298 |
-
doc_ids =
|
299 |
-
doc_embeddings =
|
300 |
similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
|
301 |
top_indices = similarities.argsort()[-n_results:][::-1]
|
302 |
return [(doc_ids[i], similarities[i]) for i in top_indices]
|
@@ -336,6 +336,15 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
|
|
336 |
texts.append("")
|
337 |
return texts
|
338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
|
340 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
341 |
try:
|
@@ -717,7 +726,7 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
717 |
document_ids = [doc_id for doc_id, _ in initial_results]
|
718 |
|
719 |
# Retrieve document texts
|
720 |
-
document_texts =
|
721 |
if not document_texts:
|
722 |
raise ValueError("Failed to retrieve document texts.")
|
723 |
|
@@ -738,19 +747,20 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
738 |
recipes = []
|
739 |
for score, doc_id, text in scored_documents:
|
740 |
# Retrieve metadata for the document
|
741 |
-
doc_info =
|
742 |
if not doc_info.empty:
|
743 |
-
title = doc_info.iloc[0][
|
744 |
-
if
|
745 |
recipes.append({
|
746 |
"id": doc_id,
|
747 |
"title": title,
|
748 |
-
"content_preview": text[:200], # First 200 characters
|
749 |
"score": score,
|
750 |
})
|
751 |
|
|
|
752 |
# Limit the response to top 5 recipes
|
753 |
-
return {"recipes": recipes[:
|
754 |
|
755 |
except ValueError as ve:
|
756 |
# Handle expected errors
|
|
|
295 |
print("No embeddings data available.")
|
296 |
return []
|
297 |
try:
|
298 |
+
doc_ids = embeddings_data["doc_ids"]
|
299 |
+
doc_embeddings = embeddings_data["embeddings"]
|
300 |
similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
|
301 |
top_indices = similarities.argsort()[-n_results:][::-1]
|
302 |
return [(doc_ids[i], similarities[i]) for i in top_indices]
|
|
|
336 |
texts.append("")
|
337 |
return texts
|
338 |
|
339 |
+
def retrieve_rec_texts(document_ids_or_names, folder_path):
    """Read the text of each requested document found in ``folder_path``.

    Args:
        document_ids_or_names: Iterable of file names, each joined onto
            ``folder_path`` to locate the document.
        folder_path: Directory that holds the document files.

    Returns:
        A list with the contents of every file that was found, in the order
        the names were given. Names with no matching file are skipped, so
        the result can be shorter than the input and is not guaranteed to be
        index-aligned with it. An empty list means nothing was found.
    """
    document_texts = []
    # Iterate the actual parameter; the previous body looped over an
    # undefined name (`document_ids`) and failed with NameError.
    for doc_id in document_ids_or_names:
        file_path = os.path.join(folder_path, doc_id)  # Match by file name
        # isfile (not exists) so a directory with a matching name is skipped
        # instead of making open() raise.
        if os.path.isfile(file_path):
            # Explicit encoding so reads do not depend on the platform locale.
            with open(file_path, "r", encoding="utf-8") as f:
                document_texts.append(f.read())
    return document_texts
|
347 |
+
|
348 |
|
349 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
350 |
try:
|
|
|
726 |
document_ids = [doc_id for doc_id, _ in initial_results]
|
727 |
|
728 |
# Retrieve document texts
|
729 |
+
document_texts = retrieve_rec_texts(document_ids, folder_path)
|
730 |
if not document_texts:
|
731 |
raise ValueError("Failed to retrieve document texts.")
|
732 |
|
|
|
747 |
recipes = []
|
748 |
for score, doc_id, text in scored_documents:
|
749 |
# Retrieve metadata for the document
|
750 |
+
doc_info = metadata_df[metadata_df["original_file_name"] == doc_id]
|
751 |
if not doc_info.empty:
|
752 |
+
title = doc_info.iloc[0]["title"] if "title" in doc_info.columns else "Unknown Title"
|
753 |
+
if "recipe" in text.lower() or "meal" in text.lower():
|
754 |
recipes.append({
|
755 |
"id": doc_id,
|
756 |
"title": title,
|
757 |
+
"content_preview": text[:200], # First 200 characters
|
758 |
"score": score,
|
759 |
})
|
760 |
|
761 |
+
|
762 |
# Limit the response to top 5 recipes
|
763 |
+
return {"recipes": recipes[:5], "success": True}
|
764 |
|
765 |
except ValueError as ve:
|
766 |
# Handle expected errors
|