Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -342,17 +342,33 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
|
|
342 |
texts.append("")
|
343 |
return texts
|
344 |
|
345 |
-
def retrieve_rec_texts(document_ids, folder_path):
|
346 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
for doc_id in document_ids:
|
348 |
-
|
349 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
if os.path.exists(file_path):
|
351 |
-
with open(file_path, "r") as f:
|
352 |
document_texts.append(f.read())
|
|
|
|
|
353 |
return document_texts
|
354 |
|
355 |
|
|
|
356 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
357 |
try:
|
358 |
# Prepare pairs for the cross-encoder
|
@@ -725,7 +741,7 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
725 |
# Load embeddings and retrieve initial results
|
726 |
embeddings_data = load_recipes_embeddings()
|
727 |
folder_path = 'downloaded_articles/downloaded_articles'
|
728 |
-
initial_results =
|
729 |
if not initial_results:
|
730 |
raise ValueError("No relevant recipes found.")
|
731 |
|
@@ -748,22 +764,17 @@ async def recipes_endpoint(profile: MedicalProfile):
|
|
748 |
|
749 |
# Load recipe metadata from DataFrame
|
750 |
file_path = 'recipes_metadata.xlsx'
|
|
|
751 |
metadata_df = pd.read_excel(file_path)
|
752 |
|
753 |
# Prepare the final recipes list
|
754 |
recipes = []
|
755 |
-
|
756 |
-
|
757 |
-
|
758 |
-
if not doc_info.empty:
|
759 |
-
title = doc_info.iloc[0]["original_file_name"] if "original_file_name" in doc_info.columns else "Unknown Title"
|
760 |
-
recipes.append({
|
761 |
-
"id": doc_id,
|
762 |
-
"title": title,
|
763 |
-
"content_preview": text[:200], # First 200 characters
|
764 |
-
"score": score,
|
765 |
-
})
|
766 |
|
|
|
|
|
767 |
|
768 |
|
769 |
# Limit the response to top 5 recipes
|
|
|
342 |
texts.append("")
|
343 |
return texts
|
344 |
|
345 |
+
def retrieve_rec_texts(document_ids, folder_path, metadata_path):
    """Return the text of every requested document that can be read from disk.

    The metadata workbook maps each document ID to the name of the file that
    holds its text inside ``folder_path``.

    Args:
        document_ids: Iterable of document IDs. Each ID is compared by its
            string form, so ``1`` and ``"1"`` refer to the same document.
        folder_path: Directory that contains the document files.
        metadata_path: Path to an Excel file with ``id`` and
            ``original_file_name`` columns.

    Returns:
        A list with the contents of the files that were found, in the order
        of ``document_ids``. Documents with no usable file name or no file on
        disk are skipped with a printed warning, so the list can be shorter
        than ``document_ids``.

    Raises:
        ValueError: If the metadata file lacks the ``id`` or
            ``original_file_name`` column.
    """
    document_ids = list(document_ids)
    if not document_ids:
        # Nothing requested: skip parsing the workbook altogether.
        return []

    metadata_df = pd.read_excel(metadata_path)
    if "id" not in metadata_df.columns or "original_file_name" not in metadata_df.columns:
        raise ValueError("Metadata file must contain 'id' and 'original_file_name' columns.")

    # Keys are normalised to str here, so lookups below must use str(doc_id).
    id_to_file_name = dict(zip(metadata_df["id"].astype(str), metadata_df["original_file_name"]))

    document_texts = []
    for doc_id in document_ids:
        original_file_name = id_to_file_name.get(str(doc_id))
        # A blank cell is read back as NaN (a truthy float); treat anything
        # that is not a non-empty string as "no file name".
        if not isinstance(original_file_name, str) or not original_file_name:
            print(f"Warning: No original file name found for document ID {doc_id}")
            continue

        file_path = os.path.join(folder_path, original_file_name)
        # isfile (not exists) so a directory with the same name is reported
        # as missing instead of failing inside open().
        if not os.path.isfile(file_path):
            print(f"Warning: File not found for {file_path}")
            continue

        with open(file_path, "r", encoding="utf-8") as f:
            document_texts.append(f.read())
    return document_texts
|
369 |
|
370 |
|
371 |
+
|
372 |
def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
|
373 |
try:
|
374 |
# Prepare pairs for the cross-encoder
|
|
|
741 |
# Load embeddings and retrieve initial results
|
742 |
embeddings_data = load_recipes_embeddings()
|
743 |
folder_path = 'downloaded_articles/downloaded_articles'
|
744 |
+
initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
|
745 |
if not initial_results:
|
746 |
raise ValueError("No relevant recipes found.")
|
747 |
|
|
|
764 |
|
765 |
# Load recipe metadata from DataFrame
|
766 |
file_path = 'recipes_metadata.xlsx'
|
767 |
+
metadata_path = 'recipes_metadata.xlsx'
|
768 |
metadata_df = pd.read_excel(file_path)
|
769 |
|
770 |
# Prepare the final recipes list
|
771 |
recipes = []
|
772 |
+
# Combine scores with resources
|
773 |
+
for i, recipe in enumerate(recipes):
|
774 |
+
recipe["score"] = scores[i] if i < len(scores) else 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
775 |
|
776 |
+
# Sort resources by score
|
777 |
+
recipes.sort(key=lambda x: x["score"], reverse=True)
|
778 |
|
779 |
|
780 |
# Limit the response to top 5 recipes
|