Spaces:

thechaiexperiment
/

TeaRAG

Sleeping

App Files Files Community

thechaiexperiment committed on Jan 22

Commit

ee1566b

verified ·

1 Parent(s): a0889c0

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -8

app.py CHANGED Viewed

@@ -295,8 +295,8 @@ def query_recipes_embeddings(query_embedding, embeddings_data=None, n_results=5)
         print("No embeddings data available.")
         return []
     try:
-        doc_ids = list(embeddings_data.keys())
-        doc_embeddings = np.array(list(embeddings_data.values()))
         similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
         top_indices = similarities.argsort()[-n_results:][::-1]
         return [(doc_ids[i], similarities[i]) for i in top_indices]
@@ -336,6 +336,15 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
             texts.append("")
     return texts
 def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
     try:
@@ -717,7 +726,7 @@ async def recipes_endpoint(profile: MedicalProfile):
         document_ids = [doc_id for doc_id, _ in initial_results]
         # Retrieve document texts
-        document_texts = retrieve_document_texts(document_ids, folder_path)
         if not document_texts:
             raise ValueError("Failed to retrieve document texts.")
@@ -738,19 +747,20 @@ async def recipes_endpoint(profile: MedicalProfile):
         recipes = []
         for score, doc_id, text in scored_documents:
             # Retrieve metadata for the document
-            doc_info = df[df['Unnamed: 0'] == doc_id]
             if not doc_info.empty:
-                title = doc_info.iloc[0]['title'] if 'title' in doc_info.columns else "Unknown Title"
-                if 'recipe' in text.lower() or 'meal' in text.lower():
                     recipes.append({
                         "id": doc_id,
                         "title": title,
-                        "content_preview": text[:200],  # First 200 characters of text
                         "score": score,
                     })
         # Limit the response to top 5 recipes
-        return {"recipes": recipes[:10], "success": True}
     except ValueError as ve:
         # Handle expected errors

         print("No embeddings data available.")
         return []
     try:
+        doc_ids = embeddings_data["doc_ids"]
+        doc_embeddings = embeddings_data["embeddings"]
         similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
         top_indices = similarities.argsort()[-n_results:][::-1]
         return [(doc_ids[i], similarities[i]) for i in top_indices]
             texts.append("")
     return texts
def retrieve_rec_texts(document_ids_or_names, folder_path):
    """Read the text of each named document found under *folder_path*.

    Args:
        document_ids_or_names: Iterable of file names, each joined onto
            ``folder_path`` to locate the document.
        folder_path: Directory that holds the document files.

    Returns:
        A list with the contents of every file that exists, in input order.
        Entries whose file is missing are skipped, so the result may be
        shorter than the input.
        NOTE(review): callers that pair the result with the ids by position
        will misalign when a file is missing -- confirm against the caller.
    """
    document_texts = []
    # Iterate the actual parameter; the previous body looped over the
    # undefined name ``document_ids`` and raised NameError (or silently read
    # an unrelated global of that name).
    for doc_id in document_ids_or_names:
        file_path = os.path.join(folder_path, doc_id)  # Match by file name
        # isfile rather than exists: a directory would pass an existence
        # check and then make open() fail.
        if os.path.isfile(file_path):
            with open(file_path, "r") as f:
                document_texts.append(f.read())
    return document_texts
 def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
     try:
         document_ids = [doc_id for doc_id, _ in initial_results]
         # Retrieve document texts
+        document_texts = retrieve_rec_texts(document_ids, folder_path)
         if not document_texts:
             raise ValueError("Failed to retrieve document texts.")
         recipes = []
         for score, doc_id, text in scored_documents:
             # Retrieve metadata for the document
+            doc_info = metadata_df[metadata_df["original_file_name"] == doc_id]
             if not doc_info.empty:
+                title = doc_info.iloc[0]["title"] if "title" in doc_info.columns else "Unknown Title"
+                if "recipe" in text.lower() or "meal" in text.lower():
                     recipes.append({
                         "id": doc_id,
                         "title": title,
+                        "content_preview": text[:200],  # First 200 characters
                         "score": score,
                     })
         # Limit the response to top 5 recipes
+        return {"recipes": recipes[:5], "success": True}
     except ValueError as ve:
         # Handle expected errors