thechaiexperiment committed on
Commit
ee1566b
·
verified ·
1 Parent(s): a0889c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -295,8 +295,8 @@ def query_recipes_embeddings(query_embedding, embeddings_data=None, n_results=5)
295
  print("No embeddings data available.")
296
  return []
297
  try:
298
- doc_ids = list(embeddings_data.keys())
299
- doc_embeddings = np.array(list(embeddings_data.values()))
300
  similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
301
  top_indices = similarities.argsort()[-n_results:][::-1]
302
  return [(doc_ids[i], similarities[i]) for i in top_indices]
@@ -336,6 +336,15 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
336
  texts.append("")
337
  return texts
338
 
 
 
 
 
 
 
 
 
 
339
 
340
  def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
341
  try:
@@ -717,7 +726,7 @@ async def recipes_endpoint(profile: MedicalProfile):
717
  document_ids = [doc_id for doc_id, _ in initial_results]
718
 
719
  # Retrieve document texts
720
- document_texts = retrieve_document_texts(document_ids, folder_path)
721
  if not document_texts:
722
  raise ValueError("Failed to retrieve document texts.")
723
 
@@ -738,19 +747,20 @@ async def recipes_endpoint(profile: MedicalProfile):
738
  recipes = []
739
  for score, doc_id, text in scored_documents:
740
  # Retrieve metadata for the document
741
- doc_info = df[df['Unnamed: 0'] == doc_id]
742
  if not doc_info.empty:
743
- title = doc_info.iloc[0]['title'] if 'title' in doc_info.columns else "Unknown Title"
744
- if 'recipe' in text.lower() or 'meal' in text.lower():
745
  recipes.append({
746
  "id": doc_id,
747
  "title": title,
748
- "content_preview": text[:200], # First 200 characters of text
749
  "score": score,
750
  })
751
 
 
752
  # Limit the response to top 5 recipes
753
- return {"recipes": recipes[:10], "success": True}
754
 
755
  except ValueError as ve:
756
  # Handle expected errors
 
295
  print("No embeddings data available.")
296
  return []
297
  try:
298
+ doc_ids = embeddings_data["doc_ids"]
299
+ doc_embeddings = embeddings_data["embeddings"]
300
  similarities = cosine_similarity(query_embedding, doc_embeddings).flatten()
301
  top_indices = similarities.argsort()[-n_results:][::-1]
302
  return [(doc_ids[i], similarities[i]) for i in top_indices]
 
336
  texts.append("")
337
  return texts
338
 
339
def retrieve_rec_texts(document_ids_or_names, folder_path):
    """Read the text of each named document found under ``folder_path``.

    Args:
        document_ids_or_names: Iterable of file names, each joined onto
            ``folder_path`` to locate the document on disk.
        folder_path: Directory that holds the document files.

    Returns:
        list: Contents of every file that exists, in input order. Names
        with no matching path are skipped, so the result may be shorter
        than the input.
    """
    # NOTE(review): skipping missing files means the returned texts are not
    # guaranteed to line up index-for-index with the input ids; confirm that
    # callers do not pair the two lists by position.
    document_texts = []
    # Iterate the parameter itself. The previous body looped over
    # `document_ids`, a name that is not defined in this function, which
    # raises NameError (or silently reads an unrelated global).
    for doc_id in document_ids_or_names:
        file_path = os.path.join(folder_path, doc_id)  # Match by file name
        if os.path.exists(file_path):
            with open(file_path, "r") as f:
                document_texts.append(f.read())
    return document_texts
347
+
348
 
349
  def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
350
  try:
 
726
  document_ids = [doc_id for doc_id, _ in initial_results]
727
 
728
  # Retrieve document texts
729
+ document_texts = retrieve_rec_texts(document_ids, folder_path)
730
  if not document_texts:
731
  raise ValueError("Failed to retrieve document texts.")
732
 
 
747
  recipes = []
748
  for score, doc_id, text in scored_documents:
749
  # Retrieve metadata for the document
750
+ doc_info = metadata_df[metadata_df["original_file_name"] == doc_id]
751
  if not doc_info.empty:
752
+ title = doc_info.iloc[0]["title"] if "title" in doc_info.columns else "Unknown Title"
753
+ if "recipe" in text.lower() or "meal" in text.lower():
754
  recipes.append({
755
  "id": doc_id,
756
  "title": title,
757
+ "content_preview": text[:200], # First 200 characters
758
  "score": score,
759
  })
760
 
761
+
762
  # Limit the response to top 5 recipes
763
+ return {"recipes": recipes[:5], "success": True}
764
 
765
  except ValueError as ve:
766
  # Handle expected errors