thechaiexperiment committed on
Commit
3f00af2
·
verified ·
1 Parent(s): 428bbc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -74
app.py CHANGED
@@ -238,82 +238,11 @@ def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
238
  data['df'] = pd.DataFrame()
239
  return False
240
 
241
- def load_recipes_data(folder_path='pdf kb.zip'):
242
- try:
243
- print("Loading documents data...")
244
- temp_dir = None
245
-
246
- # Handle .zip file
247
- if folder_path.endswith('.zip'):
248
- if not os.path.exists(folder_path):
249
- print(f"Error: .zip file '{folder_path}' not found.")
250
- return False
251
-
252
- # Create a temporary directory for extracting the .zip
253
- temp_dir = tempfile.TemporaryDirectory()
254
- extract_path = temp_dir.name
255
-
256
- # Extract the .zip file
257
- try:
258
- with zipfile.ZipFile(folder_path, 'r') as zip_ref:
259
- zip_ref.extractall(extract_path)
260
- print(f"Extracted .zip file to temporary folder: {extract_path}")
261
- except Exception as e:
262
- print(f"Error extracting .zip file: {e}")
263
- return False
264
-
265
- # Update the folder_path to the extracted directory
266
- folder_path = extract_path
267
-
268
- # Check if the folder exists
269
- if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
270
- print(f"Error: Folder '{folder_path}' not found.")
271
- return False
272
-
273
- # List all HTML or PDF files in the folder
274
- html_files = [f for f in os.listdir(folder_path) if f.endswith('.html')]
275
- pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
276
-
277
- if not html_files and not pdf_files:
278
- print(f"No HTML or PDF files found in folder '{folder_path}'.")
279
- return False
280
-
281
- documents = []
282
-
283
- # Process PDF files (requires a PDF parser like PyPDF2)
284
- for file_name in pdf_files:
285
- file_path = os.path.join(folder_path, file_name)
286
- try:
287
- from PyPDF2 import PdfReader # Import here to avoid dependency issues
288
- reader = PdfReader(file_path)
289
- text = "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
290
- documents.append({"file_name": file_name, "content": text})
291
- except Exception as e:
292
- print(f"Error reading PDF file {file_name}: {e}")
293
-
294
- # Convert the list of documents to a DataFrame
295
- data['df'] = pd.DataFrame(documents)
296
-
297
- if data['df'].empty:
298
- print("No valid documents loaded.")
299
- return False
300
-
301
- print(f"Successfully loaded {len(data['df'])} document records.")
302
- return True
303
- except Exception as e:
304
- print(f"Error loading documents data: {e}")
305
- data['df'] = pd.DataFrame()
306
- return False
307
- finally:
308
- # Clean up the temporary directory, if created
309
- if temp_dir:
310
- temp_dir.cleanup()
311
 
312
  def load_data():
313
  """Load all required data"""
314
  embeddings_success = load_embeddings()
315
  documents_success = load_documents_data()
316
- recipes_success = load_recipes_data()
317
  recipes_embeddings_success = load_recipes_embeddings()
318
  if not recipes_embeddings_success:
319
  print("Warning: Failed to load embeddings, falling back to basic functionality")
@@ -738,9 +667,12 @@ async def chat_endpoint(chat_query: ChatQuery):
738
  @app.post("/api/resources")
739
  async def resources_endpoint(profile: MedicalProfile):
740
  try:
 
741
  # Build the query text
742
  query_text = profile.conditions + " " + profile.daily_symptoms
743
 
 
 
744
  # Generate the query embedding
745
  query_embedding = embed_query_text(query_text)
746
  if query_embedding is None:
@@ -806,11 +738,13 @@ async def recipes_endpoint(profile: MedicalProfile):
806
  try:
807
  # Build the query text for recipes
808
  recipe_query = (
809
- f"Recipes and meals suitable for someone with: "
810
  f"{profile.conditions} and experiencing {profile.daily_symptoms}"
811
  )
812
  query_text = recipe_query
813
 
 
 
814
  # Generate the query embedding
815
  query_embedding = embed_query_text(query_text)
816
  if query_embedding is None:
@@ -818,7 +752,7 @@ async def recipes_endpoint(profile: MedicalProfile):
818
 
819
  # Load embeddings and retrieve initial results
820
  embeddings_data = load_recipes_embeddings()
821
- folder_path = 'pdf kb.zip'
822
  initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
823
  if not initial_results:
824
  raise ValueError("No relevant recipes found.")
@@ -841,7 +775,7 @@ async def recipes_endpoint(profile: MedicalProfile):
841
  scored_documents.sort(key=lambda x: x[0], reverse=True) # Sort by score
842
 
843
  # Load recipe metadata from DataFrame
844
- file_path = 'finalcleaned_excel_file.xlsx'
845
  df = pd.read_excel(file_path)
846
 
847
  # Prepare the final recipes list
 
238
  data['df'] = pd.DataFrame()
239
  return False
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
  def load_data():
243
  """Load all required data"""
244
  embeddings_success = load_embeddings()
245
  documents_success = load_documents_data()
 
246
  recipes_embeddings_success = load_recipes_embeddings()
247
  if not recipes_embeddings_success:
248
  print("Warning: Failed to load embeddings, falling back to basic functionality")
 
667
  @app.post("/api/resources")
668
  async def resources_endpoint(profile: MedicalProfile):
669
  try:
670
+
671
  # Build the query text
672
  query_text = profile.conditions + " " + profile.daily_symptoms
673
 
674
+ print(f"Generated query text: {query_text}")
675
+
676
  # Generate the query embedding
677
  query_embedding = embed_query_text(query_text)
678
  if query_embedding is None:
 
738
  try:
739
  # Build the query text for recipes
740
  recipe_query = (
741
+ f"Recipes foods and meals suitable for someone with: "
742
  f"{profile.conditions} and experiencing {profile.daily_symptoms}"
743
  )
744
  query_text = recipe_query
745
 
746
+ print(f"Generated query text: {query_text}")
747
+
748
  # Generate the query embedding
749
  query_embedding = embed_query_text(query_text)
750
  if query_embedding is None:
 
752
 
753
  # Load embeddings and retrieve initial results
754
  embeddings_data = load_recipes_embeddings()
755
+ folder_path = 'downloaded_articles/downloaded_articles'
756
  initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
757
  if not initial_results:
758
  raise ValueError("No relevant recipes found.")
 
775
  scored_documents.sort(key=lambda x: x[0], reverse=True) # Sort by score
776
 
777
  # Load recipe metadata from DataFrame
778
+ file_path = 'recipes_metadata.xlsx'
779
  df = pd.read_excel(file_path)
780
 
781
  # Prepare the final recipes list