Spaces:

thechaiexperiment
/

TeaRAG

Sleeping

App Files Community

thechaiexperiment committed on Jan 22

Commit

3f00af2

verified ·

1 Parent(s): 428bbc3

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -74

app.py CHANGED Viewed

@@ -238,82 +238,11 @@ def load_documents_data(folder_path='downloaded_articles/downloaded_articles'):
         data['df'] = pd.DataFrame()
         return False
-def load_recipes_data(folder_path='pdf kb.zip'):
-    try:
-        print("Loading documents data...")
-        temp_dir = None
-        # Handle .zip file
-        if folder_path.endswith('.zip'):
-            if not os.path.exists(folder_path):
-                print(f"Error: .zip file '{folder_path}' not found.")
-                return False
-            # Create a temporary directory for extracting the .zip
-            temp_dir = tempfile.TemporaryDirectory()
-            extract_path = temp_dir.name
-            # Extract the .zip file
-            try:
-                with zipfile.ZipFile(folder_path, 'r') as zip_ref:
-                    zip_ref.extractall(extract_path)
-                print(f"Extracted .zip file to temporary folder: {extract_path}")
-            except Exception as e:
-                print(f"Error extracting .zip file: {e}")
-                return False
-            # Update the folder_path to the extracted directory
-            folder_path = extract_path
-        # Check if the folder exists
-        if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
-            print(f"Error: Folder '{folder_path}' not found.")
-            return False
-        # List all HTML or PDF files in the folder
-        html_files = [f for f in os.listdir(folder_path) if f.endswith('.html')]
-        pdf_files = [f for f in os.listdir(folder_path) if f.endswith('.pdf')]
-        if not html_files and not pdf_files:
-            print(f"No HTML or PDF files found in folder '{folder_path}'.")
-            return False
-        documents = []
-        # Process PDF files (requires a PDF parser like PyPDF2)
-        for file_name in pdf_files:
-            file_path = os.path.join(folder_path, file_name)
-            try:
-                from PyPDF2 import PdfReader  # Import here to avoid dependency issues
-                reader = PdfReader(file_path)
-                text = "\n".join(page.extract_text() for page in reader.pages if page.extract_text())
-                documents.append({"file_name": file_name, "content": text})
-            except Exception as e:
-                print(f"Error reading PDF file {file_name}: {e}")
-        # Convert the list of documents to a DataFrame
-        data['df'] = pd.DataFrame(documents)
-        if data['df'].empty:
-            print("No valid documents loaded.")
-            return False
-        print(f"Successfully loaded {len(data['df'])} document records.")
-        return True
-    except Exception as e:
-        print(f"Error loading documents data: {e}")
-        data['df'] = pd.DataFrame()
-        return False
-    finally:
-        # Clean up the temporary directory, if created
-        if temp_dir:
-            temp_dir.cleanup()
 def load_data():
     """Load all required data"""
     embeddings_success = load_embeddings()
     documents_success = load_documents_data()
-    recipes_success = load_recipes_data()
     recipes_embeddings_success = load_recipes_embeddings()
     if not recipes_embeddings_success:
         print("Warning: Failed to load embeddings, falling back to basic functionality")
@@ -738,9 +667,12 @@ async def chat_endpoint(chat_query: ChatQuery):
 @app.post("/api/resources")
 async def resources_endpoint(profile: MedicalProfile):
     try:
         # Build the query text
         query_text = profile.conditions + " " + profile.daily_symptoms
         # Generate the query embedding
         query_embedding = embed_query_text(query_text)
         if query_embedding is None:
@@ -806,11 +738,13 @@ async def recipes_endpoint(profile: MedicalProfile):
     try:
         # Build the query text for recipes
         recipe_query = (
-            f"Recipes and meals suitable for someone with: "
             f"{profile.conditions} and experiencing {profile.daily_symptoms}"
         )
         query_text = recipe_query
         # Generate the query embedding
         query_embedding = embed_query_text(query_text)
         if query_embedding is None:
@@ -818,7 +752,7 @@ async def recipes_endpoint(profile: MedicalProfile):
         # Load embeddings and retrieve initial results
         embeddings_data = load_recipes_embeddings()
-        folder_path = 'pdf kb.zip'
         initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
         if not initial_results:
             raise ValueError("No relevant recipes found.")
@@ -841,7 +775,7 @@ async def recipes_endpoint(profile: MedicalProfile):
         scored_documents.sort(key=lambda x: x[0], reverse=True)  # Sort by score
         # Load recipe metadata from DataFrame
-        file_path = 'finalcleaned_excel_file.xlsx'
         df = pd.read_excel(file_path)
         # Prepare the final recipes list

         data['df'] = pd.DataFrame()
         return False
 def load_data():
     """Load all required data"""
     embeddings_success = load_embeddings()
     documents_success = load_documents_data()
     recipes_embeddings_success = load_recipes_embeddings()
     if not recipes_embeddings_success:
         print("Warning: Failed to load embeddings, falling back to basic functionality")
 @app.post("/api/resources")
 async def resources_endpoint(profile: MedicalProfile):
     try:
         # Build the query text
         query_text = profile.conditions + " " + profile.daily_symptoms
+        print(f"Generated query text: {query_text}")
         # Generate the query embedding
         query_embedding = embed_query_text(query_text)
         if query_embedding is None:
     try:
         # Build the query text for recipes
         recipe_query = (
+            f"Recipes foods and meals suitable for someone with: "
             f"{profile.conditions} and experiencing {profile.daily_symptoms}"
         )
         query_text = recipe_query
+        print(f"Generated query text: {query_text}")
         # Generate the query embedding
         query_embedding = embed_query_text(query_text)
         if query_embedding is None:
         # Load embeddings and retrieve initial results
         embeddings_data = load_recipes_embeddings()
+        folder_path = 'downloaded_articles/downloaded_articles'
         initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
         if not initial_results:
             raise ValueError("No relevant recipes found.")
         scored_documents.sort(key=lambda x: x[0], reverse=True)  # Sort by score
         # Load recipe metadata from DataFrame
+        file_path = 'recipes_metadata.xlsx'
         df = pd.read_excel(file_path)
         # Prepare the final recipes list