Spaces:

thechaiexperiment
/

TeaRAG

Sleeping

App Files Files Community

thechaiexperiment committed on Jan 22

Commit

2fb8afb

verified ·

1 Parent(s): 3a9b0dd

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -43

app.py CHANGED Viewed

@@ -336,48 +336,6 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
             texts.append("")
     return texts
-def retrieve_recipes_texts(doc_ids, zip_path='pdf kb.zip'):
-    texts = []
-    try:
-        # Check if the .zip file exists
-        if not os.path.exists(zip_path):
-            print(f"Error: Zip file not found at '{zip_path}'")
-            return ["" for _ in doc_ids]
-        # Create a temporary directory to extract the .zip contents
-        with tempfile.TemporaryDirectory() as temp_dir:
-            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
-                zip_ref.extractall(temp_dir)  # Extract all files to the temp directory
-            # Iterate through the document IDs
-            for doc_id in doc_ids:
-                # Construct the expected PDF file path
-                pdf_path = os.path.join(temp_dir, f"{doc_id}.pdf")
-                try:
-                    # Check if the PDF file exists
-                    if not os.path.exists(pdf_path):
-                        print(f"Warning: PDF file not found: {pdf_path}")
-                        texts.append("")
-                        continue
-                    # Read and extract text from the PDF
-                    with open(pdf_path, 'rb') as pdf_file:
-                        reader = PdfReader(pdf_file)
-                        pdf_text = ""
-                        for page in reader.pages:
-                            pdf_text += page.extract_text()
-                        # Add the extracted text to the result list
-                        texts.append(pdf_text.strip())
-                except Exception as e:
-                    print(f"Error retrieving text from document {doc_id}: {e}")
-                    texts.append("")
-    except Exception as e:
-        print(f"Error handling zip file: {e}")
-        return ["" for _ in doc_ids]
-    return texts
 def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
     try:
@@ -759,7 +717,7 @@ async def recipes_endpoint(profile: MedicalProfile):
         document_ids = [doc_id for doc_id, _ in initial_results]
         # Retrieve document texts
-        document_texts = retrieve_recipes_texts(document_ids, folder_path)
         if not document_texts:
             raise ValueError("Failed to retrieve document texts.")

             texts.append("")
     return texts
 def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
     try:
         document_ids = [doc_id for doc_id, _ in initial_results]
         # Retrieve document texts
+        document_texts = retrieve_document_texts(document_ids, folder_path)
         if not document_texts:
             raise ValueError("Failed to retrieve document texts.")