thechaiexperiment committed on
Commit
2fb8afb
·
verified ·
1 Parent(s): 3a9b0dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -43
app.py CHANGED
@@ -336,48 +336,6 @@ def retrieve_document_texts(doc_ids, folder_path='downloaded_articles/downloaded
336
  texts.append("")
337
  return texts
338
 
339
- def retrieve_recipes_texts(doc_ids, zip_path='pdf kb.zip'):
340
- texts = []
341
-
342
- try:
343
- # Check if the .zip file exists
344
- if not os.path.exists(zip_path):
345
- print(f"Error: Zip file not found at '{zip_path}'")
346
- return ["" for _ in doc_ids]
347
-
348
- # Create a temporary directory to extract the .zip contents
349
- with tempfile.TemporaryDirectory() as temp_dir:
350
- with zipfile.ZipFile(zip_path, 'r') as zip_ref:
351
- zip_ref.extractall(temp_dir) # Extract all files to the temp directory
352
-
353
- # Iterate through the document IDs
354
- for doc_id in doc_ids:
355
- # Construct the expected PDF file path
356
- pdf_path = os.path.join(temp_dir, f"{doc_id}.pdf")
357
- try:
358
- # Check if the PDF file exists
359
- if not os.path.exists(pdf_path):
360
- print(f"Warning: PDF file not found: {pdf_path}")
361
- texts.append("")
362
- continue
363
-
364
- # Read and extract text from the PDF
365
- with open(pdf_path, 'rb') as pdf_file:
366
- reader = PdfReader(pdf_file)
367
- pdf_text = ""
368
- for page in reader.pages:
369
- pdf_text += page.extract_text()
370
-
371
- # Add the extracted text to the result list
372
- texts.append(pdf_text.strip())
373
- except Exception as e:
374
- print(f"Error retrieving text from document {doc_id}: {e}")
375
- texts.append("")
376
-
377
- except Exception as e:
378
- print(f"Error handling zip file: {e}")
379
- return ["" for _ in doc_ids]
380
- return texts
381
 
382
  def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
383
  try:
@@ -759,7 +717,7 @@ async def recipes_endpoint(profile: MedicalProfile):
759
  document_ids = [doc_id for doc_id, _ in initial_results]
760
 
761
  # Retrieve document texts
762
- document_texts = retrieve_recipes_texts(document_ids, folder_path)
763
  if not document_texts:
764
  raise ValueError("Failed to retrieve document texts.")
765
 
 
336
  texts.append("")
337
  return texts
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
  def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
341
  try:
 
717
  document_ids = [doc_id for doc_id, _ in initial_results]
718
 
719
  # Retrieve document texts
720
+ document_texts = retrieve_document_texts(document_ids, folder_path)
721
  if not document_texts:
722
  raise ValueError("Failed to retrieve document texts.")
723