thechaiexperiment committed on
Commit
471e24c
·
verified ·
1 Parent(s): 502fd27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -57
app.py CHANGED
@@ -536,67 +536,112 @@ def remove_incomplete_sentence(text):
536
  return text
537
 
538
 
 
 
539
  language_code = 1
540
  query_text = "recipes and meals for vegan diabetes headache fatigue"
541
  print(f"Generated query text: {query_text}")
542
 
543
- # Generate the query embedding
544
- query_embedding = embed_query_text(query_text)
545
- if query_embedding is None:
546
- raise ValueError("Failed to generate query embedding.")
547
-
548
- # Load embeddings and retrieve initial results
549
- embeddings_data = load_recipes_embeddings()
550
- folder_path = 'downloaded_articles/downloaded_articles'
551
- initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
552
- if not initial_results:
553
- raise ValueError("No relevant recipes found.")
554
- print(initial_results)
555
- # Extract document IDs
556
- document_ids = [doc_id for doc_id, _ in initial_results]
557
- print(document_ids)
558
- # Retrieve document texts
559
- document_texts = retrieve_rec_texts(document_ids, folder_path)
560
- if not document_texts:
561
- raise ValueError("Failed to retrieve document texts.")
562
- print(document_texts)
563
- # Load recipe metadata from DataFrame
564
- folder_path='downloaded_articles/downloaded_articles'
565
- file_path = 'recipes_metadata.xlsx'
566
- metadata_path = 'recipes_metadata.xlsx'
567
- metadata_df = pd.read_excel(file_path)
568
- relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
569
- print(relevant_portions)
570
- flattened_relevant_portions = []
571
- for doc_id, portions in relevant_portions.items():
572
- flattened_relevant_portions.extend(portions)
573
- unique_selected_parts = remove_duplicates(flattened_relevant_portions)
574
- print(unique_selected_parts)
575
- combined_parts = " ".join(unique_selected_parts)
576
- print(combined_parts)
577
- context = [query_text] + unique_selected_parts
578
- print(context)
579
- entities = extract_entities(query_text)
580
- print(entities)
581
- passage = enhance_passage_with_entities(combined_parts, entities)
582
- print(passage)
583
- prompt = create_prompt(query_text, passage)
584
- print(prompt)
585
- answer = generate_answer(prompt)
586
- print(answer)
587
- answer_part = answer.split("Answer:")[-1].strip()
588
- print(answer_part)
589
- cleaned_answer = remove_answer_prefix(answer_part)
590
- print(cleaned_answer)
591
- final_answer = remove_incomplete_sentence(cleaned_answer)
592
- print(final_answer )
593
- if language_code == 0:
594
- final_answer = translate_en_to_ar(final_answer)
595
- if final_answer:
596
- print("Answer:")
597
- print(final_answer)
598
- else:
599
- print("Sorry, I can't help with that.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
 
601
 
602
 
 
536
  return text
537
 
538
 
539
+ import traceback
540
+
541
  language_code = 1
542
  query_text = "recipes and meals for vegan diabetes headache fatigue"
543
  print(f"Generated query text: {query_text}")
544
 
545
+ try:
546
+ # Generate the query embedding
547
+ print("Generating query embedding...")
548
+ query_embedding = embed_query_text(query_text)
549
+ if query_embedding is None:
550
+ raise ValueError("Failed to generate query embedding.")
551
+ print(f"Query embedding generated: {query_embedding}")
552
+
553
+ # Load embeddings and retrieve initial results
554
+ print("Loading recipe embeddings...")
555
+ embeddings_data = load_recipes_embeddings()
556
+ print("Embeddings loaded. Retrieving initial results...")
557
+ initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
558
+ if not initial_results:
559
+ raise ValueError("No relevant recipes found.")
560
+ print(f"Initial results: {initial_results}")
561
+
562
+ # Extract document IDs
563
+ document_ids = [doc_id for doc_id, _ in initial_results]
564
+ print(f"Document IDs: {document_ids}")
565
+
566
+ # Retrieve document texts
567
+ folder_path = 'downloaded_articles/downloaded_articles'
568
+ print("Retrieving document texts...")
569
+ document_texts = retrieve_rec_texts(document_ids, folder_path)
570
+ if not document_texts:
571
+ raise ValueError("Failed to retrieve document texts.")
572
+ print(f"Document texts retrieved: {document_texts}")
573
+
574
+ # Load recipe metadata from DataFrame
575
+ file_path = 'recipes_metadata.xlsx'
576
+ print("Loading metadata from Excel...")
577
+ metadata_df = pd.read_excel(file_path)
578
+ print(f"Metadata loaded: {metadata_df.head()}")
579
+
580
+ # Extract relevant portions
581
+ print("Extracting relevant portions...")
582
+ relevant_portions = extract_relevant_portions(
583
+ document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1
584
+ )
585
+ print(f"Relevant portions: {relevant_portions}")
586
+
587
+ # Flatten portions
588
+ print("Flattening relevant portions...")
589
+ flattened_relevant_portions = []
590
+ for doc_id, portions in relevant_portions.items():
591
+ flattened_relevant_portions.extend(portions)
592
+ unique_selected_parts = remove_duplicates(flattened_relevant_portions)
593
+ print(f"Unique selected parts: {unique_selected_parts}")
594
+
595
+ # Combine parts into a single context
596
+ combined_parts = " ".join(unique_selected_parts)
597
+ print(f"Combined parts: {combined_parts}")
598
+ context = [query_text] + unique_selected_parts
599
+ print(f"Context: {context}")
600
+
601
+ # Extract entities
602
+ print("Extracting entities...")
603
+ entities = extract_entities(query_text)
604
+ print(f"Entities: {entities}")
605
+
606
+ # Enhance passage with entities
607
+ print("Enhancing passage with entities...")
608
+ passage = enhance_passage_with_entities(combined_parts, entities)
609
+ print(f"Enhanced passage: {passage}")
610
+
611
+ # Create the prompt
612
+ print("Creating prompt...")
613
+ prompt = create_prompt(query_text, passage)
614
+ print(f"Prompt: {prompt}")
615
+
616
+ # Generate the answer
617
+ print("Generating answer...")
618
+ answer = generate_answer(prompt)
619
+ print(f"Answer: {answer}")
620
+ answer_part = answer.split("Answer:")[-1].strip()
621
+ print(f"Answer part: {answer_part}")
622
+
623
+ # Clean and finalize the answer
624
+ print("Cleaning answer...")
625
+ cleaned_answer = remove_answer_prefix(answer_part)
626
+ print(f"Cleaned answer: {cleaned_answer}")
627
+ final_answer = remove_incomplete_sentence(cleaned_answer)
628
+ print(f"Final answer: {final_answer}")
629
+
630
+ # Translate if needed
631
+ if language_code == 0:
632
+ print("Translating answer to Arabic...")
633
+ final_answer = translate_en_to_ar(final_answer)
634
+
635
+ # Display the answer
636
+ if final_answer:
637
+ print("Final Answer:")
638
+ print(final_answer)
639
+ else:
640
+ print("Sorry, I can't help with that.")
641
+ except Exception as e:
642
+ print("An error occurred:")
643
+ print(traceback.format_exc())
644
+
645
 
646
 
647