thechaiexperiment committed on
Commit 811a449 · verified · 1 Parent(s): 4d36e19

Update app.py

Files changed (1)
  1. app.py +63 -57
app.py CHANGED
@@ -536,63 +536,69 @@ def remove_incomplete_sentence(text):
     return text
 
 
-language_code = 1
-query_text = 'What are symptoms of heart attack?'
-query_embedding = embed_query_text(query_text)  # Embed the query text
-embeddings_data = load_embeddings()
-folder_path = 'downloaded_articles/downloaded_articles'
-initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
-document_ids = [doc_id for doc_id, _ in initial_results]
-print(document_ids)
-document_texts = retrieve_document_texts(document_ids, folder_path)
-
-# Rerank the results using the CrossEncoder
-cross_encoder = models['cross_encoder']
-scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
-scored_documents = list(zip(scores, document_ids, document_texts))
-scored_documents.sort(key=lambda x: x[0], reverse=True)
-print("Reranked results:")
-for idx, (score, doc_id, doc) in enumerate(scored_documents):
-    print(f"Rank {idx + 1} (Score: {score:.4f}, Document ID: {doc_id})")
-relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
-
-for doc_id, portions in relevant_portions.items():
-    print(f"{doc_id}: {portions}")
-
-flattened_relevant_portions = []
-for doc_id, portions in relevant_portions.items():
-    flattened_relevant_portions.extend(portions)
-
-# Remove duplicate portions
-unique_selected_parts = remove_duplicates(flattened_relevant_portions)
-
-# Combine the unique parts into a single string of context
-combined_parts = " ".join(unique_selected_parts)
-
-# Construct context as a list: first the query, then the unique selected portions
-context = [query_text] + unique_selected_parts
-
-# Print the context (query + relevant portions)
-print(context)
-entities = extract_entities(query_text)
-passage = enhance_passage_with_entities(combined_parts, entities)
-# Generate answer with the enhanced passage
-prompt = create_prompt(query_text, passage)
-answer = generate_answer(prompt)
-# print(f"\nTime taken to generate the answer: {generation_time:.2f} seconds")
-answer_part = answer.split("Answer:")[-1].strip()
-cleaned_answer = remove_answer_prefix(answer_part)
-final_answer = remove_incomplete_sentence(cleaned_answer)
-
-if language_code == 0:
-    final_answer = translate_en_to_ar(final_answer)
-
-if final_answer:
-    print("Answer:")
-    print(final_answer)
-else:
-    print("Sorry, I can't help with that.")
+language_code = 1
+query_text = "recipes and meals for vegan diabetes headache fatigue"
+print(f"Generated query text: {query_text}")
+
+# Generate the query embedding
+query_embedding = embed_query_text(query_text)
+if query_embedding is None:
+    raise ValueError("Failed to generate query embedding.")
+
+# Load embeddings and retrieve initial results
+embeddings_data = load_recipes_embeddings()
+folder_path = 'downloaded_articles/downloaded_articles'
+initial_results = query_recipes_embeddings(query_embedding, embeddings_data, n_results=10)
+if not initial_results:
+    raise ValueError("No relevant recipes found.")
+print(initial_results)
+# Extract document IDs
+document_ids = [doc_id for doc_id, _ in initial_results]
+print(document_ids)
+# Retrieve document texts
+document_texts = retrieve_rec_texts(document_ids, folder_path)
+if not document_texts:
+    raise ValueError("Failed to retrieve document texts.")
+print(document_texts)
+# Load recipe metadata from the spreadsheet
+metadata_path = 'recipes_metadata.xlsx'
+metadata_df = pd.read_excel(metadata_path)
+relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
+print(relevant_portions)
+flattened_relevant_portions = []
+for doc_id, portions in relevant_portions.items():
+    flattened_relevant_portions.extend(portions)
+unique_selected_parts = remove_duplicates(flattened_relevant_portions)
+print(unique_selected_parts)
+combined_parts = " ".join(unique_selected_parts)
+print(combined_parts)
+context = [query_text] + unique_selected_parts
+print(context)
+entities = extract_entities(query_text)
+print(entities)
+passage = enhance_passage_with_entities(combined_parts, entities)
+print(passage)
+prompt = create_prompt(query_text, passage)
+print(prompt)
+answer = generate_answer(prompt)
+print(answer)
+answer_part = answer.split("Answer:")[-1].strip()
+print(answer_part)
+cleaned_answer = remove_answer_prefix(answer_part)
+print(cleaned_answer)
+final_answer = remove_incomplete_sentence(cleaned_answer)
+print(final_answer)
+if language_code == 0:
+    final_answer = translate_en_to_ar(final_answer)
+if final_answer:
+    print("Answer:")
+    print(final_answer)
+else:
+    print("Sorry, I can't help with that.")
+
+
 
 @app.get("/")
 async def root():
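
For readers following this diff, the sketch below isolates the retrieve-then-rerank pattern the removed block exercised: bi-encoder retrieval over stored embeddings, then cross-encoder rescoring of the candidates. It is a minimal sketch only; the model names and the tiny in-memory corpus are assumptions for illustration, not this app's actual configuration (the real code uses helpers such as embed_query_text, query_recipes_embeddings, and models['cross_encoder'] defined elsewhere in app.py).

# Minimal two-stage retrieval sketch; models and corpus are illustrative
# assumptions, not the app's real configuration.
import numpy as np
from sentence_transformers import SentenceTransformer, CrossEncoder

bi_encoder = SentenceTransformer('all-MiniLM-L6-v2')                   # assumed retriever
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')   # assumed reranker

corpus = {
    'doc1': 'Chest pain and shortness of breath are common heart attack symptoms.',
    'doc2': 'Vegan meal plans for diabetes focus on low-glycemic whole foods.',
    'doc3': 'Regular exercise improves cardiovascular health.',
}

query = 'recipes and meals for vegan diabetes headache fatigue'
query_emb = bi_encoder.encode(query)
doc_embs = {doc_id: bi_encoder.encode(text) for doc_id, text in corpus.items()}

def cosine(a, b):
    # Cosine similarity between two 1-D embedding vectors.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Stage 1: embedding similarity retrieval (what query_recipes_embeddings does
# over the precomputed embeddings file, per this diff).
candidates = sorted(corpus, key=lambda d: cosine(query_emb, doc_embs[d]), reverse=True)[:2]

# Stage 2: cross-encoder rescoring, as in the reranking block this commit removes.
scores = cross_encoder.predict([(query, corpus[d]) for d in candidates])
for score, doc_id in sorted(zip(scores, candidates), reverse=True):
    print(f"{doc_id}: {score:.4f}")

Note that the new + block keeps stage 1 but drops the cross-encoder stage; restoring reranking on top of query_recipes_embeddings would follow the same shape as stage 2 above.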