thechaiexperiment committed on
Commit
6c38ae6
·
1 Parent(s): 58d2f18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -60
app.py CHANGED
@@ -494,13 +494,6 @@ if final_answer:
494
  else:
495
  print("Sorry, I can't help with that.")
496
 
497
-
498
-
499
-
500
-
501
-
502
-
503
-
504
  @app.get("/")
505
  async def root():
506
  return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
@@ -520,59 +513,47 @@ async def health_check():
520
  async def chat_endpoint(chat_query: ChatQuery):
521
  try:
522
  query_text = chat_query.query
523
-
524
- # Step 1: Embed the query
525
- query_embedding = embed_query_text(query_text)
526
-
527
- # Step 2: Retrieve top results using embeddings similarity
528
  initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
529
  document_ids = [doc_id for doc_id, _ in initial_results]
530
-
531
- # Step 3: Fetch document texts
532
  document_texts = retrieve_document_texts(document_ids, folder_path)
533
-
534
- # Step 4: Re-rank documents (optional, if reranking is used)
535
- reranked_documents = rerank_documents(query_text, document_ids, document_texts, cross_encoder_model)
536
-
537
- # Step 5: Extract relevant portions (if enabled)
538
- relevant_portions = extract_relevant_portions(
539
- document_texts,
540
- query=query_text,
541
- max_portions=3,
542
- portion_size=1,
543
- min_query_words=1
544
- )
545
-
546
- # Step 6: Flatten and clean relevant portions
547
  flattened_relevant_portions = []
548
  for doc_id, portions in relevant_portions.items():
549
  flattened_relevant_portions.extend(portions)
550
  unique_selected_parts = remove_duplicates(flattened_relevant_portions)
551
  combined_parts = " ".join(unique_selected_parts)
552
-
553
- # Step 7: Extract entities and enhance passage
554
  entities = extract_entities(query_text)
555
  passage = enhance_passage_with_entities(combined_parts, entities)
556
-
557
- # Step 8: Create prompt and generate answer
558
  prompt = create_prompt(query_text, passage)
559
- answer, generation_time = generate_answer(prompt)
560
-
561
- # Step 9: Clean the generated answer
562
  answer_part = answer.split("Answer:")[-1].strip()
563
  cleaned_answer = remove_answer_prefix(answer_part)
564
  final_answer = remove_incomplete_sentence(cleaned_answer)
565
-
 
 
 
 
 
 
566
  return {
567
  "response": final_answer,
568
  "conversation_id": chat_query.conversation_id,
569
  "success": True
570
  }
571
-
572
  except Exception as e:
573
  raise HTTPException(status_code=500, detail=str(e))
574
 
575
-
576
  @app.post("/api/resources")
577
  async def resources_endpoint(profile: MedicalProfile):
578
  try:
@@ -582,15 +563,17 @@ async def resources_endpoint(profile: MedicalProfile):
582
  Restrictions: {', '.join(profile.food_restrictions)}
583
  Mental health: {', '.join(profile.mental_conditions)}
584
  """
585
-
586
- query_embedding = models['embedding'].encode([context])
587
- relevant_docs = query_embeddings(query_embedding)
588
- doc_texts = [retrieve_document_text(doc_id) for doc_id, _ in relevant_docs]
589
- doc_texts = [text for text in doc_texts if text.strip()]
590
-
591
- rerank_scores = rerank_documents(context, doc_texts)
592
- ranked_docs = sorted(zip(relevant_docs, rerank_scores, doc_texts), key=lambda x: x[1], reverse=True)
593
-
 
 
594
  resources = []
595
  for (doc_id, _), score, text in ranked_docs[:10]:
596
  doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
@@ -600,7 +583,6 @@ async def resources_endpoint(profile: MedicalProfile):
600
  "content": text[:200],
601
  "score": float(score)
602
  })
603
-
604
  return {"resources": resources, "success": True}
605
  except Exception as e:
606
  raise HTTPException(status_code=500, detail=str(e))
@@ -609,15 +591,17 @@ async def resources_endpoint(profile: MedicalProfile):
609
  async def recipes_endpoint(profile: MedicalProfile):
610
  try:
611
  recipe_query = f"Recipes and meals suitable for someone with: {', '.join(profile.chronic_conditions + profile.food_restrictions)}"
612
-
613
- query_embedding = models['embedding'].encode([recipe_query])
614
- relevant_docs = query_embeddings(query_embedding)
615
- doc_texts = [retrieve_document_text(doc_id) for doc_id, _ in relevant_docs]
616
- doc_texts = [text for text in doc_texts if text.strip()]
617
-
618
- rerank_scores = rerank_documents(recipe_query, doc_texts)
619
- ranked_docs = sorted(zip(relevant_docs, rerank_scores, doc_texts), key=lambda x: x[1], reverse=True)
620
-
 
 
621
  recipes = []
622
  for (doc_id, _), score, text in ranked_docs[:10]:
623
  doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
@@ -628,13 +612,10 @@ async def recipes_endpoint(profile: MedicalProfile):
628
  "content": text[:200],
629
  "score": float(score)
630
  })
631
-
632
  return {"recipes": recipes[:5], "success": True}
633
  except Exception as e:
634
  raise HTTPException(status_code=500, detail=str(e))
635
 
636
-
637
-
638
  if not init_success:
639
  print("Warning: Application initialized with partial functionality")
640
 
 
494
  else:
495
  print("Sorry, I can't help with that.")
496
 
 
 
 
 
 
 
 
497
  @app.get("/")
498
  async def root():
499
  return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
 
513
  async def chat_endpoint(chat_query: ChatQuery):
514
  try:
515
  query_text = chat_query.query
516
+ language_code = chat_query.language_code
517
+ query_embedding = embed_query_text(query_text) # Embed the query text
518
+ embeddings_data = load_embeddings ()
519
+ folder_path = 'downloaded_articles/downloaded_articles'
 
520
  initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
521
  document_ids = [doc_id for doc_id, _ in initial_results]
522
+ document_ids = [doc_id for doc_id, _ in initial_results]
 
523
  document_texts = retrieve_document_texts(document_ids, folder_path)
524
+ cross_encoder = models['cross_encoder']
525
+ scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
526
+ scored_documents = list(zip(scores, document_ids, document_texts))
527
+ scored_documents.sort(key=lambda x: x[0], reverse=True)
528
+ relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
 
 
 
 
 
 
 
 
 
529
  flattened_relevant_portions = []
530
  for doc_id, portions in relevant_portions.items():
531
  flattened_relevant_portions.extend(portions)
532
  unique_selected_parts = remove_duplicates(flattened_relevant_portions)
533
  combined_parts = " ".join(unique_selected_parts)
534
+ context = [query_text] + unique_selected_parts
 
535
  entities = extract_entities(query_text)
536
  passage = enhance_passage_with_entities(combined_parts, entities)
 
 
537
  prompt = create_prompt(query_text, passage)
538
+ answer = generate_answer(prompt)
 
 
539
  answer_part = answer.split("Answer:")[-1].strip()
540
  cleaned_answer = remove_answer_prefix(answer_part)
541
  final_answer = remove_incomplete_sentence(cleaned_answer)
542
+ if language_code == 0:
543
+ final_answer = translate_en_to_ar(final_answer)
544
+ if final_answer:
545
+ print("Answer:")
546
+ print(final_answer)
547
+ else:
548
+ print("Sorry, I can't help with that.")
549
  return {
550
  "response": final_answer,
551
  "conversation_id": chat_query.conversation_id,
552
  "success": True
553
  }
 
554
  except Exception as e:
555
  raise HTTPException(status_code=500, detail=str(e))
556
 
 
557
  @app.post("/api/resources")
558
  async def resources_endpoint(profile: MedicalProfile):
559
  try:
 
563
  Restrictions: {', '.join(profile.food_restrictions)}
564
  Mental health: {', '.join(profile.mental_conditions)}
565
  """
566
+ query_text = context
567
+ query_embedding = embed_query_text(query_text) # Embed the query text
568
+ embeddings_data = load_embeddings ()
569
+ folder_path = 'downloaded_articles/downloaded_articles'
570
+ initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
571
+ document_ids = [doc_id for doc_id, _ in initial_results]
572
+ document_texts = retrieve_document_texts(document_ids, folder_path)
573
+ cross_encoder = models['cross_encoder']
574
+ scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
575
+ scored_documents = list(zip(scores, document_ids, document_texts))
576
+ ranked_docs = scored_documents.sort(key=lambda x: x[0], reverse=True)
577
  resources = []
578
  for (doc_id, _), score, text in ranked_docs[:10]:
579
  doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
 
583
  "content": text[:200],
584
  "score": float(score)
585
  })
 
586
  return {"resources": resources, "success": True}
587
  except Exception as e:
588
  raise HTTPException(status_code=500, detail=str(e))
 
591
  async def recipes_endpoint(profile: MedicalProfile):
592
  try:
593
  recipe_query = f"Recipes and meals suitable for someone with: {', '.join(profile.chronic_conditions + profile.food_restrictions)}"
594
+ query_text = recipe_query
595
+ query_embedding = embed_query_text(query_text) # Embed the query text
596
+ embeddings_data = load_embeddings ()
597
+ folder_path = 'downloaded_articles/downloaded_articles'
598
+ initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
599
+ document_ids = [doc_id for doc_id, _ in initial_results]
600
+ document_texts = retrieve_document_texts(document_ids, folder_path)
601
+ cross_encoder = models['cross_encoder']
602
+ scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
603
+ scored_documents = list(zip(scores, document_ids, document_texts))
604
+ ranked_docs = scored_documents.sort(key=lambda x: x[0], reverse=True)
605
  recipes = []
606
  for (doc_id, _), score, text in ranked_docs[:10]:
607
  doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
 
612
  "content": text[:200],
613
  "score": float(score)
614
  })
 
615
  return {"recipes": recipes[:5], "success": True}
616
  except Exception as e:
617
  raise HTTPException(status_code=500, detail=str(e))
618
 
 
 
619
  if not init_success:
620
  print("Warning: Application initialized with partial functionality")
621