Spaces:

thechaiexperiment
/

TeaRAG

Sleeping

App Files Files Community

thechaiexperiment committed on Jan 17

Commit

6c38ae6

1 Parent(s): 58d2f18

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -60

app.py CHANGED Viewed

@@ -494,13 +494,6 @@ if final_answer:
 else:
     print("Sorry, I can't help with that.")
 @app.get("/")
 async def root():
     return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
@@ -520,59 +513,47 @@ async def health_check():
 async def chat_endpoint(chat_query: ChatQuery):
     try:
         query_text = chat_query.query
-        # Step 1: Embed the query
-        query_embedding = embed_query_text(query_text)
-        # Step 2: Retrieve top results using embeddings similarity
         initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
         document_ids = [doc_id for doc_id, _ in initial_results]
-        # Step 3: Fetch document texts
         document_texts = retrieve_document_texts(document_ids, folder_path)
-        # Step 4: Re-rank documents (optional, if reranking is used)
-        reranked_documents = rerank_documents(query_text, document_ids, document_texts, cross_encoder_model)
-        # Step 5: Extract relevant portions (if enabled)
-        relevant_portions = extract_relevant_portions(
-            document_texts,
-            query=query_text,
-            max_portions=3,
-            portion_size=1,
-            min_query_words=1
-        )
-        # Step 6: Flatten and clean relevant portions
         flattened_relevant_portions = []
         for doc_id, portions in relevant_portions.items():
             flattened_relevant_portions.extend(portions)
         unique_selected_parts = remove_duplicates(flattened_relevant_portions)
         combined_parts = " ".join(unique_selected_parts)
-        # Step 7: Extract entities and enhance passage
         entities = extract_entities(query_text)
         passage = enhance_passage_with_entities(combined_parts, entities)
-        # Step 8: Create prompt and generate answer
         prompt = create_prompt(query_text, passage)
-        answer, generation_time = generate_answer(prompt)
-        # Step 9: Clean the generated answer
         answer_part = answer.split("Answer:")[-1].strip()
         cleaned_answer = remove_answer_prefix(answer_part)
         final_answer = remove_incomplete_sentence(cleaned_answer)
         return {
             "response": final_answer,
             "conversation_id": chat_query.conversation_id,
             "success": True
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/api/resources")
 async def resources_endpoint(profile: MedicalProfile):
     try:
@@ -582,15 +563,17 @@ async def resources_endpoint(profile: MedicalProfile):
         Restrictions: {', '.join(profile.food_restrictions)}
         Mental health: {', '.join(profile.mental_conditions)}
         """
-        query_embedding = models['embedding'].encode([context])
-        relevant_docs = query_embeddings(query_embedding)
-        doc_texts = [retrieve_document_text(doc_id) for doc_id, _ in relevant_docs]
-        doc_texts = [text for text in doc_texts if text.strip()]
-        rerank_scores = rerank_documents(context, doc_texts)
-        ranked_docs = sorted(zip(relevant_docs, rerank_scores, doc_texts), key=lambda x: x[1], reverse=True)
         resources = []
         for (doc_id, _), score, text in ranked_docs[:10]:
             doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
@@ -600,7 +583,6 @@ async def resources_endpoint(profile: MedicalProfile):
                 "content": text[:200],
                 "score": float(score)
             })
         return {"resources": resources, "success": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
@@ -609,15 +591,17 @@ async def resources_endpoint(profile: MedicalProfile):
 async def recipes_endpoint(profile: MedicalProfile):
     try:
         recipe_query = f"Recipes and meals suitable for someone with: {', '.join(profile.chronic_conditions + profile.food_restrictions)}"
-        query_embedding = models['embedding'].encode([recipe_query])
-        relevant_docs = query_embeddings(query_embedding)
-        doc_texts = [retrieve_document_text(doc_id) for doc_id, _ in relevant_docs]
-        doc_texts = [text for text in doc_texts if text.strip()]
-        rerank_scores = rerank_documents(recipe_query, doc_texts)
-        ranked_docs = sorted(zip(relevant_docs, rerank_scores, doc_texts), key=lambda x: x[1], reverse=True)
         recipes = []
         for (doc_id, _), score, text in ranked_docs[:10]:
             doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
@@ -628,13 +612,10 @@ async def recipes_endpoint(profile: MedicalProfile):
                     "content": text[:200],
                     "score": float(score)
                 })
         return {"recipes": recipes[:5], "success": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if not init_success:
     print("Warning: Application initialized with partial functionality")

 else:
     print("Sorry, I can't help with that.")
 @app.get("/")
 async def root():
     return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
 async def chat_endpoint(chat_query: ChatQuery):
     try:
         query_text = chat_query.query
+        language_code = chat_query.language_code
+        query_embedding = embed_query_text(query_text)  # Embed the query text
+        embeddings_data = load_embeddings ()
+        folder_path = 'downloaded_articles/downloaded_articles'
         initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
         document_ids = [doc_id for doc_id, _ in initial_results]
+        document_ids = [doc_id for doc_id, _ in initial_results]
         document_texts = retrieve_document_texts(document_ids, folder_path)
+        cross_encoder = models['cross_encoder']
+        scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
+        scored_documents = list(zip(scores, document_ids, document_texts))
+        scored_documents.sort(key=lambda x: x[0], reverse=True)
+        relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
         flattened_relevant_portions = []
         for doc_id, portions in relevant_portions.items():
             flattened_relevant_portions.extend(portions)
         unique_selected_parts = remove_duplicates(flattened_relevant_portions)
         combined_parts = " ".join(unique_selected_parts)
+        context = [query_text] + unique_selected_parts
         entities = extract_entities(query_text)
         passage = enhance_passage_with_entities(combined_parts, entities)
         prompt = create_prompt(query_text, passage)
+        answer = generate_answer(prompt)
         answer_part = answer.split("Answer:")[-1].strip()
         cleaned_answer = remove_answer_prefix(answer_part)
         final_answer = remove_incomplete_sentence(cleaned_answer)
+        if language_code == 0:
+            final_answer = translate_en_to_ar(final_answer)
+        if final_answer:
+            print("Answer:")
+            print(final_answer)
+        else:
+            print("Sorry, I can't help with that.")
         return {
             "response": final_answer,
             "conversation_id": chat_query.conversation_id,
             "success": True
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 @app.post("/api/resources")
 async def resources_endpoint(profile: MedicalProfile):
     try:
         Restrictions: {', '.join(profile.food_restrictions)}
         Mental health: {', '.join(profile.mental_conditions)}
         """
+        query_text = context
+        query_embedding = embed_query_text(query_text)  # Embed the query text
+        embeddings_data = load_embeddings ()
+        folder_path = 'downloaded_articles/downloaded_articles'
+        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
+        document_ids = [doc_id for doc_id, _ in initial_results]
+        document_texts = retrieve_document_texts(document_ids, folder_path)
+        cross_encoder = models['cross_encoder']
+        scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
+        scored_documents = list(zip(scores, document_ids, document_texts))
+        ranked_docs = scored_documents.sort(key=lambda x: x[0], reverse=True)
         resources = []
         for (doc_id, _), score, text in ranked_docs[:10]:
             doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
                 "content": text[:200],
                 "score": float(score)
             })
         return {"resources": resources, "success": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 async def recipes_endpoint(profile: MedicalProfile):
     try:
         recipe_query = f"Recipes and meals suitable for someone with: {', '.join(profile.chronic_conditions + profile.food_restrictions)}"
+        query_text = recipe_query
+        query_embedding = embed_query_text(query_text)  # Embed the query text
+        embeddings_data = load_embeddings ()
+        folder_path = 'downloaded_articles/downloaded_articles'
+        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
+        document_ids = [doc_id for doc_id, _ in initial_results]
+        document_texts = retrieve_document_texts(document_ids, folder_path)
+        cross_encoder = models['cross_encoder']
+        scores = cross_encoder.predict([(query_text, doc) for doc in document_texts])
+        scored_documents = list(zip(scores, document_ids, document_texts))
+        ranked_docs = scored_documents.sort(key=lambda x: x[0], reverse=True)
         recipes = []
         for (doc_id, _), score, text in ranked_docs[:10]:
             doc_info = data['df'][data['df']['id'] == doc_id].iloc[0]
                     "content": text[:200],
                     "score": float(score)
                 })
         return {"recipes": recipes[:5], "success": True}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 if not init_success:
     print("Warning: Application initialized with partial functionality")