Commit 12bd822
1 Parent(s): 22f5f6f
Update app.py

app.py CHANGED
@@ -189,10 +189,6 @@ def query_embeddings(query_embedding, n_results=5):
         print(f"Error in query_embeddings: {e}")
         return []
 
-query_embedding = embed_query_text(query_text)  # Embed the query text
-initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
-document_ids = [doc_id for doc_id, _ in initial_results]
-
 def retrieve_document_text(doc_id):
     """Retrieve document text from HTML file"""
     try:
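The body of query_embeddings is outside this diff; only its error path is visible above. Note also that the hunk header shows a two-argument signature, while the call sites pass embeddings_data as a second positional argument. As a rough sketch of what the function presumably does (an assumption following the call sites, not the Space's actual code), it ranks the stored vectors by cosine similarity to the query embedding and returns the top n_results (doc_id, score) pairs:

import numpy as np

# Hedged sketch only: assumes embeddings_data maps doc_id -> embedding vector,
# consistent with how query_embeddings is called elsewhere in app.py.
def query_embeddings_sketch(query_embedding, embeddings_data, n_results=5):
    try:
        query_vec = np.asarray(query_embedding, dtype=float)
        scores = []
        for doc_id, doc_vec in embeddings_data.items():
            doc_vec = np.asarray(doc_vec, dtype=float)
            sim = float(np.dot(query_vec, doc_vec) /
                        (np.linalg.norm(query_vec) * np.linalg.norm(doc_vec)))
            scores.append((doc_id, sim))
        scores.sort(key=lambda pair: pair[1], reverse=True)
        return scores[:n_results]
    except Exception as e:
        print(f"Error in query_embeddings: {e}")
        return []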
@@ -208,7 +204,6 @@ def retrieve_document_text(doc_id):
         print(f"Error retrieving document {doc_id}: {e}")
         return ""
 
-document_texts = retrieve_document_texts(document_ids, folder_path)
 
 def rerank_documents(query, doc_texts):
     """Rerank documents using cross-encoder"""
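Note that the function defined above is the singular retrieve_document_text(doc_id), while the line removed here (and re-added inside the endpoint later in this diff) calls a plural retrieve_document_texts(document_ids, folder_path), which is not shown. A minimal sketch of that helper, assuming it simply maps the singular function over the ids (how folder_path is threaded through is a guess):

# Hypothetical helper, not shown in this diff: fetch the text of each
# retrieved document id. folder_path handling is assumed to happen inside
# retrieve_document_text (e.g. via a module-level setting).
def retrieve_document_texts(document_ids, folder_path):
    return [retrieve_document_text(doc_id) for doc_id in document_ids]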
@@ -274,7 +269,6 @@ def extract_relevant_portions(document_texts, query, max_portions=3, portion_siz
 
     return relevant_portions
 
-relevant_portions = extract_relevant_portions(document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1)
 
 def remove_duplicates(selected_parts):
     unique_sentences = set()
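extract_relevant_portions itself is unchanged and its body is not shown here. From the removed call and the flattening loop in the next hunk, it evidently returns a dict mapping each document to a list of short text portions. A rough sketch of that contract (an assumption about the implementation, not the Space's code):

import re

# Hedged sketch: keep up to max_portions sentence groups per document that
# share at least min_query_words words with the query; returns {doc_id: [portions]}.
def extract_relevant_portions_sketch(document_texts, query, max_portions=3,
                                     portion_size=1, min_query_words=1):
    query_words = set(query.lower().split())
    relevant_portions = {}
    for doc_id, text in enumerate(document_texts):
        sentences = re.split(r"(?<=[.!?])\s+", text)
        scored = []
        for i in range(0, len(sentences), portion_size):
            portion = " ".join(sentences[i:i + portion_size])
            overlap = len(query_words & set(portion.lower().split()))
            if overlap >= min_query_words:
                scored.append((overlap, portion))
        scored.sort(reverse=True)
        relevant_portions[doc_id] = [p for _, p in scored[:max_portions]]
    return relevant_portions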
@@ -287,20 +281,6 @@ def remove_duplicates(selected_parts):
 
     return unique_selected_parts
 
-# Flatten the dictionary of relevant portions (from earlier code)
-flattened_relevant_portions = []
-for doc_id, portions in relevant_portions.items():
-    flattened_relevant_portions.extend(portions)
-
-# Remove duplicate portions
-unique_selected_parts = remove_duplicates(flattened_relevant_portions)
-
-# Combine the unique parts into a single string of context
-combined_parts = " ".join(unique_selected_parts)
-
-# Construct context as a list: first the query, then the unique selected portions
-context = [query_text] + unique_selected_parts
-
 def extract_entities(text):
     inputs = biobert_tokenizer(text, return_tensors="pt")
     outputs = biobert_model(**inputs)
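The visible fragments of remove_duplicates (a unique_sentences set and a unique_selected_parts return value) suggest an order-preserving de-duplication along these lines; this is a sketch consistent with those fragments, not necessarily the exact body:

# Sketch of remove_duplicates based on the fragments shown above:
# keep each portion once, preserving the original order.
def remove_duplicates_sketch(selected_parts):
    unique_sentences = set()
    unique_selected_parts = []
    for part in selected_parts:
        if part not in unique_sentences:
            unique_sentences.add(part)
            unique_selected_parts.append(part)
    return unique_selected_parts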
@@ -372,11 +352,6 @@ def remove_incomplete_sentence(text):
         return text[:last_period_index + 1].strip()
     return text
 
-answer_part = answer.split("Answer:")[-1].strip()
-cleaned_answer = remove_answer_prefix(answer_part)
-final_answer = remove_incomplete_sentence(cleaned_answer)
-
-
 @app.get("/")
 async def root():
     return {"message": "Welcome to the FastAPI application! Use the /health endpoint to check health, and /api/query for processing queries."}
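Only the tail of remove_incomplete_sentence is visible above. Together with remove_answer_prefix, the post-processing trims the generated text back to its last complete sentence. A sketch that matches the two visible return statements (the earlier lines are assumed):

# Sketch consistent with the visible return statements: drop any trailing
# fragment after the last period, otherwise return the text unchanged.
def remove_incomplete_sentence_sketch(text):
    last_period_index = text.rfind(".")
    if last_period_index != -1:
        return text[:last_period_index + 1].strip()
    return text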
@@ -397,20 +372,26 @@ async def health_check():
 async def chat_endpoint(chat_query: ChatQuery):
     try:
         query_text = chat_query.query
-        query_embedding =
-
-
-
-
-
-
-
+        query_embedding = embed_query_text(query_text)
+        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
+        document_ids = [doc_id for doc_id, _ in initial_results]
+        document_texts = retrieve_document_texts(document_ids, folder_path)
+        flattened_relevant_portions = []
+        for doc_id, portions in relevant_portions.items():
+            flattened_relevant_portions.extend(portions)
+        unique_selected_parts = remove_duplicates(flattened_relevant_portions)
+        combined_parts = " ".join(unique_selected_parts)
         context = [query_text] + unique_selected_parts
-
+        entities = extract_entities(query_text)
+        passage = enhance_passage_with_entities(combined_parts, entities)
+        prompt = create_prompt(query_text, passage)
+        answer, generation_time = generate_answer(prompt)
+        answer_part = answer.split("Answer:")[-1].strip()
+        cleaned_answer = remove_answer_prefix(answer_part)
+        final_answer = remove_incomplete_sentence(cleaned_answer)
 
         return {
-            "response":
+            "response": final_answer,
             "conversation_id": chat_query.conversation_id,
             "success": True
         }
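One thing to watch in the new endpoint body: the module-level assignment relevant_portions = extract_relevant_portions(...) removed earlier in this diff is not re-added inside chat_endpoint, yet the loop over relevant_portions.items() still references it. Unless relevant_portions is defined somewhere else in app.py outside the hunks shown, the handler would raise a NameError at request time (swallowed by the surrounding try). A hedged sketch of the endpoint with that one line restored, assuming the same arguments as the removed module-level call, that embeddings_data and folder_path remain module-level globals, and a guessed route decorator and except clause (neither appears in this diff):

@app.post("/api/query")  # route name assumed; the decorator is outside this diff
async def chat_endpoint(chat_query: ChatQuery):
    try:
        query_text = chat_query.query
        query_embedding = embed_query_text(query_text)
        initial_results = query_embeddings(query_embedding, embeddings_data, n_results=5)
        document_ids = [doc_id for doc_id, _ in initial_results]
        document_texts = retrieve_document_texts(document_ids, folder_path)
        # Missing in the committed version: without this line, relevant_portions
        # is undefined inside the handler.
        relevant_portions = extract_relevant_portions(
            document_texts, query_text, max_portions=3, portion_size=1, min_query_words=1
        )
        flattened_relevant_portions = []
        for doc_id, portions in relevant_portions.items():
            flattened_relevant_portions.extend(portions)
        unique_selected_parts = remove_duplicates(flattened_relevant_portions)
        combined_parts = " ".join(unique_selected_parts)
        context = [query_text] + unique_selected_parts  # kept as in the commit, currently unused
        entities = extract_entities(query_text)
        passage = enhance_passage_with_entities(combined_parts, entities)
        prompt = create_prompt(query_text, passage)
        answer, generation_time = generate_answer(prompt)
        answer_part = answer.split("Answer:")[-1].strip()
        cleaned_answer = remove_answer_prefix(answer_part)
        final_answer = remove_incomplete_sentence(cleaned_answer)

        return {
            "response": final_answer,
            "conversation_id": chat_query.conversation_id,
            "success": True
        }
    except Exception as e:
        # Error handling assumed; the except clause is not part of this diff.
        return {"response": "", "conversation_id": chat_query.conversation_id,
                "success": False, "error": str(e)}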