Update app.py
app.py
CHANGED
@@ -28,9 +28,12 @@ from sklearn.metrics.pairwise import cosine_similarity
from bs4 import BeautifulSoup
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
-from typing import List, Dict, Optional
+from typing import List, Dict, Any, Tuple, Optional
from safetensors.numpy import load_file
from safetensors.torch import safe_open
+from concurrent.futures import ThreadPoolExecutor
+import asyncio
+from functools import partial
nltk.download('punkt_tab')

app = FastAPI()
@@ -63,6 +66,11 @@ class ChatMessage(BaseModel):
    content: str
    timestamp: str

+async def run_in_threadpool(func, *args, **kwargs):
+    return await asyncio.get_event_loop().run_in_executor(
+        None, partial(func, *args, **kwargs)
+    )
+
def init_nltk():
    try:
        nltk.download('punkt', quiet=True)
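Note: `run_in_threadpool` simply hands a blocking callable to the event loop's default thread-pool executor so the async endpoint is not blocked while it runs. A minimal, self-contained sketch of the pattern (the `slow_add` function is a hypothetical stand-in for any blocking call such as translation or model inference, not something from app.py):

import asyncio
from functools import partial

async def run_in_threadpool(func, *args, **kwargs):
    # Run the blocking callable on the default thread-pool executor.
    return await asyncio.get_event_loop().run_in_executor(
        None, partial(func, *args, **kwargs)
    )

def slow_add(a, b):
    # Hypothetical stand-in for a blocking call (model inference, file I/O, ...).
    return a + b

async def main():
    print(await run_in_threadpool(slow_add, 2, 3))  # -> 5

asyncio.run(main())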
@@ -332,120 +340,155 @@ def retrieve_metadata(document_indices: List[int], metadata_path: str = 'recipes
        print(f"Error retrieving metadata: {e}")
        return {}

-def rerank_documents(query, document_ids, document_texts, cross_encoder_model):
+def rerank_documents(query: str, document_ids: List[str], document_texts: List[str], cross_encoder_model) -> List[Tuple[float, str, str]]:
    try:
+        # Batch process all documents at once
        pairs = [(query, doc) for doc in document_texts]
-        scores = cross_encoder_model.predict(pairs)
+        scores = cross_encoder_model.predict(pairs, batch_size=8)  # Increased batch size
        scored_documents = list(zip(scores, document_ids, document_texts))
        scored_documents.sort(key=lambda x: x[0], reverse=True)
-        print("Reranked results:")
-        for idx, (score, doc_id, doc) in enumerate(scored_documents):
-            print(f"Rank {idx + 1} (Score: {score:.4f}, Document ID: {doc_id})")
        return scored_documents
    except Exception as e:
        print(f"Error reranking documents: {e}")
        return []

-from sklearn.metrics.pairwise import cosine_similarity
-import nltk
-
-def extract_relevant_portions(query_embedding, top_documents, embeddings_data, max_portions=3):
+def extract_entities_batch(texts: List[str], biobert_tokenizer, biobert_model, batch_size: int = 8) -> List[List[str]]:
    try:
-        doc_embedding = np.array(embeddings_data[doc_id])
+        all_entities = []
+        for i in range(0, len(texts), batch_size):
+            batch_texts = texts[i:i + batch_size]
+            # Process multiple texts in parallel
+            inputs = biobert_tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
+            with torch.no_grad():  # Disable gradient calculation
+                outputs = biobert_model(**inputs)
+
+            predictions = torch.argmax(outputs.logits, dim=2)
+
+            for j, (input_ids, preds) in enumerate(zip(inputs.input_ids, predictions)):
+                tokens = biobert_tokenizer.convert_ids_to_tokens(input_ids)
+                entities = [tokens[k] for k in range(len(tokens)) if preds[k].item() != 0]
+                all_entities.append(entities)
+
+        return all_entities
+    except Exception as e:
+        print(f"Error in batch entity extraction: {e}")
+        return [[] for _ in texts]

+def extract_relevant_portions(document_texts: List[str], query: str, biobert_tokenizer, biobert_model,
+                              max_portions: int = 3, portion_size: int = 1) -> Dict[str, List[str]]:
+    try:
+        # Process query and all documents in one batch
+        all_texts = [query] + document_texts
+        all_entities = extract_entities_batch(all_texts, biobert_tokenizer, biobert_model)
+
+        query_entities = set(all_entities[0])
+        relevant_portions = {}
+
+        def process_document(doc_idx: int) -> Tuple[str, List[str]]:
+            doc_text = document_texts[doc_idx]
+            doc_entities = set(all_entities[doc_idx + 1])  # +1 because query was first
+
            sentences = nltk.sent_tokenize(doc_text)
-            # Rank sentences based on their length (proxy for importance) or other heuristic
-            # Since we're using document-level embeddings, we assume all sentences are equally relevant.
-            sorted_sentences = sorted(sentences, key=lambda x: len(x), reverse=True)[:max_portions]
-            relevant_portions[doc_id] = sorted_sentences
+            doc_relevant_portions = []

+            # Score sentences based on entity overlap
+            sentence_scores = []
+            for i, sentence in enumerate(sentences):
+                entity_overlap = len(query_entities.intersection(doc_entities))
+                sentence_scores.append((entity_overlap, i))
+
+            # Sort and select top sentences
+            sentence_scores.sort(reverse=True)
+            for _, sent_idx in sentence_scores[:max_portions]:
+                start_idx = max(0, sent_idx - portion_size // 2)
+                end_idx = min(len(sentences), sent_idx + portion_size // 2 + 1)
+                portion = " ".join(sentences[start_idx:end_idx])
+                doc_relevant_portions.append(portion)
+
+            return f"Document_{doc_idx}", doc_relevant_portions
+
+        # Process documents in parallel
+        with ThreadPoolExecutor(max_workers=4) as executor:
+            results = list(executor.map(lambda x: process_document(x), range(len(document_texts))))
+
+        relevant_portions = dict(results)
        return relevant_portions
+
    except Exception as e:
-        print(f"Error
-        return {}
+        print(f"Error extracting relevant portions: {e}")
+        return {f"Document_{i}": [] for i in range(len(document_texts))}

-def
-    unique_sentences = set()
-    unique_selected_parts = []
-    for sentence in selected_parts:
-        if sentence not in unique_sentences:
-            unique_selected_parts.append(sentence)
-            unique_sentences.add(sentence)
-    return unique_selected_parts
-
-def extract_entities(text):
+def generate_answer(prompt: str, tokenizer_f, model_f, max_length: int = 860, temperature: float = 0.2) -> str:
    try:
+        # Optimize input processing
+        inputs = tokenizer_f(prompt, return_tensors="pt", truncation=True, max_length=512)
+
+        with torch.no_grad():  # Disable gradient calculation
+            output_ids = model_f.generate(
+                inputs.input_ids,
+                max_length=max_length,
+                num_return_sequences=1,
+                temperature=temperature,
+                pad_token_id=tokenizer_f.eos_token_id,
+                do_sample=False,  # Use greedy decoding for faster generation
+                early_stopping=True
+            )
+
+        answer = tokenizer_f.decode(output_ids[0], skip_special_tokens=True)
+
+        # Quick relevance check
+        if any(word in answer.lower() for word in prompt.lower().split()):
+            return answer
+        return "I apologize, but I cannot provide a relevant answer based on the given information."
+
    except Exception as e:
-        print(f"Error
-        return
-
-def
-    return f"
+        print(f"Error generating answer: {e}")
+        return "I apologize, but I encountered an error while generating the answer."
+
+def create_prompt(question: str, passage: str) -> str:
+    return f"""As a medical expert, answer the following question based only on the provided passage. Be concise and direct.
+Passage: {passage}
+Question: {question}
+Answer:"""
+
+def process_query_and_generate_answer(
+    query: str,
+    relevant_documents: List[Tuple[float, str, str]],
+    models: Dict,
+    max_portions: int = 3
+) -> str:
+    try:
+        # Extract relevant portions from top documents
+        relevant_portions = extract_relevant_portions(
+            [doc[2] for doc in relevant_documents[:3]],  # Use top 3 documents
+            query,
+            models['bio_tokenizer'],
+            models['bio_model'],
+            max_portions=max_portions
+        )
+
+        # Combine relevant portions
+        all_portions = []
+        for doc_portions in relevant_portions.values():
+            all_portions.extend(doc_portions)
+
+        # Remove duplicates while preserving order
+        unique_portions = list(dict.fromkeys(all_portions))
+
+        # Create context from unique portions
+        context = " ".join(unique_portions[:max_portions])
+
+        # Generate and return answer
+        prompt = create_prompt(query, context)
+        return generate_answer(
+            prompt,
+            models['llm_tokenizer'],
+            models['llm_model']
+        )
+
+    except Exception as e:
+        print(f"Error in query processing pipeline: {e}")
+        return "I apologize, but I encountered an error while processing your question."
def remove_answer_prefix(text):
    prefix = "Answer:"
    if prefix in text:
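Note: the batched entity extraction above follows the standard token-classification pattern: tokenize a batch of texts, run the model under `torch.no_grad()`, take the argmax over the label dimension, and keep tokens whose predicted label id is non-zero. A rough standalone sketch of that pattern (the checkpoint `dslim/bert-base-NER` and the example texts are used purely for illustration; this is not the BioBERT model the Space actually loads):

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

checkpoint = "dslim/bert-base-NER"  # illustrative public NER model, not the Space's checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint)

texts = ["Aspirin can reduce fever.", "Vitamin D supports bone health."]
inputs = tokenizer(texts, padding=True, truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits      # (batch, seq_len, num_labels)
predictions = logits.argmax(dim=-1)      # one label id per token

for input_ids, preds in zip(inputs.input_ids, predictions):
    tokens = tokenizer.convert_ids_to_tokens(input_ids)
    # Keep tokens whose label is not the 0 ("O") class, as extract_entities_batch does.
    print([t for t, label in zip(tokens, preds.tolist()) if label != 0])

One consequence of the scoring in `process_document` worth keeping in mind: `entity_overlap` is computed from the query entities and the whole document's entities, so it is identical for every sentence in a document, and the final selection is effectively decided by the sentence index used as a tie-breaker.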
@@ -511,48 +554,132 @@ async def health_check():
@app.post("/api/chat")
async def chat_endpoint(chat_query: ChatQuery):
    try:
+        # Initialize response timing
+        start_time = asyncio.get_event_loop().time()
+
+        # Extract query and handle translation
        query_text = chat_query.query
-        language_code = chat_query.language_code
+        language_code = chat_query.language_code
+
        if language_code == 0:
-            query_text = translate_ar_to_en
+            query_text = await run_in_threadpool(translate_ar_to_en, query_text)
+
+        # Embed query and load embeddings in parallel
+        query_embedding_task = run_in_threadpool(embed_query_text, query_text)
+        embeddings_data_task = run_in_threadpool(load_embeddings)
+
+        # Wait for both tasks to complete
+        query_embedding, embeddings_data = await asyncio.gather(
+            query_embedding_task,
+            embeddings_data_task
+        )
+
+        # Initial document retrieval
        n_results = 5
-        embeddings_data = load_embeddings ()
        folder_path = 'downloaded_articles/downloaded_articles'
+
+        # Get initial results and retrieve documents
+        initial_results = await run_in_threadpool(
+            query_embeddings,
+            query_embedding,
+            embeddings_data,
+            n_results
+        )
+
+        document_ids = [doc_id for doc_id, *_ in initial_results]
+        document_texts = await run_in_threadpool(
+            retrieve_document_texts,
+            document_ids,
+            folder_path
+        )
+
+        # Rerank documents
        cross_encoder = models['cross_encoder']
+        scored_documents = await run_in_threadpool(
+            rerank_documents,
+            query_text,
+            document_ids,
+            document_texts,
+            cross_encoder
+        )
+
+        # Process documents and generate answer
+        async with asyncio.TaskGroup() as tg:
+            # Extract entities in parallel
+            entities_task = tg.create_task(
+                run_in_threadpool(
+                    extract_entities_batch,
+                    [query_text] + [doc[2] for doc in scored_documents[:3]],
+                    models['bio_tokenizer'],
+                    models['bio_model']
+                )
+            )
+
+            # Extract relevant portions
+            portions_task = tg.create_task(
+                run_in_threadpool(
+                    extract_relevant_portions,
+                    [doc[2] for doc in scored_documents[:3]],
+                    query_text,
+                    models['bio_tokenizer'],
+                    models['bio_model']
+                )
+            )
+
+            entities = (await entities_task)[0]  # First item is query entities
+            relevant_portions = await portions_task
+
+        # Flatten and process portions
+        flattened_portions = []
+        for doc_portions in relevant_portions.values():
+            flattened_portions.extend(doc_portions)
+
+        unique_selected_parts = list(dict.fromkeys(flattened_portions))
        combined_parts = " ".join(unique_selected_parts)
+
+        # Enhance passage and create prompt
        passage = enhance_passage_with_entities(combined_parts, entities)
        prompt = create_prompt(query_text, passage)
+
+        # Generate answer
+        answer = await run_in_threadpool(
+            generate_answer,
+            prompt,
+            models['llm_tokenizer'],
+            models['llm_model']
+        )
+
+        # Process answer
        answer_part = answer.split("Answer:")[-1].strip()
-        cleaned_answer = remove_answer_prefix
-        final_answer = remove_incomplete_sentence
+        cleaned_answer = await run_in_threadpool(remove_answer_prefix, answer_part)
+        final_answer = await run_in_threadpool(remove_incomplete_sentence, cleaned_answer)
+
+        # Handle translation if needed
        if language_code == 0:
-            final_answer = translate_en_to_ar
+            final_answer = await run_in_threadpool(translate_en_to_ar, final_answer)
+
+        # Calculate response time
+        end_time = asyncio.get_event_loop().time()
+        response_time = end_time - start_time
+
        if final_answer:
-            print("Answer
+            print(f"Answer generated in {response_time:.2f} seconds")
            print(final_answer)
+
+            return {
+                "response": f"I hope this answers your question: {final_answer}",
+                "success": True,
+                "response_time": response_time
+            }
        else:
+            return {
+                "response": "Sorry, I can't help with that.",
+                "success": False,
+                "response_time": response_time
+            }
+
    except Exception as e:
+        print(f"Error in chat endpoint: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/resources")
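Note: `asyncio.TaskGroup` is only available on Python 3.11 and later; on an older runtime the same two-task fan-out can be expressed with `asyncio.gather`. A minimal toy comparison of the two forms (the `work` coroutine is a hypothetical stand-in for the threadpool-wrapped calls in the endpoint):

import asyncio

async def work(name, delay):
    # Stand-in for run_in_threadpool(extract_entities_batch, ...) and friends.
    await asyncio.sleep(delay)
    return name

async def main():
    # Python 3.11+: structured concurrency, as in the endpoint above.
    async with asyncio.TaskGroup() as tg:
        t1 = tg.create_task(work("entities", 0.1))
        t2 = tg.create_task(work("portions", 0.2))
    print(t1.result(), t2.result())

    # Pre-3.11 equivalent of the same fan-out.
    r1, r2 = await asyncio.gather(work("entities", 0.1), work("portions", 0.2))
    print(r1, r2)

asyncio.run(main())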