Ali2206 committed (verified) · commit 4b4b32b · 1 parent: 5caebdc

Update app.py

Files changed (1):
  1. app.py  +107 -62
app.py CHANGED
@@ -1,14 +1,16 @@
 import sys
 import os
 import pandas as pd
-import json
 import gradio as gr
 from typing import List, Tuple, Dict, Any, Union
-import hashlib
 import shutil
 import re
 from datetime import datetime
 import time
+from transformers import AutoTokenizer
+import asyncio
+import logging
+from concurrent.futures import ThreadPoolExecutor, as_completed
 
 # Configuration and setup
 persistent_dir = "/data/hf_cache"
@@ -32,10 +34,22 @@ sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
 
 # Constants
-MAX_MODEL_TOKENS = 32768 # Model's maximum sequence length
-MAX_CHUNK_TOKENS = 8192 # Chunk size aligned with max_num_batched_tokens
-MAX_NEW_TOKENS = 2048 # Maximum tokens for generation
-PROMPT_OVERHEAD = 500 # Estimated tokens for prompt template overhead
+MAX_MODEL_TOKENS = 131072 # TxAgent's max token limit
+MAX_CHUNK_TOKENS = 32768 # Larger chunks to reduce number of chunks
+MAX_NEW_TOKENS = 512 # Optimized for fast generation
+PROMPT_OVERHEAD = 500 # Estimated tokens for prompt template
+MAX_CONCURRENT = 8 # High concurrency for A100 80GB
+
+# Initialize tokenizer for precise token counting
+try:
+    tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
+except Exception as e:
+    print(f"Warning: Could not load tokenizer, falling back to heuristic: {str(e)}")
+    tokenizer = None
+
+# Setup logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 
 def clean_response(text: str) -> str:
     try:
@@ -48,8 +62,10 @@ def clean_response(text: str) -> str:
     return text.strip()
 
 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens based on character length."""
-    return len(text) // 3.5 + 1 # Add 1 to avoid zero estimates
+    """Estimate tokens using tokenizer if available, else fall back to heuristic."""
+    if tokenizer:
+        return len(tokenizer.encode(text, add_special_tokens=False))
+    return len(text) // 3.5 + 1
 
 def extract_text_from_excel(file_path: str) -> str:
     """Extract text from all sheets in an Excel file."""
@@ -67,10 +83,7 @@ def extract_text_from_excel(file_path: str) -> str:
     return "\n".join(all_text)
 
 def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
-    """
-    Split text into chunks, ensuring each chunk is within token limits,
-    accounting for prompt overhead.
-    """
+    """Split text into chunks within token limits, accounting for prompt overhead."""
     effective_max_tokens = max_tokens - PROMPT_OVERHEAD
     if effective_max_tokens <= 0:
         raise ValueError(f"Effective max tokens ({effective_max_tokens}) must be positive.")
@@ -83,7 +96,7 @@ def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
     for line in lines:
         line_tokens = estimate_tokens(line)
         if current_tokens + line_tokens > effective_max_tokens:
-            if current_chunk: # Save the current chunk if it's not empty
+            if current_chunk:
                 chunks.append("\n".join(current_chunk))
             current_chunk = [line]
             current_tokens = line_tokens
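Note on the chunking strategy touched by this hunk: split_text_into_chunks packs whole lines into a chunk until the estimated token count would exceed the chunk budget minus PROMPT_OVERHEAD, then starts a new chunk. A minimal, self-contained sketch of that packing behavior, using the character heuristic as the estimator and deliberately small limits so the example produces several chunks (the constants and sample data here are illustrative, not the commit's values):

# Illustrative sketch of the line-packing approach; values are examples only.
from typing import List

PROMPT_OVERHEAD = 500      # reserve room for the prompt template
MAX_CHUNK_TOKENS = 1000    # small budget so the sample text splits into several chunks

def estimate_tokens(text: str) -> int:
    # Rough heuristic used as the fallback in the diff: roughly 3.5 characters per token.
    return int(len(text) / 3.5) + 1

def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
    effective_max = max_tokens - PROMPT_OVERHEAD
    if effective_max <= 0:
        raise ValueError("max_tokens must exceed PROMPT_OVERHEAD")
    chunks, current, current_tokens = [], [], 0
    for line in text.split("\n"):
        line_tokens = estimate_tokens(line)
        if current_tokens + line_tokens > effective_max:
            if current:                      # flush the chunk built so far
                chunks.append("\n".join(current))
            current, current_tokens = [line], line_tokens
        else:
            current.append(line)
            current_tokens += line_tokens
    if current:
        chunks.append("\n".join(current))
    return chunks

sample = "\n".join(f"row {i}: value {i}" for i in range(500))
print([estimate_tokens(c) for c in split_text_into_chunks(sample)])

Packing whole lines keeps each extracted spreadsheet row intact, at the cost of slightly uneven chunk sizes.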
@@ -118,7 +131,7 @@ Please analyze the above and provide:
 """
 
 def init_agent():
-    """Initialize the TxAgent with model and tool configurations."""
+    """Initialize the TxAgent with optimized vLLM settings for A100 80GB."""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
 
@@ -138,8 +151,47 @@ def init_agent():
     agent.init_model()
     return agent
 
-def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
-    """Process the Excel file and generate a final report."""
+async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
+    """Process a single chunk and return index, response, and status message."""
+    logger.info(f"Processing chunk {chunk_index+1}/{total_chunks}")
+    prompt = build_prompt_from_text(chunk)
+    prompt_tokens = estimate_tokens(prompt)
+
+    if prompt_tokens > MAX_MODEL_TOKENS:
+        error_msg = f"❌ Chunk {chunk_index+1} prompt too long ({prompt_tokens} tokens). Skipping..."
+        logger.warning(error_msg)
+        return chunk_index, "", error_msg
+
+    response = ""
+    try:
+        for result in agent.run_gradio_chat(
+            message=prompt,
+            history=[],
+            temperature=0.2,
+            max_new_tokens=MAX_NEW_TOKENS,
+            max_token=MAX_MODEL_TOKENS,
+            call_agent=False,
+            conversation=[],
+        ):
+            if isinstance(result, str):
+                response += result
+            elif hasattr(result, "content"):
+                response += result.content
+            elif isinstance(result, list):
+                for r in result:
+                    if hasattr(r, "content"):
+                        response += r.content
+        status = f"✅ Chunk {chunk_index+1} analysis complete"
+        logger.info(status)
+    except Exception as e:
+        status = f"❌ Error analyzing chunk {chunk_index+1}: {str(e)}"
+        logger.error(status)
+        response = ""
+
+    return chunk_index, clean_response(response), status
+
+async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
+    """Process the Excel file and generate a final report with asynchronous updates."""
     messages = chatbot_state if chatbot_state else []
     report_path = None
 
@@ -152,57 +204,43 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
         messages.append({"role": "assistant", "content": "⏳ Extracting and analyzing data..."})
 
         # Extract text and split into chunks
+        start_time = time.time()
         extracted_text = extract_text_from_excel(file.name)
         chunks = split_text_into_chunks(extracted_text, max_tokens=MAX_CHUNK_TOKENS)
-        chunk_responses = []
-
-        # Process each chunk
-        for i, chunk in enumerate(chunks):
-            messages.append({"role": "assistant", "content": f"🔍 Analyzing chunk {i+1}/{len(chunks)}..."})
-
-            prompt = build_prompt_from_text(chunk)
-            prompt_tokens = estimate_tokens(prompt)
-            if prompt_tokens > MAX_MODEL_TOKENS:
-                messages.append({"role": "assistant", "content": f" Chunk {i+1} prompt too long ({prompt_tokens} tokens). Skipping..."})
-                continue
-
-            response = ""
-            try:
-                for result in agent.run_gradio_chat(
-                    message=prompt,
-                    history=[],
-                    temperature=0.2,
-                    max_new_tokens=MAX_NEW_TOKENS,
-                    max_token=MAX_MODEL_TOKENS,
-                    call_agent=False,
-                    conversation=[],
-                ):
-                    if isinstance(result, str):
-                        response += result
-                    elif hasattr(result, "content"):
-                        response += result.content
-                    elif isinstance(result, list):
-                        for r in result:
-                            if hasattr(r, "content"):
-                                response += r.content
-            except Exception as e:
-                messages.append({"role": "assistant", "content": f"❌ Error analyzing chunk {i+1}: {str(e)}"})
-                continue
-
-            chunk_responses.append(clean_response(response))
-            messages.append({"role": "assistant", "content": f"✅ Chunk {i+1} analysis complete"})
-
+        logger.info(f"Extracted text and split into {len(chunks)} chunks in {time.time() - start_time:.2f} seconds")
+
+        chunk_responses = [None] * len(chunks)
+        batch_size = MAX_CONCURRENT
+
+        # Process chunks in batches
+        for batch_start in range(0, len(chunks), batch_size):
+            batch_chunks = chunks[batch_start:batch_start + batch_size]
+            batch_indices = list(range(batch_start, min(batch_start + batch_size, len(chunks))))
+            logger.info(f"Processing batch {batch_start//batch_size + 1}/{(len(chunks) + batch_size - 1)//batch_size}")
+
+            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
+                futures = [
+                    executor.submit(lambda c, i: asyncio.run(process_chunk(agent, c, i, len(chunks))), chunk, i)
+                    for i, chunk in zip(batch_indices, batch_chunks)
+                ]
+                for future in as_completed(futures):
+                    chunk_index, response, status = future.result()
+                    chunk_responses[chunk_index] = response
+                    messages.append({"role": "assistant", "content": status})
+                    yield messages, None
+
+        # Filter out empty responses
+        chunk_responses = [r for r in chunk_responses if r]
         if not chunk_responses:
             messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
             return messages, report_path
 
-        # Summarize chunk responses incrementally to avoid token limit
+        # Summarize chunk responses incrementally
        summary = ""
        current_summary_tokens = 0
        for i, response in enumerate(chunk_responses):
            response_tokens = estimate_tokens(response)
            if current_summary_tokens + response_tokens > MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS:
-                # Summarize current summary
                summary_prompt = f"Summarize the following analysis:\n\n{summary}\n\nProvide a concise summary."
                summary_response = ""
                try:
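The batching added in this hunk dispatches up to MAX_CONCURRENT chunks at a time: each worker thread calls asyncio.run() to drive one process_chunk coroutine to completion, and as_completed lets the status message for each chunk be yielded as soon as it finishes rather than in submission order. A stripped-down sketch of that dispatch pattern, with a stub coroutine standing in for the agent call (names, sizes, and the sleep are placeholders):

# Simplified stand-in for the batch dispatch pattern; process_chunk is stubbed out.
import asyncio
from concurrent.futures import ThreadPoolExecutor, as_completed

MAX_CONCURRENT = 4

async def process_chunk(chunk: str, index: int, total: int) -> tuple:
    # Stand-in for the agent call: pretend each chunk takes a moment to analyze.
    await asyncio.sleep(0.1)
    return index, f"analysis of chunk {index + 1}", f"chunk {index + 1}/{total} done"

chunks = [f"chunk-{i}" for i in range(10)]
responses = [None] * len(chunks)

for batch_start in range(0, len(chunks), MAX_CONCURRENT):
    batch = list(enumerate(chunks))[batch_start:batch_start + MAX_CONCURRENT]
    with ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
        # Each worker thread runs its own event loop just long enough for one coroutine.
        futures = [
            executor.submit(lambda c, i: asyncio.run(process_chunk(c, i, len(chunks))), c, i)
            for i, c in batch
        ]
        for future in as_completed(futures):
            index, response, status = future.result()
            responses[index] = response
            print(status)  # in the app this becomes a chat message and a yield

print(sum(r is not None for r in responses), "chunks analyzed")

Because results arrive out of order, the sketch writes each response back by its original index, mirroring chunk_responses[chunk_index] in the commit.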
@@ -270,13 +308,15 @@ def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
             f.write(final_report)
 
         messages.append({"role": "assistant", "content": f"✅ Report generated and saved: report_{timestamp}.md"})
+        logger.info(f"Total processing time: {time.time() - start_time:.2f} seconds")
 
     except Exception as e:
         messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
+        logger.error(f"Processing failed: {str(e)}")
 
     return messages, report_path
 
-def create_ui(agent):
+async def create_ui(agent):
     """Create the Gradio UI for the patient history analysis tool."""
     with gr.Blocks(title="Patient History Chat", css=".gradio-container {max-width: 900px !important}") as demo:
         gr.Markdown("## 🏥 Patient History Analysis Tool")
@@ -312,10 +352,15 @@ def create_ui(agent):
         # State to maintain chatbot messages
         chatbot_state = gr.State(value=[])
 
-        def update_ui(file, current_state):
-            messages, report_path = process_final_report(agent, file, current_state)
-            report_update = gr.update(visible=report_path is not None, value=report_path)
-            return messages, report_update, messages
+        async def update_ui(file, current_state):
+            messages = current_state if current_state else []
+            report_path = None
+            async for new_messages, new_report_path in process_final_report(agent, file, messages):
+                messages = new_messages
+                report_path = new_report_path
+                report_update = gr.update(visible=report_path is not None, value=report_path)
+                yield messages, report_update, messages
+            yield messages, gr.update(visible=report_path is not None, value=report_path), messages
 
         analyze_btn.click(
             fn=update_ui,
@@ -329,7 +374,7 @@ def create_ui(agent):
 if __name__ == "__main__":
     try:
         agent = init_agent()
-        demo = create_ui(agent)
+        demo = asyncio.run(create_ui(agent))
         demo.launch(
             server_name="0.0.0.0",
             server_port=7860,
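For reference, the incremental summarization that runs after the chunk analysis keeps the accumulated summary inside a fixed token budget before asking the model to compress it. With this commit's constants that budget is 131072 - 500 - 512 = 130060 tokens. A small sketch of just the roll-up trigger (the summarization call itself is elided, and the sample responses are fabricated placeholders):

# Worked example of the summary budget check; the model call is omitted.
MAX_MODEL_TOKENS = 131072
MAX_NEW_TOKENS = 512
PROMPT_OVERHEAD = 500

summary_budget = MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS
print(summary_budget)  # 130060 tokens available for the accumulated summary

def estimate_tokens(text: str) -> int:
    return int(len(text) / 3.5) + 1  # fallback heuristic from the diff

summary, summary_tokens = "", 0
chunk_responses = ["finding " * 20000, "finding " * 20000, "finding " * 20000]

for response in chunk_responses:
    response_tokens = estimate_tokens(response)
    if summary_tokens + response_tokens > summary_budget:
        # In the app, a "Summarize the following analysis" prompt compresses `summary` at this point.
        print(f"budget exceeded at {summary_tokens + response_tokens} tokens -> roll up summary")
        summary, summary_tokens = "", 0
    summary += response + "\n"
    summary_tokens += response_tokens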
 