CPS-Test-Mobile

Paused

File size: 15,315 Bytes

f75a23b
f394b25
d184610
f394b25
2e8876b
1244d40
d16299c
1c5bd8e
d16299c
4b4b32b
 
 
 
d8282f1
f6e551c
 
 
d16299c
f6e551c
 
 
 
 
 
 
 
 
 
f75a23b
d16299c
 
 
1244d40
 
 
1de8c2b
4b4b32b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6e551c
d16299c
f6e551c
 
 
 
d16299c
 
f6e551c
d16299c
 
f6e551c
4b4b32b
 
 
 
f6e551c
ad85a12
f260d4a
ad85a12
f260d4a
 
 
 
 
 
 
 
 
 
ad85a12
 
f260d4a
4b4b32b
f260d4a
 
 
 
ad85a12
 
 
 
 
 
f260d4a
 
4b4b32b
f260d4a
ad85a12
f260d4a
ad85a12
 
f260d4a
ad85a12
 
 
f260d4a
ad85a12
 
 
f260d4a
ad85a12
 
28e1ce8
ad85a12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6e551c
d16299c
4b4b32b
f6e551c
 
6e39ead
f6e551c
 
6e39ead
f6e551c
d16299c
 
f6e551c
d16299c
 
 
 
13ad0d3
d16299c
f6e551c
 
d16299c
4b4b32b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d89dcd
2e8876b
9a0b74b
2200d70
77810f8
2e8876b
1d89dcd
77810f8
585f453
2e8876b
 
585f453
f260d4a
4b4b32b
585f453
f260d4a
4b4b32b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f260d4a
 
1d89dcd
f260d4a
4b4b32b
f260d4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d89dcd
f260d4a
 
 
 
 
 
 
 
 
 
585f453
f260d4a
585f453
 
 
f260d4a
585f453
 
 
 
f260d4a
585f453
f260d4a
585f453
 
 
e594ff1
f260d4a
 
1d89dcd
585f453
f260d4a
 
2e8876b
f260d4a
585f453
 
 
 
 
 
 
4b4b32b
585f453
1d89dcd
cd41087
585f453
98f2d10
4b4b32b
1d89dcd
affa0af
4b4b32b
f260d4a
585f453
 
 
 
 
 
 
 
 
98f2d10
585f453
2200d70
 
585f453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e39ead
2e8876b
 
 
4b4b32b
 
1d89dcd
 
 
2e8876b
5b0bfb5
2e8876b
 
 
585f453
 
 
a71a831
55e3db0
f394b25
d8282f1
d16299c
4b4b32b
13ad0d3
d8282f1
 
1bdb280
585f453
1d89dcd
 
 
d8282f1
 
13ad0d3
c7670bd

import sys
import os
import pandas as pd
import gradio as gr
from typing import List, Tuple, Dict, Any, Union
import shutil
import re
from datetime import datetime
import time
from transformers import AutoTokenizer
import asyncio
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed

# Configuration and setup
# Everything this app persists (model cache, tool cache, file cache, reports)
# lives under a single root directory.
persistent_dir = "/data/hf_cache"
os.makedirs(persistent_dir, exist_ok=True)

# One sub-directory per kind of persisted data.
model_cache_dir = os.path.join(persistent_dir, "txagent_models")
tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
file_cache_dir = os.path.join(persistent_dir, "cache")
report_dir = os.path.join(persistent_dir, "reports")

# Create all of them up front; exist_ok makes restarts harmless.
for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir]:
    os.makedirs(directory, exist_ok=True)

# Point the Hugging Face cache environment variables at the model cache.
# NOTE(review): these are assigned after `transformers` has already been
# imported at the top of this file -- confirm the library still honors them.
os.environ["HF_HOME"] = model_cache_dir
os.environ["TRANSFORMERS_CACHE"] = model_cache_dir

# Put the sibling "src" directory first on sys.path so the txagent import
# below can be resolved from it.
current_dir = os.path.dirname(os.path.abspath(__file__))
src_path = os.path.abspath(os.path.join(current_dir, "src"))
sys.path.insert(0, src_path)

from txagent.txagent import TxAgent

# Constants
# Token budgets and the concurrency limit shared by the chunking,
# per-chunk analysis, and summarization code in this file.
MAX_MODEL_TOKENS = 131072  # TxAgent's max token limit
MAX_CHUNK_TOKENS = 32768   # Larger chunks to reduce number of chunks
MAX_NEW_TOKENS = 512       # Optimized for fast generation
PROMPT_OVERHEAD = 500      # Estimated tokens for prompt template
MAX_CONCURRENT = 8         # High concurrency for A100 80GB

# Initialize tokenizer for precise token counting
# Loading is best-effort: on any failure `tokenizer` is left as None and
# estimate_tokens() uses its character-count heuristic instead.
try:
    tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
except Exception as e:
    # print() rather than logger: logging is only configured further down.
    print(f"Warning: Could not load tokenizer, falling back to heuristic: {str(e)}")
    tokenizer = None

# Setup logging
# Root logger at INFO with "timestamp - level - message" lines; `logger` is
# the module-level logger used by the functions below.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

def clean_response(text: str) -> str:
    """Normalize raw model output into plain, report-friendly text.

    The text is first round-tripped through UTF-8 (characters that cannot be
    round-tripped are replaced), then three substitutions run in order:
    bracketed spans and the standalone word ``None`` are removed, runs of
    three or more newlines collapse to two, and every character outside the
    allowed set (word characters, whitespace, and ``# - * . , : ( )``) is
    dropped. Leading and trailing whitespace is stripped from the result.
    """
    try:
        cleaned = text.encode("utf-8", "surrogatepass").decode("utf-8")
    except UnicodeError:
        # Lone surrogates survive the encode but not the decode; fall back
        # to a lossy encode that substitutes them.
        cleaned = text.encode("utf-8", "replace").decode("utf-8")

    # (pattern, replacement, flags) applied strictly in this order.
    substitutions = (
        (r"\[.*?\]|\bNone\b", "", re.DOTALL),
        (r"\n{3,}", "\n\n", 0),
        (r"[^\n#\-\*\w\s\.,:\(\)]+", "", 0),
    )
    for pattern, replacement, flags in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)

    return cleaned.strip()

def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in *text*.

    Uses the module-level ``tokenizer`` when one was loaded; otherwise falls
    back to a heuristic of roughly 3.5 characters per token.

    Args:
        text: The text to measure.

    Returns:
        The token count as an ``int``. On the heuristic path the result is
        always at least 1, even for an empty string.
    """
    if tokenizer is not None:
        return len(tokenizer.encode(text, add_special_tokens=False))
    # Floor-dividing by a float yields a float; convert so callers always
    # receive an int, as the signature promises.
    return int(len(text) // 3.5) + 1

def extract_text_from_excel(file_path: str) -> str:
    """Extract text from all sheets in an Excel file.

    Every data row becomes one line of the form ``[sheet_name] a | b | c``.
    Cells that are missing in the sheet are rendered as empty strings.

    Args:
        file_path: Path to the workbook to read.

    Returns:
        The row lines of all sheets, in sheet order, joined by newlines.
        An empty string if the workbook has no data rows.

    Raises:
        ValueError: If the workbook cannot be opened or parsed; the original
            exception is attached as the cause.
    """
    all_text = []
    try:
        # The context manager closes the workbook handle even if a sheet
        # fails to parse.
        with pd.ExcelFile(file_path) as xls:
            for sheet_name in xls.sheet_names:
                df = xls.parse(sheet_name)
                # Stringify first (keeps pandas' own value formatting), then
                # blank out the cells that were missing. astype(str) on its
                # own turns NaN into the literal "nan", after which fillna
                # has nothing left to fill.
                cells = df.astype(str).where(df.notna(), "")
                for row in cells.itertuples(index=False, name=None):
                    line = " | ".join(row)
                    all_text.append(f"[{sheet_name}] {line}")
    except Exception as e:
        raise ValueError(f"Failed to extract text from Excel file: {str(e)}") from e
    return "\n".join(all_text)

def split_text_into_chunks(text: str, max_tokens: int = MAX_CHUNK_TOKENS) -> List[str]:
    """Pack the lines of *text* into chunks that fit a token budget.

    The budget per chunk is ``max_tokens`` minus ``PROMPT_OVERHEAD``. Lines
    are never split: a line that does not fit into the chunk being built
    starts a new chunk, so a single line larger than the budget ends up as a
    chunk of its own.

    Raises:
        ValueError: If the budget left after subtracting the prompt overhead
            is not positive.
    """
    budget = max_tokens - PROMPT_OVERHEAD
    if budget <= 0:
        raise ValueError(f"Effective max tokens ({budget}) must be positive.")

    chunks: List[str] = []
    pending: List[str] = []   # lines of the chunk currently being built
    used = 0                  # estimated tokens already in `pending`

    for line in text.split("\n"):
        cost = estimate_tokens(line)
        if used + cost <= budget:
            pending.append(line)
            used += cost
            continue

        # The line does not fit: emit what has been gathered (if anything)
        # and start the next chunk with this line.
        if pending:
            chunks.append("\n".join(pending))
        pending, used = [line], cost

    if pending:
        chunks.append("\n".join(pending))

    return chunks

def build_prompt_from_text(chunk: str) -> str:
    """Wrap one chunk of extracted record text in the analysis prompt.

    The prompt consists of a fixed introduction, the chunk itself, and a
    fixed list of the sections the model is asked to produce.
    """
    requested_sections = (
        "Diagnostic Patterns",
        "Medication Issues",
        "Missed Opportunities",
        "Inconsistencies",
        "Follow-up Recommendations",
    )
    prompt_lines = [
        "",
        "### Unstructured Clinical Records",
        "",
        "You are reviewing unstructured, mixed-format clinical documentation from various forms, tables, and sheets.",
        "",
        "**Objective:** Identify patterns, missed diagnoses, inconsistencies, and follow-up gaps.",
        "",
        "Here is the extracted content chunk:",
        "",
        f"{chunk}",
        "",
        "Please analyze the above and provide:",
    ]
    prompt_lines.extend(f"- {section}" for section in requested_sections)
    # The trailing empty entry yields the final newline of the prompt.
    prompt_lines.append("")
    return "\n".join(prompt_lines)

def init_agent():
    """Create a TxAgent, load its model, and return it.

    On first use the tool definition file ``data/new_tool.json`` (resolved
    against the current working directory) is copied into the tool cache
    directory; an existing cached copy is left untouched. The agent is then
    constructed with that cached file and its model is initialized.
    """
    bundled_tool_file = os.path.abspath("data/new_tool.json")
    cached_tool_file = os.path.join(tool_cache_dir, "new_tool.json")

    # Seed the cache only when no cached copy exists yet.
    if not os.path.exists(cached_tool_file):
        shutil.copy(bundled_tool_file, cached_tool_file)

    agent_settings = dict(
        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
        tool_files_dict={"new_tool": cached_tool_file},
        force_finish=True,
        enable_checker=True,
        step_rag_num=4,
        seed=100,
        additional_default_tools=[],
    )
    tx_agent = TxAgent(**agent_settings)
    tx_agent.init_model()
    return tx_agent

async def process_chunk(agent, chunk: str, chunk_index: int, total_chunks: int) -> Tuple[int, str, str]:
    """Analyze one chunk with the agent.

    Returns a ``(chunk_index, cleaned_response, status_message)`` tuple. The
    response is an empty string when the prompt exceeds the model's token
    limit or when the agent raises; the status message says which happened.
    """
    label = chunk_index + 1  # 1-based number used in every message
    logger.info(f"Processing chunk {label}/{total_chunks}")

    prompt = build_prompt_from_text(chunk)
    prompt_tokens = estimate_tokens(prompt)

    # Guard: never send a prompt the model cannot accept.
    if prompt_tokens > MAX_MODEL_TOKENS:
        error_msg = f"❌ Chunk {label} prompt too long ({prompt_tokens} tokens). Skipping..."
        logger.warning(error_msg)
        return chunk_index, "", error_msg

    generation_args = dict(
        message=prompt,
        history=[],
        temperature=0.2,
        max_new_tokens=MAX_NEW_TOKENS,
        max_token=MAX_MODEL_TOKENS,
        call_agent=False,
        conversation=[],
    )

    response = ""
    try:
        for item in agent.run_gradio_chat(**generation_args):
            # The agent may yield plain strings, objects carrying a
            # `content` attribute, or lists of such objects.
            if isinstance(item, str):
                response += item
            elif hasattr(item, "content"):
                response += item.content
            elif isinstance(item, list):
                for entry in item:
                    if hasattr(entry, "content"):
                        response += entry.content
        status = f"✅ Chunk {label} analysis complete"
        logger.info(status)
    except Exception as e:
        # Discard any partial output so a failed chunk contributes nothing.
        status = f"❌ Error analyzing chunk {label}: {str(e)}"
        logger.error(status)
        response = ""

    return chunk_index, clean_response(response), status

def _collect_agent_text(agent, prompt: str) -> str:
    """Run ``agent.run_gradio_chat`` on *prompt* and concatenate the text it yields."""
    collected = ""
    for result in agent.run_gradio_chat(
        message=prompt,
        history=[],
        temperature=0.2,
        max_new_tokens=MAX_NEW_TOKENS,
        max_token=MAX_MODEL_TOKENS,
        call_agent=False,
        conversation=[],
    ):
        # Results may be plain strings, objects with a `content` attribute,
        # or lists of such objects.
        if isinstance(result, str):
            collected += result
        elif hasattr(result, "content"):
            collected += result.content
        elif isinstance(result, list):
            for r in result:
                if hasattr(r, "content"):
                    collected += r.content
    return collected


async def process_final_report(agent, file, chatbot_state: List[Dict[str, str]]) -> Tuple[List[Dict[str, str]], Union[str, None]]:
    """Process the Excel file and generate a final report.

    Pipeline: extract text from the workbook, split it into chunks, analyze
    the chunks in batches of ``MAX_CONCURRENT`` worker threads, fold the
    per-chunk analyses into a running summary (condensing it whenever it
    would outgrow the model's context), generate the final report, and save
    it as Markdown in ``report_dir``.

    Args:
        agent: The agent used for every generation call.
        file: Uploaded file object; its ``name`` attribute is used as the
            path of the workbook.
        chatbot_state: Existing chat messages. A non-empty list is appended
            to in place.

    Returns:
        ``(messages, report_path)``. ``report_path`` is ``None`` unless the
        report file was written successfully; failures are reported as
        assistant messages rather than raised.

    Note:
        Waiting for the worker threads blocks the calling event loop until
        each batch has finished.
    """
    messages = chatbot_state if chatbot_state else []
    report_path = None

    if file is None or not hasattr(file, "name"):
        messages.append({"role": "assistant", "content": "❌ Please upload a valid Excel file before analyzing."})
        return messages, report_path

    try:
        messages.append({"role": "user", "content": f"Processing Excel file: {os.path.basename(file.name)}"})
        messages.append({"role": "assistant", "content": "⏳ Extracting and analyzing data..."})

        # Extract text and split into chunks
        start_time = time.time()
        extracted_text = extract_text_from_excel(file.name)
        chunks = split_text_into_chunks(extracted_text, max_tokens=MAX_CHUNK_TOKENS)
        total_chunks = len(chunks)
        logger.info(f"Extracted text and split into {total_chunks} chunks in {time.time() - start_time:.2f} seconds")

        chunk_responses = [None] * total_chunks
        batch_size = MAX_CONCURRENT
        total_batches = (total_chunks + batch_size - 1) // batch_size

        def run_chunk(chunk_text: str, index: int) -> Tuple[int, str, str]:
            # Each worker thread drives the coroutine on its own event loop.
            return asyncio.run(process_chunk(agent, chunk_text, index, total_chunks))

        # Process chunks in batches
        for batch_start in range(0, total_chunks, batch_size):
            batch_chunks = chunks[batch_start:batch_start + batch_size]
            logger.info(f"Processing batch {batch_start//batch_size + 1}/{total_batches}")

            with ThreadPoolExecutor(max_workers=MAX_CONCURRENT) as executor:
                futures = [
                    executor.submit(run_chunk, chunk, i)
                    for i, chunk in enumerate(batch_chunks, start=batch_start)
                ]
                for future in as_completed(futures):
                    chunk_index, response, status = future.result()
                    # Store by index: completion order is arbitrary.
                    chunk_responses[chunk_index] = response
                    messages.append({"role": "assistant", "content": status})

        # Drop empty responses but remember each survivor's original chunk
        # number, so headings still match the "Chunk N" status messages.
        numbered_responses = [
            (number, response)
            for number, response in enumerate(chunk_responses, start=1)
            if response
        ]
        if not numbered_responses:
            messages.append({"role": "assistant", "content": "❌ No valid chunk responses to summarize."})
            return messages, report_path

        # Summarize chunk responses incrementally
        summary = ""
        current_summary_tokens = 0
        summary_budget = MAX_MODEL_TOKENS - PROMPT_OVERHEAD - MAX_NEW_TOKENS
        for chunk_number, response in numbered_responses:
            response_tokens = estimate_tokens(response)
            # Condense only when there is something accumulated to condense;
            # summarizing an empty string would waste a generation call.
            if summary and current_summary_tokens + response_tokens > summary_budget:
                summary_prompt = f"Summarize the following analysis:\n\n{summary}\n\nProvide a concise summary."
                try:
                    summary = clean_response(_collect_agent_text(agent, summary_prompt))
                    current_summary_tokens = estimate_tokens(summary)
                except Exception as e:
                    messages.append({"role": "assistant", "content": f"❌ Error summarizing intermediate results: {str(e)}"})
                    return messages, report_path

            summary += f"\n\n### Chunk {chunk_number} Analysis\n{response}"
            current_summary_tokens += response_tokens

        # Final summarization
        final_prompt = f"Summarize the key findings from the following analyses:\n\n{summary}"
        messages.append({"role": "assistant", "content": "📊 Generating final report..."})

        try:
            final_report_text = _collect_agent_text(agent, final_prompt)
        except Exception as e:
            messages.append({"role": "assistant", "content": f"❌ Error generating final report: {str(e)}"})
            return messages, report_path

        cleaned_report = clean_response(final_report_text)
        final_report = f"# \U0001f9e0 Final Patient Report\n\n{cleaned_report}"
        # Replace the "Generating final report..." placeholder with the result.
        messages[-1]["content"] = f"📊 Final Report:\n\n{cleaned_report}"

        # Save the report
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        saved_path = os.path.join(report_dir, f"report_{timestamp}.md")

        # The heading contains an emoji, so the encoding must not depend on
        # the platform default.
        with open(saved_path, 'w', encoding='utf-8') as f:
            f.write(final_report)
        # Publish the path only once the file really exists, so a failed
        # write never hands the caller a path to a missing file.
        report_path = saved_path

        messages.append({"role": "assistant", "content": f"✅ Report generated and saved: report_{timestamp}.md"})
        logger.info(f"Total processing time: {time.time() - start_time:.2f} seconds")

        return messages, report_path

    except Exception as e:
        messages.append({"role": "assistant", "content": f"❌ Error processing file: {str(e)}"})
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"Processing failed: {str(e)}")
        return messages, report_path

async def create_ui(agent):
    """Create the Gradio UI for the patient history analysis tool.

    Args:
        agent: The agent handed to process_final_report() on every
            "Analyze" click.

    Returns:
        The assembled ``gr.Blocks`` app. It is only built here, not
        launched. The function is declared ``async`` but awaits nothing
        while building the layout.
    """
    with gr.Blocks(title="Patient History Chat", css=".gradio-container {max-width: 900px !important}") as demo:
        gr.Markdown("## 🏥 Patient History Analysis Tool")
        
        with gr.Row():
            # Left, wider column: the conversation view.
            with gr.Column(scale=3):
                chatbot = gr.Chatbot(
                    label="Clinical Assistant",
                    show_copy_button=True,
                    height=600,
                    type="messages",
                    avatar_images=(
                        None,
                        "https://i.imgur.com/6wX7Zb4.png"
                    )
                )
            # Right, narrower column: upload, trigger button, and download.
            with gr.Column(scale=1):
                file_upload = gr.File(
                    label="Upload Excel File",
                    file_types=[".xlsx"],
                    height=100
                )
                analyze_btn = gr.Button(
                    "🧠 Analyze Patient History",
                    variant="primary"
                )
                # Starts hidden; update_ui reveals it once a report path exists.
                report_output = gr.File(
                    label="Download Report",
                    visible=False,
                    interactive=False
                )

        # State to maintain chatbot messages
        chatbot_state = gr.State(value=[])

        async def update_ui(file, current_state):
            # Click handler: run the full analysis, then return the message
            # list for both the chatbot and the state, plus an update that
            # shows the download component only when a report was produced.
            messages = current_state if current_state else []
            messages, report_path = await process_final_report(agent, file, messages)
            report_update = gr.update(visible=report_path is not None, value=report_path)
            return messages, report_update, messages

        analyze_btn.click(
            fn=update_ui,
            inputs=[file_upload, chatbot_state],
            outputs=[chatbot, report_output, chatbot_state],
            api_name="analyze"
        )

    return demo

if __name__ == "__main__":
    # Script entry point: build the agent and the UI, then serve the app.
    # Any failure during startup is reported and turned into exit code 1.
    try:
        agent = init_agent()
        demo = asyncio.run(create_ui(agent))
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            show_error=True,
            # Use the directory reports are actually written to, so the
            # download path cannot drift from the storage path.
            allowed_paths=[report_dir],
            share=False,
            inline=False,
            max_threads=40
        )
    except Exception as e:
        print(f"Error: {str(e)}")
        sys.exit(1)