Ali2206 committed on
Commit 78b3332 · verified · 1 Parent(s): ea2488a

Update app.py

Files changed (1)
  1. app.py +402 -133
app.py CHANGED
@@ -1,155 +1,424 @@
- # ───────────────────────────────────────────────────────── app.py ─────────
- import os, sys, json, re, gc, time, hashlib, logging, shutil, subprocess
- from typing import List, Any
  from concurrent.futures import ThreadPoolExecutor, as_completed
-
- import torch, gradio as gr, psutil
  from diskcache import Cache

- # ---------- CONFIG ----------
- MODEL_NAME = "mims-harvard/TxAgent-T1-Llama-3.1-8B"
- RAG_MODEL = "mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B"
- PROMPT_MAX = 512
- GPU_UTIL = 0.90  # leave a little head‑room
-
- PERSIST = "/data/hf_cache"
- MODEL_CACHE = os.path.join(PERSIST, "txagent_models")
- TOOL_CACHE = os.path.join(PERSIST, "tool_cache")
- FILE_CACHE = os.path.join(PERSIST, "preprocessed")
- REPORT_DIR = os.path.join(PERSIST, "reports")
- for d in (MODEL_CACHE, TOOL_CACHE, FILE_CACHE, REPORT_DIR):
-     os.makedirs(d, exist_ok=True)
-
- os.environ.update(
-     HF_HOME = MODEL_CACHE,
-     TRANSFORMERS_CACHE = MODEL_CACHE,
-     VLLM_CACHE_DIR = os.path.join(PERSIST, "vllm_cache"),
-     TOKENIZERS_PARALLELISM = "false",
- )
-
- ROOT = os.path.dirname(os.path.abspath(__file__))
- sys.path.insert(0, os.path.join(ROOT, "src"))
-
- from txagent.txagent import TxAgent  # noqa: E402
-
- logging.basicConfig(
-     level = logging.INFO,
-     format="%(asctime)s %(levelname)s %(name)s — %(message)s")
- log = logging.getLogger("app")
-
- cache = Cache(FILE_CACHE, size_limit=20 * 1024**3)  # 20 GB
-
-
- # ---------- GPU / CPU helpers ----------
- def _gpu_ok() -> bool:
-     return torch.cuda.is_available() and torch.cuda.device_count() > 0
-
- def _sys(tag=""):
-     cpu = psutil.cpu_percent()
-     ram = psutil.virtual_memory()
-     log.info("[%s] CPU %.1f%% — RAM %.1f / %.1f GB",
-              tag, cpu, ram.used/1e9, ram.total/1e9)
-
- # ---------- AGENT LOADER ----------
- def _init_vllm() -> TxAgent:
-     from vllm import LLM  # local import avoids import‑time CUDA checks
-     agent = TxAgent(
-         model_name = MODEL_NAME,
-         rag_model_name = RAG_MODEL,
-         step_rag_num = 4,
-         force_finish = True,
-         enable_checker = False,
-         seed = 42,
-     )
-     # monkey‑patch TxAgent.load_models to use enforced kwargs
-     def _load():
-         agent.model = LLM(
-             model = MODEL_NAME,
-             dtype = "half",
-             gpu_memory_utilization = GPU_UTIL,
-             enforce_eager = True,  # avoids CUDAGraph crashes
-         )
-     agent.load_models = _load  # type: ignore
-     agent.init_model()
-     return agent

- def _init_cpu_pipe():
-     from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-     tok = AutoTokenizer.from_pretrained("NousResearch/Nous-Hermes-2-Mistral-7B-DPO")
-     mdl = AutoModelForCausalLM.from_pretrained(
-         "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
-         torch_dtype = (torch.float16 if _gpu_ok() else torch.float32),
-         device_map = ("auto" if _gpu_ok() else None),
-     )
-     return pipeline("text-generation", model=mdl, tokenizer=tok,
-                     max_new_tokens=PROMPT_MAX, device=0 if _gpu_ok() else -1)

- def init_agent():
-     _sys("before‑load")
      try:
-         agent = _init_vllm()
-         log.info("✅ vLLM loaded on GPU")
-         agent.generator = None  # mark as vLLM path
      except Exception as e:
-         log.warning("⚠️ vLLM path failed (%s) → falling back to HF pipeline", e)
-         pipe = _init_cpu_pipe()
-         agent = TxAgent(dummy=True)  # bare object; we'll store pipe on it
-         agent.generator = pipe
-     _sys("after‑load")
-     return agent

- AGENT = init_agent()

- # ---------- LLM utility ----------
- def run_llm(prompt: str) -> str:
-     """Unified call for either vLLM or HF pipeline"""
-     if AGENT.generator is None:  # vLLM path
-         out = list(AGENT.run_gradio_chat(prompt, [], 0.2,
-                                          PROMPT_MAX, 2048, False, []))[-1]
-         return out.content if hasattr(out, "content") else str(out)
-     # HF pipeline path
-     return AGENT.generator(prompt)[0]["generated_text"]

- # ---------- (dummy) IO helpers ----------
- def md5(path: str) -> str:
-     h = hashlib.md5()
-     with open(path, "rb") as f:
-         for chunk in iter(lambda: f.read(1 << 20), b""):
-             h.update(chunk)
-     return h.hexdigest()

- # ---------- GRADIO ----------
- def analyze(q, hist, _files):
-     hist.append({"role": "user", "content": q})
-     yield hist, None, ""

-     # (File‑parsing code omitted here for brevity — keep your fast PDF/CSV parts)

-     answer = run_llm("Summarise missed diagnoses only:\n\n" + q)
-     hist.append({"role": "assistant", "content": answer})
-     yield hist, None, answer

- def ui():
      with gr.Blocks(theme=gr.themes.Soft()) as demo:
-         gr.Markdown("<h1 style='text-align:center'>🩺 Clinical Oversight Assistant</h1>")
-         chat = gr.Chatbot(height=600, type="messages")
-         summ = gr.Markdown()
-         ask = gr.Textbox(placeholder="Ask…", show_label=False)
-         btn = gr.Button("Analyze", variant="primary")
-
-         btn.click(analyze, [ask, gr.State([]), gr.State([])], [chat, gr.State(None), summ])
-         ask.submit(analyze, [ask, gr.State([]), gr.State([])], [chat, gr.State(None), summ])
      return demo

  if __name__ == "__main__":
-     ui().queue(api_open=False).launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         allowed_paths=[REPORT_DIR],
-         show_error=True,
-     )
- # ──────────────────────────────────────────────────────────────────────────

+ import sys
+ import os
+ import pandas as pd
+ import pdfplumber
+ import json
+ import gradio as gr
+ from typing import List, Tuple, Optional, Generator
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ import hashlib
+ import shutil
+ import re
+ import psutil
+ import subprocess
+ import logging
+ import torch
+ import gc
  from diskcache import Cache
+ import time
+ import pyarrow as pa
+ import pyarrow.parquet as pq
+ import pyarrow.csv as pc
+ import numpy as np
+ from functools import partial
+ from itertools import islice
+ import io

+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)

+ # Persistent directory
+ persistent_dir = "/data/hf_cache"
+ os.makedirs(persistent_dir, exist_ok=True)

+ model_cache_dir = os.path.join(persistent_dir, "txagent_models")
+ tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
+ file_cache_dir = os.path.join(persistent_dir, "cache")
+ report_dir = os.path.join(persistent_dir, "reports")
+ vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")

+ for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
+     os.makedirs(directory, exist_ok=True)
+
+ os.environ["HF_HOME"] = model_cache_dir
+ os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
+ os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
+
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ src_path = os.path.abspath(os.path.join(current_dir, "src"))
+ sys.path.insert(0, src_path)
+
+ from txagent.txagent import TxAgent
+
+ # Initialize cache with 10GB limit
+ cache = Cache(file_cache_dir, size_limit=10 * 1024**3)
+
+ def sanitize_utf8(text: str) -> str:
+     return text.encode("utf-8", "ignore").decode("utf-8")
+
+ def file_hash(path: str) -> str:
+     with open(path, "rb") as f:
+         return hashlib.md5(f.read()).hexdigest()
+
+ def extract_all_pages(file_path: str, progress_callback=None) -> str:
+     try:
+         with pdfplumber.open(file_path) as pdf:
+             total_pages = len(pdf.pages)
+             if total_pages == 0:
+                 return ""
+
+             batch_size = 10
+             batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
+             text_chunks = [""] * total_pages
+             processed_pages = 0
+
+             def extract_batch(start: int, end: int) -> List[tuple]:
+                 results = []
+                 with pdfplumber.open(file_path) as pdf:
+                     for page in pdf.pages[start:end]:
+                         page_num = start + pdf.pages.index(page)
+                         page_text = page.extract_text() or ""
+                         results.append((page_num, f"=== Page {page_num + 1} ===\n{page_text.strip()}"))
+                 return results
+
+             with ThreadPoolExecutor(max_workers=6) as executor:
+                 futures = [executor.submit(extract_batch, start, end) for start, end in batches]
+                 for future in as_completed(futures):
+                     for page_num, text in future.result():
+                         text_chunks[page_num] = text
+                     processed_pages += batch_size
+                     if progress_callback:
+                         progress_callback(min(processed_pages, total_pages), total_pages)
+
+             return "\n\n".join(filter(None, text_chunks))
+     except Exception as e:
+         logger.error("PDF processing error: %s", e)
+         return f"PDF processing error: {str(e)}"
+
+ def excel_to_ndjson(file_path: str) -> Generator[str, None, None]:
+     """Stream Excel file as NDJSON for maximum performance"""
      try:
+         # pd.read_excel has no chunksize argument, so each sheet is read
+         # once with openpyxl and its rows are then streamed as NDJSON
+         with pd.ExcelFile(file_path, engine='openpyxl') as xls:
+             for sheet_name in xls.sheet_names:
+                 sheet = pd.read_excel(
+                     xls,
+                     sheet_name=sheet_name,
+                     header=None,
+                     dtype=str,
+                 )
+                 for _, row in sheet.iterrows():
+                     yield json.dumps({
+                         "sheet": sheet_name,
+                         "row": row.fillna("").astype(str).tolist()
+                     }) + "\n"
      except Exception as e:
+         logger.error(f"Error streaming Excel: {e}")
+         raise

+ def csv_to_ndjson(file_path: str) -> Generator[str, None, None]:
+     """Stream CSV file as NDJSON for maximum performance"""
+     try:
+         for chunk in pd.read_csv(
+             file_path,
+             header=None,
+             dtype=str,
+             chunksize=1000,
+             encoding_errors='replace',
+             on_bad_lines='skip'
+         ):
+             for _, row in chunk.iterrows():
+                 yield json.dumps({
+                     "row": row.fillna("").astype(str).tolist()
+                 }) + "\n"
+     except Exception as e:
+         logger.error(f"Error streaming CSV: {e}")
+         raise
+
+ def stream_file_to_json(file_path: str, file_type: str) -> Generator[str, None, None]:
+     """Stream file content as JSON chunks"""
+     try:
+         if file_type == "pdf":
+             text = extract_all_pages(file_path)
+             yield json.dumps({
+                 "filename": os.path.basename(file_path),
+                 "content": text,
+                 "status": "initial"
+             })
+         elif file_type in ["csv", "xls", "xlsx"]:
+             # Stream the file content
+             yield json.dumps({
+                 "filename": os.path.basename(file_path),
+                 "streaming": True,
+                 "type": file_type
+             })
+
+             if file_type == "csv":
+                 stream_gen = csv_to_ndjson(file_path)
+             else:
+                 stream_gen = excel_to_ndjson(file_path)
+
+             for chunk in stream_gen:
+                 yield chunk
+         else:
+             yield json.dumps({"error": f"Unsupported file type: {file_type}"})
+     except Exception as e:
+         logger.error("Error processing %s: %s", os.path.basename(file_path), e)
+         yield json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})

+ def log_system_usage(tag=""):
+     try:
+         cpu = psutil.cpu_percent(interval=1)
+         mem = psutil.virtual_memory()
+         logger.info("[%s] CPU: %.1f%% | RAM: %dMB / %dMB", tag, cpu, mem.used // (1024**2), mem.total // (1024**2))
+         result = subprocess.run(
+             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
+             capture_output=True, text=True
+         )
+         if result.returncode == 0:
+             used, total, util = result.stdout.strip().split(", ")
+             logger.info("[%s] GPU: %sMB / %sMB | Utilization: %s%%", tag, used, total, util)
+     except Exception as e:
+         logger.error("[%s] GPU/CPU monitor failed: %s", tag, e)

+ def clean_response(text: str) -> str:
+     text = sanitize_utf8(text)
+     text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
+     diagnoses = []
+     lines = text.splitlines()
+     in_diagnoses_section = False
+     for line in lines:
+         line = line.strip()
+         if not line:
+             continue
+         if re.match(r"###\s*Missed Diagnoses", line):
+             in_diagnoses_section = True
+             continue
+         if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
+             in_diagnoses_section = False
+             continue
+         if in_diagnoses_section and re.match(r"-\s*.+", line):
+             diagnosis = re.sub(r"^\-\s*", "", line).strip()
+             if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
+                 diagnoses.append(diagnosis)
+     text = " ".join(diagnoses)
+     text = re.sub(r"\s+", " ", text).strip()
+     text = re.sub(r"[^\w\s\.\,\(\)\-]", "", text)
+     return text if text else ""

+ def summarize_findings(combined_response: str) -> str:
+     chunks = combined_response.split("--- Analysis for Chunk")
+     diagnoses = []
+     for chunk in chunks:
+         chunk = chunk.strip()
+         if not chunk or "No oversights identified" in chunk:
+             continue
+         lines = chunk.splitlines()
+         in_diagnoses_section = False
+         for line in lines:
+             line = line.strip()
+             if not line:
+                 continue
+             if re.match(r"###\s*Missed Diagnoses", line):
+                 in_diagnoses_section = True
+                 continue
+             if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
+                 in_diagnoses_section = False
+                 continue
+             if in_diagnoses_section and re.match(r"-\s*.+", line):
+                 diagnosis = re.sub(r"^\-\s*", "", line).strip()
+                 if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
+                     diagnoses.append(diagnosis)

+     seen = set()
+     unique_diagnoses = [d for d in diagnoses if not (d in seen or seen.add(d))]
+
+     if not unique_diagnoses:
+         return "No missed diagnoses were identified in the provided records."

+     summary = "Missed diagnoses include " + ", ".join(unique_diagnoses[:-1])
+     if len(unique_diagnoses) > 1:
+         summary += f", and {unique_diagnoses[-1]}"
+     elif len(unique_diagnoses) == 1:
+         summary = "Missed diagnoses include " + unique_diagnoses[0]
+     summary += ", all of which require urgent clinical review to prevent potential adverse outcomes."
+
+     return summary.strip()

+ def init_agent():
+     logger.info("Initializing model...")
+     log_system_usage("Before Load")
+     default_tool_path = os.path.abspath("data/new_tool.json")
+     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+     if not os.path.exists(target_tool_path):
+         shutil.copy(default_tool_path, target_tool_path)

+     agent = TxAgent(
+         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+         tool_files_dict={"new_tool": target_tool_path},
+         force_finish=True,
+         enable_checker=False,
+         step_rag_num=4,
+         seed=100,
+         additional_default_tools=[],
+     )
+     agent.init_model()
+     log_system_usage("After Load")
+     logger.info("Agent Ready")
+     return agent

+ def batched(iterable, n):
+     """Batch data into tuples of length n. The last batch may be shorter."""
+     it = iter(iterable)
+     while True:
+         batch = list(islice(it, n))
+         if not batch:
+             return
+         yield batch

+ def create_ui(agent):
      with gr.Blocks(theme=gr.themes.Soft()) as demo:
+         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
+         chatbot = gr.Chatbot(label="Detailed Analysis", height=600, type="messages")
+         final_summary = gr.Markdown(label="Summary of Missed Diagnoses")
+         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
+         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
+         send_btn = gr.Button("Analyze", variant="primary")
+         download_output = gr.File(label="Download Full Report")
+         progress_bar = gr.Progress()
+
+         prompt_template = """
+ Analyze the patient record excerpt for missed diagnoses only. Provide a concise, evidence-based summary as a single paragraph without headings or bullet points. Include specific clinical findings (e.g., 'elevated blood pressure (160/95) on page 10'), their potential implications (e.g., 'may indicate untreated hypertension'), and a recommendation for urgent review. Do not include other oversight categories like medication conflicts. If no missed diagnoses are found, state 'No missed diagnoses identified' in a single sentence.
+ Patient Record Excerpt (Chunk {0} of {1}):
+ {chunk}
+ """
+
+         def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
+             history.append({"role": "user", "content": message})
+             yield history, None, ""
+
+             extracted = []
+             file_hash_value = ""
+
+             if files:
+                 # Process files in parallel with streaming
+                 with ThreadPoolExecutor(max_workers=4) as executor:
+                     futures = []
+                     for f in files:
+                         file_type = f.name.split(".")[-1].lower()
+                         # bind file_type per file; a bare closure would see only
+                         # the value from the last loop iteration
+                         futures.append(executor.submit(
+                             lambda f, ft=file_type: list(stream_file_to_json(f.name, ft)),
+                             f
+                         ))
+
+                     for future in as_completed(futures):
+                         try:
+                             extracted.extend(future.result())
+                         except Exception as e:
+                             logger.error(f"File processing error: {e}")
+                             extracted.append(json.dumps({
+                                 "error": f"Error processing file: {str(e)}"
+                             }))
+
+                 file_hash_value = file_hash(files[0].name) if files else ""
+                 history.append({"role": "assistant", "content": "✅ File processing complete"})
+                 yield history, None, ""
+
+             # Process chunks in parallel with dynamic batching
+             chunk_size = 8000  # Larger chunks reduce overhead
+             combined_response = ""
+
+             try:
+                 # Convert extracted data to text chunks
+                 text_content = "\n".join(extracted)
+                 chunks = [text_content[i:i+chunk_size] for i in range(0, len(text_content), chunk_size)]
+
+                 # Process chunks in parallel batches
+                 batch_size = 4  # Optimal for most GPUs
+                 total_chunks = len(chunks)
+
+                 for batch_idx, batch_chunks in enumerate(batched(chunks, batch_size)):
+                     batch_prompts = [
+                         prompt_template.format(
+                             batch_idx * batch_size + i + 1,
+                             total_chunks,
+                             chunk=chunk[:6000]  # Slightly larger context
+                         )
+                         for i, chunk in enumerate(batch_chunks)
+                     ]
+
+                     progress((batch_idx * batch_size) / total_chunks,
+                              desc=f"Analyzing batch {batch_idx + 1}/{(total_chunks + batch_size - 1) // batch_size}")
+
+                     # Process batch in parallel
+                     with ThreadPoolExecutor(max_workers=len(batch_prompts)) as executor:
+                         future_to_prompt = {
+                             executor.submit(
+                                 agent.run_gradio_chat,
+                                 prompt, [], 0.2, 512, 2048, False, []
+                             ): prompt
+                             for prompt in batch_prompts
+                         }
+
+                         for future in as_completed(future_to_prompt):
+                             chunk_response = ""
+                             for chunk_output in future.result():
+                                 if chunk_output is None:
+                                     continue
+                                 if isinstance(chunk_output, list):
+                                     for m in chunk_output:
+                                         if hasattr(m, 'content') and m.content:
+                                             cleaned = clean_response(m.content)
+                                             if cleaned:
+                                                 chunk_response += cleaned + " "
+                                 elif isinstance(chunk_output, str) and chunk_output.strip():
+                                     cleaned = clean_response(chunk_output)
+                                     if cleaned:
+                                         chunk_response += cleaned + " "
+
+                             combined_response += f"--- Analysis for Chunk {batch_idx * batch_size + 1} ---\n{chunk_response.strip()}\n"
+                             history[-1] = {"role": "assistant", "content": combined_response.strip()}
+                             yield history, None, ""
+
+                     # Clean up memory
+                     torch.cuda.empty_cache()
+                     gc.collect()
+
+                 # Generate final summary
+                 summary = summarize_findings(combined_response)
+                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
+                 if report_path:
+                     with open(report_path, "w", encoding="utf-8") as f:
+                         f.write(combined_response + "\n\n" + summary)
+
+                 yield history, report_path if report_path and os.path.exists(report_path) else None, summary
+
+             except Exception as e:
+                 logger.error("Analysis error: %s", e)
+                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
+                 yield history, None, f"Error occurred during analysis: {str(e)}"
+
+         send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
+         msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
      return demo

  if __name__ == "__main__":
+     try:
+         logger.info("Launching app...")
+         agent = init_agent()
+         demo = create_ui(agent)
+         demo.queue(api_open=False).launch(
+             server_name="0.0.0.0",
+             server_port=7860,
+             show_error=True,
+             allowed_paths=[report_dir],
+             share=False
+         )
+     finally:
+         if torch.distributed.is_initialized():
+             torch.distributed.destroy_process_group()