Ali2206 committed
Commit 5f7a1a1 · verified · 1 Parent(s): 13560d3

Update app.py

Files changed (1)
  1. app.py +176 -248
app.py CHANGED
@@ -1,314 +1,242 @@
- 
- import json
  import gradio as gr
- from typing import List, Optional
- from concurrent.futures import ThreadPoolExecutor, as_completed
  import hashlib
- import shutil
  import time
- from functools import lru_cache
- from threading import Thread
  import re
- import tempfile
- 
- # Environment setup
- current_dir = os.path.dirname(os.path.abspath(__file__))
- src_path = os.path.abspath(os.path.join(current_dir, "src"))
- sys.path.insert(0, src_path)
- 
- # Cache directories
- base_dir = "/data"
- os.makedirs(base_dir, exist_ok=True)
- model_cache_dir = os.path.join(base_dir, "txagent_models")
- tool_cache_dir = os.path.join(base_dir, "tool_cache")
- file_cache_dir = os.path.join(base_dir, "cache")
- report_dir = "/data/reports"
- vllm_cache_dir = os.path.join(base_dir, "vllm_cache")
- 
- os.makedirs(model_cache_dir, exist_ok=True)
- os.makedirs(tool_cache_dir, exist_ok=True)
- os.makedirs(file_cache_dir, exist_ok=True)
- os.makedirs(report_dir, exist_ok=True)
- os.makedirs(vllm_cache_dir, exist_ok=True)

  os.environ.update({
-     "TRANSFORMERS_CACHE": model_cache_dir,
-     "HF_HOME": model_cache_dir,
-     "VLLM_CACHE_DIR": vllm_cache_dir,
      "TOKENIZERS_PARALLELISM": "false",
      "CUDA_LAUNCH_BLOCKING": "1"
  })

- from txagent.txagent import TxAgent
 
- MEDICAL_KEYWORDS = {
-     'diagnosis', 'assessment', 'plan', 'results', 'medications',
-     'allergies', 'summary', 'impression', 'findings', 'recommendations'
- }
 
- def sanitize_utf8(text: str) -> str:
-     return text.encode("utf-8", "ignore").decode("utf-8")
 
  def file_hash(path: str) -> str:
      with open(path, "rb") as f:
          return hashlib.md5(f.read()).hexdigest()
 
- def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
      try:
-         text_chunks = []
          with pdfplumber.open(file_path) as pdf:
-             for i, page in enumerate(pdf.pages[:3]):
-                 text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
-             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
-                 page_text = page.extract_text() or ""
-                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
-                     text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
-         return "\n\n".join(text_chunks)
      except Exception as e:
          return f"PDF processing error: {str(e)}"
 
- def convert_file_to_json(file_path: str, file_type: str) -> str:
      try:
          h = file_hash(file_path)
-         cache_path = os.path.join(file_cache_dir, f"{h}.json")
          if os.path.exists(cache_path):
-             return open(cache_path, "r", encoding="utf-8").read()
- 
          if file_type == "pdf":
-             text = extract_priority_pages(file_path)
-             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
-             Thread(target=full_pdf_processing, args=(file_path, h)).start()
- 
          elif file_type == "csv":
-             df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
-             content = df.fillna("").astype(str).values.tolist()
-             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
- 
          elif file_type in ["xls", "xlsx"]:
-             try:
-                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
-             except:
-                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
-             content = df.fillna("").astype(str).values.tolist()
-             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
- 
          else:
              return json.dumps({"error": f"Unsupported file type: {file_type}"})
 
          with open(cache_path, "w", encoding="utf-8") as f:
              f.write(result)
          return result
- 
-     except Exception as e:
-         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
- 
- def full_pdf_processing(file_path: str, file_hash: str):
-     try:
-         cache_path = os.path.join(file_cache_dir, f"{file_hash}_full.json")
-         if os.path.exists(cache_path):
-             return
-         with pdfplumber.open(file_path) as pdf:
-             full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}" for i, page in enumerate(pdf.pages)])
-         result = json.dumps({"filename": os.path.basename(file_path), "content": full_text, "status": "complete"})
-         with open(cache_path, "w", encoding="utf-8") as f:
-             f.write(result)
-         with open(os.path.join(report_dir, f"{file_hash}_report.txt"), "w", encoding="utf-8") as out:
-             out.write(full_text)
      except Exception as e:
-         print(f"Background processing failed: {str(e)}")
- 
- def init_agent():
-     default_tool_path = os.path.abspath("data/new_tool.json")
-     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-     if not os.path.exists(target_tool_path):
-         shutil.copy(default_tool_path, target_tool_path)
- 
-     agent = TxAgent(
-         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-         tool_files_dict={"new_tool": target_tool_path},
-         force_finish=True,
-         enable_checker=True,
-         step_rag_num=8,
-         seed=100,
-         additional_default_tools=[],
-     )
-     agent.init_model()
-     return agent
 
  def format_response(response: str) -> str:
-     """Clean and format the response for display"""
-     # Remove all tool call artifacts
      response = response.replace("[TOOL_CALLS]", "").strip()
- 
-     # Remove duplicate sections if they exist
      if "Based on the medical records provided" in response:
          parts = response.split("Based on the medical records provided")
-         if len(parts) > 1:
-             response = "Based on the medical records provided" + parts[-1]
 
-     # Format sections with Markdown
-     formatted = response.replace("1. **Missed Diagnoses**:", "### 🔍 Missed Diagnoses")
-     formatted = formatted.replace("2. **Medication Conflicts**:", "\n### 💊 Medication Conflicts")
-     formatted = formatted.replace("3. **Incomplete Assessments**:", "\n### 📋 Incomplete Assessments")
-     formatted = formatted.replace("4. **Abnormal Results Needing Follow-up**:", "\n### ⚠️ Abnormal Results Needing Follow-up")
-     formatted = formatted.replace("Overall, the patient's medical records", "\n### 📝 Overall Assessment")
 
-     return formatted
 
- def analyze_potential_oversights(message: str, history: list, conversation: list, files: list):
-     start_time = time.time()
      try:
-         # Initial loading message
-         history = history + [
-             {"role": "user", "content": message},
-             {"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."}
-         ]
          yield history, None
- 
-         # Process uploaded files
          extracted_data = ""
-         file_hash_value = ""
-         if files and isinstance(files, list):
              with ThreadPoolExecutor(max_workers=4) as executor:
-                 futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower())
                             for f in files if hasattr(f, 'name')]
-                 extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
-                 file_hash_value = file_hash(files[0].name) if files else ""
- 
-         # Prepare the analysis prompt
-         analysis_prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
- 1. List potential missed diagnoses
- 2. Flag any medication conflicts
- 3. Note incomplete assessments
- 4. Highlight abnormal results needing follow-up
- 
- Medical Records:\n{extracted_data[:15000]}
 
- ### Potential Oversights:\n"""
 
-         # Process the response from the agent
-         full_response = ""
          for chunk in agent.run_gradio_chat(
-             message=analysis_prompt,
              history=[],
              temperature=0.2,
-             max_new_tokens=1024,
-             max_token=4096,
-             call_agent=False,
-             conversation=conversation
          ):
              if isinstance(chunk, str):
-                 full_response += chunk
              elif isinstance(chunk, list):
-                 full_response += "".join([c.content for c in chunk if hasattr(c, 'content')])
- 
-             # Format and display the partial response
-             formatted = format_response(full_response)
              if formatted.strip():
-                 history = history[:-1] + [{"role": "assistant", "content": formatted}]
                  yield history, None
- 
-         # Final formatting and cleanup
-         final_output = format_response(full_response)
-         if not final_output.strip():
-             final_output = "No clear oversights identified. Recommend comprehensive review."
- 
-         # Prepare report download if available
-         report_path = None
-         if file_hash_value:
-             possible_report = os.path.join(report_dir, f"{file_hash_value}_report.txt")
-             if os.path.exists(possible_report):
-                 report_path = possible_report
- 
-         # Update history with final response
-         history = history[:-1] + [{"role": "assistant", "content": final_output}]
-         yield history, report_path
- 
-     except Exception as e:
-         history.append({"role": "assistant", "content": f"❌ Analysis failed: {str(e)}"})
          yield history, None
- 
- def create_ui(agent: TxAgent):
-     with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {max-width: 900px !important}") as demo:
-         gr.Markdown("""
-         <div style='text-align: center;'>
-             <h1>🩺 Clinical Oversight Assistant</h1>
-             <h3>Identify potential oversights in patient care</h3>
-             <p>Upload medical records to analyze for missed diagnoses, medication conflicts, and other potential issues.</p>
-         </div>
-         """)
- 
-         with gr.Row():
-             with gr.Column(scale=2):
-                 file_upload = gr.File(
-                     label="Upload Medical Records",
-                     file_types=[".pdf", ".csv", ".xls", ".xlsx"],
-                     file_count="multiple",
-                     height=100
-                 )
-                 msg_input = gr.Textbox(
-                     placeholder="Ask about potential oversights...",
-                     show_label=False,
-                     lines=3,
-                     max_lines=6
-                 )
-                 send_btn = gr.Button("Analyze", variant="primary", size="lg")
- 
-                 gr.Examples(
-                     examples=[
-                         ["What might have been missed in this patient's treatment?"],
-                         ["Are there any medication conflicts in these records?"],
-                         ["What abnormal results require follow-up?"],
-                         ["Identify any incomplete assessments in these records"]
-                     ],
-                     inputs=msg_input,
-                     label="Example Queries"
-                 )
- 
-             with gr.Column(scale=3):
-                 chatbot = gr.Chatbot(
-                     label="Analysis Results",
-                     height=600,
-                     show_copy_button=True,
-                     avatar_images=(
-                         "assets/user.png",
-                         "assets/doctor.png"
-                     )
-                 )
-                 download_output = gr.File(
-                     label="Download Full Report",
-                     visible=False
-                 )
- 
-         conversation_state = gr.State([])
- 
-         inputs = [msg_input, chatbot, conversation_state, file_upload]
-         outputs = [chatbot, download_output]
 
-         send_btn.click(
-             analyze_potential_oversights,
-             inputs=inputs,
-             outputs=outputs
-         )
-         msg_input.submit(
-             analyze_potential_oversights,
-             inputs=inputs,
-             outputs=outputs
-         )
 
-         return demo
 
  if __name__ == "__main__":
-     print("Initializing medical analysis agent...")
-     agent = init_agent()
- 
-     print("Launching interface...")
-     demo = create_ui(agent)
-     demo.queue().launch(
          server_name="0.0.0.0",
          server_port=7860,
-         show_error=True,
-         allowed_paths=["/data/reports"],
-         share=False
      )
 
+ import sys
+ import os
  import gradio as gr
+ from typing import List
  import hashlib
  import time
+ import json
  import re
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from threading import Thread
+ import pandas as pd
+ import pdfplumber
 
+ # Optimized environment setup
  os.environ.update({
+     "HF_HOME": "/data/hf_cache",
+     "VLLM_CACHE_DIR": "/data/vllm_cache",
      "TOKENIZERS_PARALLELISM": "false",
      "CUDA_LAUNCH_BLOCKING": "1"
  })
 
+ # Create cache directories if they don't exist
+ os.makedirs("/data/hf_cache", exist_ok=True)
+ os.makedirs("/data/tool_cache", exist_ok=True)
+ os.makedirs("/data/file_cache", exist_ok=True)
+ os.makedirs("/data/reports", exist_ok=True)
+ os.makedirs("/data/vllm_cache", exist_ok=True)
+ 
+ # Lazy loading of heavy dependencies
+ def lazy_load_agent():
+     from txagent.txagent import TxAgent
+ 
+     # Initialize agent with optimized settings
+     agent = TxAgent(
+         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+         tool_files_dict={"new_tool": "/data/tool_cache/new_tool.json"},
+         force_finish=True,
+         enable_checker=True,
+         step_rag_num=8,
+         seed=100,
+         additional_default_tools=[],
+     )
+     agent.init_model()
+     return agent
 
+ # Pre-load the agent in a separate thread
+ agent = None
+ def preload_agent():
+     global agent
+     agent = lazy_load_agent()
 
+ Thread(target=preload_agent).start()
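+ # Requests that arrive before the model finishes loading will busy-wait in
+ # analyze_files() until `agent` is set.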
 
+ # File processing functions
  def file_hash(path: str) -> str:
      with open(path, "rb") as f:
          return hashlib.md5(f.read()).hexdigest()
 
+ def extract_priority_pages(file_path: str, max_pages: int = 10) -> str:
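+     # Reads at most the first `max_pages` pages; later pages are skipped.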
      try:
          with pdfplumber.open(file_path) as pdf:
+             return "\n\n".join(
+                 f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}"
+                 for i, page in enumerate(pdf.pages[:max_pages])
+             )
      except Exception as e:
          return f"PDF processing error: {str(e)}"
 
+ def process_file(file_path: str, file_type: str) -> str:
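+     # Converted output is cached on disk, keyed by the MD5 of the file
+     # bytes, so re-uploading an identical file skips re-parsing.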
      try:
          h = file_hash(file_path)
+         cache_path = f"/data/file_cache/{h}.json"
+ 
          if os.path.exists(cache_path):
+             with open(cache_path, "r", encoding="utf-8") as f:
+                 return f.read()
+ 
          if file_type == "pdf":
+             content = extract_priority_pages(file_path)
+             result = json.dumps({"filename": os.path.basename(file_path), "content": content})
          elif file_type == "csv":
+             df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str)
+             result = json.dumps({"filename": os.path.basename(file_path), "rows": df.fillna("").values.tolist()})
          elif file_type in ["xls", "xlsx"]:
+             df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
+             result = json.dumps({"filename": os.path.basename(file_path), "rows": df.fillna("").values.tolist()})
          else:
              return json.dumps({"error": f"Unsupported file type: {file_type}"})
 
          with open(cache_path, "w", encoding="utf-8") as f:
              f.write(result)
          return result
+ 
      except Exception as e:
+         return json.dumps({"error": str(e)})
 
  def format_response(response: str) -> str:
      response = response.replace("[TOOL_CALLS]", "").strip()
      if "Based on the medical records provided" in response:
          parts = response.split("Based on the medical records provided")
+         response = "Based on the medical records provided" + parts[-1]
+ 
+     replacements = {
+         "1. **Missed Diagnoses**:": "### 🔍 Missed Diagnoses",
+         "2. **Medication Conflicts**:": "\n### 💊 Medication Conflicts",
+         "3. **Incomplete Assessments**:": "\n### 📋 Incomplete Assessments",
+         "4. **Abnormal Results Needing Follow-up**:": "\n### ⚠️ Abnormal Results Needing Follow-up",
+         "Overall, the patient's medical records": "\n### 📝 Overall Assessment"
+     }
+ 
+     for old, new in replacements.items():
+         response = response.replace(old, new)
+ 
+     return response
 
+ def analyze_files(message: str, history: List, files: List):
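+     # Generator: yields (chat_history, report_file) pairs so Gradio can
+     # stream partial results; the report slot is always None here.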
 
      try:
+         # Wait for agent to load if not ready
+         while agent is None:
+             time.sleep(0.1)
+ 
+         # Append user message to history in correct format
+         history.append([message, None])
          yield history, None
+ 
+         # Process files in parallel
          extracted_data = ""
+         if files:
              with ThreadPoolExecutor(max_workers=4) as executor:
+                 futures = [executor.submit(process_file, f.name, f.name.split(".")[-1].lower())
                             for f in files if hasattr(f, 'name')]
+                 extracted_data = "\n".join(f.result() for f in as_completed(futures))
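+ 
+         # Only the first 10,000 characters of the extracted text are sent,
+         # to bound the prompt size.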
+         prompt = f"""Review these medical records:
+ {extracted_data[:10000]}
 
+ Identify:
+ 1. Potential missed diagnoses
+ 2. Medication conflicts
+ 3. Incomplete assessments
+ 4. Abnormal results needing follow-up
 
+ Analysis:"""
+ 
+         response = ""
          for chunk in agent.run_gradio_chat(
+             message=prompt,
              history=[],
              temperature=0.2,
+             max_new_tokens=800,
+             max_token=3000
          ):
              if isinstance(chunk, str):
+                 response += chunk
              elif isinstance(chunk, list):
+                 response += "".join(getattr(c, 'content', '') for c in chunk)
+ 
+             formatted = format_response(response)
              if formatted.strip():
+                 history[-1][1] = formatted
                  yield history, None
+ 
+         final_output = format_response(response) or "No clear oversights identified."
+         history[-1][1] = final_output
          yield history, None
 
+     except Exception as e:
+         history[-1][1] = f"❌ Error: {str(e)}"
+         yield history, None
 
+ # Create optimized UI with better layout
+ with gr.Blocks(title="Clinical Oversight Assistant", css="""
+ .gradio-container {
+     max-width: 1200px !important;
+     margin: auto;
+ }
+ .container {
+     max-width: 1200px !important;
+ }
+ .chatbot {
+     min-height: 500px;
+ }
+ """) as demo:
+     gr.Markdown("""
+     <div style='text-align: center; margin-bottom: 20px;'>
+         <h1 style='margin-bottom: 10px;'>🩺 Clinical Oversight Assistant</h1>
+         <p>Upload medical records to analyze for potential oversights in patient care</p>
+     </div>
+     """)
+ 
+     with gr.Row():
+         with gr.Column(scale=1, min_width=400):
+             file_upload = gr.File(
+                 label="Upload Medical Records",
+                 file_types=[".pdf", ".csv", ".xls", ".xlsx"],
+                 file_count="multiple",
+                 height=100
+             )
+             query = gr.Textbox(
+                 label="Your Query",
+                 placeholder="Ask about potential oversights...",
+                 lines=3
+             )
+             submit = gr.Button("Analyze", variant="primary")
+ 
+             gr.Examples(
+                 examples=[
+                     ["What potential diagnoses might have been missed?"],
+                     ["Are there any medication conflicts I should be aware of?"],
+                     ["What assessments appear incomplete in these records?"]
+                 ],
+                 inputs=query,
+                 label="Example Queries"
+             )
+ 
+         with gr.Column(scale=2, min_width=600):
+             chatbot = gr.Chatbot(
+                 label="Analysis Results",
+                 height=600,
+                 bubble_full_width=False,
+                 show_copy_button=True
+             )
+ 
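+     # The hidden gr.File fills the second output slot that analyze_files
+     # yields (always None here), without showing a download box.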
+     submit.click(
+         analyze_files,
+         inputs=[query, chatbot, file_upload],
+         outputs=[chatbot, gr.File(visible=False)]
+     )
+ 
+     query.submit(
+         analyze_files,
+         inputs=[query, chatbot, file_upload],
+         outputs=[chatbot, gr.File(visible=False)]
+     )
 
  if __name__ == "__main__":
+     demo.queue(concurrency_count=1).launch(
          server_name="0.0.0.0",
          server_port=7860,
+         show_error=True
      )