CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 13

Commit

9ef8abc

verified ·

1 Parent(s): ecb8e1d

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -108

app.py CHANGED Viewed

@@ -1,36 +1,30 @@
-# Optimized app.py with lazy loading and preloading thread, fixed chatbot format and startup error handling
 import os
 import gradio as gr
-from typing import List
 import hashlib
 import time
 import json
-import re
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from threading import Thread
 import pandas as pd
 import pdfplumber
-# Optimized environment setup
 os.environ.update({
     "HF_HOME": "/data/hf_cache",
-    "VLLM_CACHE_DIR": "/data/vllm_cache",
-    "TOKENIZERS_PARALLELISM": "false",
-    "CUDA_LAUNCH_BLOCKING": "1"
 })
-# Create cache directories if they don't exist
 os.makedirs("/data/hf_cache", exist_ok=True)
-os.makedirs("/data/tool_cache", exist_ok=True)
 os.makedirs("/data/file_cache", exist_ok=True)
 os.makedirs("/data/reports", exist_ok=True)
-os.makedirs("/data/vllm_cache", exist_ok=True)
-# Lazy loading of heavy dependencies
-def lazy_load_agent():
-    from txagent.txagent import TxAgent
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -38,159 +32,161 @@ def lazy_load_agent():
         force_finish=True,
         enable_checker=True,
         step_rag_num=8,
-        seed=100,
-        additional_default_tools=[],
     )
     agent.init_model()
-    return agent
-# Pre-load the agent in a separate thread
-agent = None
-def preload_agent():
-    global agent
-    agent = lazy_load_agent()
-Thread(target=preload_agent).start()
-# File processing functions
 def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
-def extract_priority_pages(file_path: str, max_pages: int = 10) -> str:
     try:
         with pdfplumber.open(file_path) as pdf:
-            return "\n\n".join(
-                f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}"
                 for i, page in enumerate(pdf.pages[:max_pages])
             )
     except Exception as e:
-        return f"PDF processing error: {str(e)}"
 def process_file(file_path: str, file_type: str) -> str:
     try:
-        h = file_hash(file_path)
-        cache_path = f"/data/file_cache/{h}.json"
         if os.path.exists(cache_path):
-            with open(cache_path, "r", encoding="utf-8") as f:
                 return f.read()
         if file_type == "pdf":
-            content = extract_priority_pages(file_path)
-            result = json.dumps({"filename": os.path.basename(file_path), "content": content})
         elif file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str)
-            result = json.dumps({"filename": os.path.basename(file_path), "rows": df.fillna("").values.tolist()})
         elif file_type in ["xls", "xlsx"]:
-            df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
-            result = json.dumps({"filename": os.path.basename(file_path), "rows": df.fillna("").values.tolist()})
         else:
-            return json.dumps({"error": f"Unsupported file type: {file_type}"})
-        with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": str(e)})
 def format_response(response: str) -> str:
     response = response.replace("[TOOL_CALLS]", "").strip()
-    if "Based on the medical records provided" in response:
-        parts = response.split("Based on the medical records provided")
-        response = "Based on the medical records provided" + parts[-1]
-    replacements = {
-        "1. **Missed Diagnoses**:": "### 🔍 Missed Diagnoses",
-        "2. **Medication Conflicts**:": "\n### 💊 Medication Conflicts",
-        "3. **Incomplete Assessments**:": "\n### 📋 Incomplete Assessments",
-        "4. **Abnormal Results Needing Follow-up**:": "\n### ⚠️ Abnormal Results Needing Follow-up",
-        "Overall, the patient's medical records": "\n### 📝 Overall Assessment"
     }
-    for old, new in replacements.items():
-        response = response.replace(old, new)
     return response
-def analyze_files(message: str, history: List, files: List):
     try:
-        while agent is None:
-            time.sleep(0.1)
-        history.append([message, None])
-        yield history, None
         extracted_data = ""
         if files:
-            with ThreadPoolExecutor(max_workers=4) as executor:
-                futures = [executor.submit(process_file, f.name, f.name.split(".")[-1].lower())
-                           for f in files if hasattr(f, 'name')]
                 extracted_data = "\n".join(f.result() for f in as_completed(futures))
         prompt = f"""Review these medical records:
 {extracted_data[:10000]}
-Identify:
-1. Potential missed diagnoses
-2. Medication conflicts
 3. Incomplete assessments
 4. Abnormal results needing follow-up
 Analysis:"""
         response = ""
         for chunk in agent.run_gradio_chat(
             message=prompt,
             history=[],
             temperature=0.2,
-            max_new_tokens=800,
-            max_token=3000
         ):
             if isinstance(chunk, str):
                 response += chunk
             elif isinstance(chunk, list):
                 response += "".join(getattr(c, 'content', '') for c in chunk)
-            formatted = format_response(response)
-            if formatted.strip():
-                history[-1][1] = formatted
-                yield history, None
-        final_output = format_response(response) or "No clear oversights identified."
-        history[-1][1] = final_output
         yield history, None
     except Exception as e:
-        history[-1][1] = f"❌ Error: {str(e)}"
         yield history, None
-# UI definition
-with gr.Blocks(title="Clinical Oversight Assistant") as demo:
-    gr.Markdown("""
-    <div style='text-align: center;'>
-        <h1>🩺 Clinical Oversight Assistant</h1>
-        <p>Upload medical records to analyze for potential oversights in patient care</p>
-    </div>
-    """)
     with gr.Row():
         with gr.Column(scale=1):
-            file_upload = gr.File(label="Upload Medical Records", file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
-            query = gr.Textbox(label="Your Query", placeholder="Ask about potential oversights...", lines=3)
             submit = gr.Button("Analyze", variant="primary")
-            gr.Examples([
-                ["What potential diagnoses might have been missed?"],
-                ["Are there any medication conflicts I should be aware of?"],
-                ["What assessments appear incomplete in these records?"]
-            ], inputs=query)
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="Analysis Results", height=600, type="messages")
-    submit.click(analyze_files, inputs=[query, chatbot, file_upload], outputs=[chatbot, gr.File(visible=False)])
-    query.submit(analyze_files, inputs=[query, chatbot, file_upload], outputs=[chatbot, gr.File(visible=False)])
 if __name__ == "__main__":
-    demo.queue().launch(server_name="0.0.0.0", server_port=7860, show_error=True)

+import sys
 import os
 import gradio as gr
 import hashlib
 import time
 import json
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import pandas as pd
 import pdfplumber
+# Set up environment
 os.environ.update({
     "HF_HOME": "/data/hf_cache",
+    "TOKENIZERS_PARALLELISM": "false"
 })
+# Create cache directories
 os.makedirs("/data/hf_cache", exist_ok=True)
 os.makedirs("/data/file_cache", exist_ok=True)
 os.makedirs("/data/reports", exist_ok=True)
+# Import TxAgent after setting up environment
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "src")))
+from txagent.txagent import TxAgent
+# Initialize agent with error handling
+try:
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         force_finish=True,
         enable_checker=True,
         step_rag_num=8,
+        seed=100
     )
     agent.init_model()
+except Exception as e:
+    print(f"Failed to initialize agent: {str(e)}")
+    agent = None
 def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
+def extract_text_from_pdf(file_path: str, max_pages: int = 10) -> str:
     try:
         with pdfplumber.open(file_path) as pdf:
+            return "\n".join(
+                f"Page {i+1}:\n{(page.extract_text() or '').strip()}\n"
                 for i, page in enumerate(pdf.pages[:max_pages])
             )
     except Exception as e:
+        return f"PDF error: {str(e)}"
 def process_file(file_path: str, file_type: str) -> str:
     try:
+        cache_path = f"/data/file_cache/{file_hash(file_path)}.json"
         if os.path.exists(cache_path):
+            with open(cache_path, "r") as f:
                 return f.read()
         if file_type == "pdf":
+            content = extract_text_from_pdf(file_path)
         elif file_type == "csv":
+            df = pd.read_csv(file_path, header=None, dtype=str, on_bad_lines="skip")
+            content = df.fillna("").to_string()
         elif file_type in ["xls", "xlsx"]:
+            df = pd.read_excel(file_path, header=None, dtype=str)
+            content = df.fillna("").to_string()
         else:
+            return json.dumps({"error": "Unsupported file type"})
+        result = json.dumps({"filename": os.path.basename(file_path), "content": content})
+        with open(cache_path, "w") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": str(e)})
 def format_response(response: str) -> str:
     response = response.replace("[TOOL_CALLS]", "").strip()
+    sections = {
+        "1. **Missed Diagnoses**:": "🔍 Missed Diagnoses",
+        "2. **Medication Conflicts**:": "💊 Medication Conflicts",
+        "3. **Incomplete Assessments**:": "📋 Incomplete Assessments",
+        "4. **Abnormal Results Needing Follow-up**:": "⚠️ Abnormal Results"
     }
+    for old, new in sections.items():
+        response = response.replace(old, f"\n### {new}\n")
     return response
+def analyze(message: str, history: list, files: list):
+    if agent is None:
+        yield history + [(message, "Agent initialization failed. Please try again later.")], None
+        return
+    history.append((message, None))
+    yield history, None
     try:
         extracted_data = ""
         if files:
+            with ThreadPoolExecutor() as executor:
+                futures = [executor.submit(process_file, f.name, f.name.split(".")[-1])
+                         for f in files if hasattr(f, 'name')]
                 extracted_data = "\n".join(f.result() for f in as_completed(futures))
         prompt = f"""Review these medical records:
 {extracted_data[:10000]}
+Identify potential issues:
+1. Missed diagnoses
+2. Medication conflicts
 3. Incomplete assessments
 4. Abnormal results needing follow-up
 Analysis:"""
         response = ""
         for chunk in agent.run_gradio_chat(
             message=prompt,
             history=[],
             temperature=0.2,
+            max_new_tokens=800
         ):
             if isinstance(chunk, str):
                 response += chunk
             elif isinstance(chunk, list):
                 response += "".join(getattr(c, 'content', '') for c in chunk)
+            history[-1] = (message, format_response(response))
+            yield history, None
+        history[-1] = (message, format_response(response))
         yield history, None
     except Exception as e:
+        history[-1] = (message, f"❌ Error: {str(e)}")
         yield history, None
+# Create the interface
+with gr.Blocks(
+    title="Clinical Oversight Assistant",
+    css="""
+    .gradio-container {
+        max-width: 1000px;
+        margin: auto;
+    }
+    .chatbot {
+        min-height: 500px;
+    }
+    """
+) as demo:
+    gr.Markdown("# 🩺 Clinical Oversight Assistant")
     with gr.Row():
         with gr.Column(scale=1):
+            files = gr.File(
+                label="Upload Medical Records",
+                file_types=[".pdf", ".csv", ".xlsx"],
+                file_count="multiple"
+            )
+            query = gr.Textbox(
+                label="Your Query",
+                placeholder="Ask about potential oversights..."
+            )
             submit = gr.Button("Analyze", variant="primary")
         with gr.Column(scale=2):
+            chatbot = gr.Chatbot(
+                label="Analysis Results",
+                show_copy_button=True
+            )
+    submit.click(
+        analyze,
+        inputs=[query, chatbot, files],
+        outputs=[chatbot, gr.File(visible=False)]
+    )
+    query.submit(
+        analyze,
+        inputs=[query, chatbot, files],
+        outputs=[chatbot, gr.File(visible=False)]
+    )
 if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True
+    )