Ali2206 committed
Commit 1da2cfd · verified · Parent: 3b1f183

Update app.py

Files changed (1): app.py (+164 -130)
app.py CHANGED
@@ -10,15 +10,17 @@ import hashlib
 import shutil
 import time
 from functools import lru_cache
+from threading import Thread
+import re
 
-# Environment and path setup
+# Environment setup
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
-print(f"Adding to path: {src_path}")
 sys.path.insert(0, src_path)
 
-# Configure cache directories
+# Cache directories
 base_dir = "/data"
+os.makedirs(base_dir, exist_ok=True)
 model_cache_dir = os.path.join(base_dir, "txagent_models")
 tool_cache_dir = os.path.join(base_dir, "tool_cache")
 file_cache_dir = os.path.join(base_dir, "cache")
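One ordering constraint worth noting: the environment block in the next hunk sits above `from txagent.txagent import TxAgent` because huggingface_hub (pulled in by transformers) reads variables like `HF_HOME` once at import time. A minimal reproduction of that constraint, assuming a recent huggingface_hub:

```python
# Sketch: set cache env vars before importing the libraries that read them.
import os
os.environ["HF_HOME"] = "/data/txagent_models"  # set first...

import huggingface_hub  # ...then import; constants are computed on import
print(huggingface_hub.constants.HF_HOME)  # -> /data/txagent_models
```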
@@ -27,13 +29,21 @@ os.makedirs(model_cache_dir, exist_ok=True)
 os.makedirs(tool_cache_dir, exist_ok=True)
 os.makedirs(file_cache_dir, exist_ok=True)
 
-os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
-os.environ["HF_HOME"] = model_cache_dir
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
+os.environ.update({
+    "TRANSFORMERS_CACHE": model_cache_dir,
+    "HF_HOME": model_cache_dir,
+    "TOKENIZERS_PARALLELISM": "false",
+    "CUDA_LAUNCH_BLOCKING": "1"
+})
 
 from txagent.txagent import TxAgent
 
+# Medical keywords for priority detection
+MEDICAL_KEYWORDS = {
+    'diagnosis', 'assessment', 'plan', 'results', 'medications',
+    'allergies', 'summary', 'impression', 'findings', 'recommendations'
+}
+
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
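The `MEDICAL_KEYWORDS` set feeds the page-prioritization logic added in the next hunk; the `\b`-bounded regex there matches whole words only. A standalone sketch of that screen (sample strings invented for illustration):

```python
# Sketch of the whole-word keyword screen used by extract_priority_pages.
# MEDICAL_KEYWORDS mirrors the set added above; sample text is invented.
import re

MEDICAL_KEYWORDS = {
    'diagnosis', 'assessment', 'plan', 'results', 'medications',
    'allergies', 'summary', 'impression', 'findings', 'recommendations'
}

def page_is_relevant(page_text: str) -> bool:
    lowered = page_text.lower()
    # \b requires whole words: 'plan' matches, 'planned' does not
    return any(re.search(rf'\b{kw}\b', lowered) for kw in MEDICAL_KEYWORDS)

print(page_is_relevant("Assessment and Plan: start metformin"))  # True
print(page_is_relevant("Administrative cover sheet"))            # False
```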
 
@@ -41,193 +51,217 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-@lru_cache(maxsize=100)
-def get_cached_response(prompt: str, file_hash: str) -> Optional[str]:
-    return None
+def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
+    """Fast extraction of first pages and medically relevant sections"""
+    try:
+        text_chunks = []
+        with pdfplumber.open(file_path) as pdf:
+            # Always process first 3 pages
+            for i, page in enumerate(pdf.pages[:3]):
+                text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
+
+            # Scan subsequent pages for medical keywords
+            for i, page in enumerate(pdf.pages[3:max_pages], start=4):
+                page_text = page.extract_text() or ""
+                if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
+
+        return "\n\n".join(text_chunks)
+    except Exception as e:
+        return f"PDF processing error: {str(e)}"
 
 def convert_file_to_json(file_path: str, file_type: str) -> str:
+    """Optimized file conversion with medical focus"""
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
 
         if os.path.exists(cache_path):
             return open(cache_path, "r", encoding="utf-8").read()
 
-        if file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
+        if file_type == "pdf":
+            # Fast initial processing
+            text = extract_priority_pages(file_path)
+            result = json.dumps({
+                "filename": os.path.basename(file_path),
+                "content": text,
+                "status": "initial"
+            })
+
+            # Start background full processing
+            Thread(target=full_pdf_processing, args=(file_path, h)).start()
+
+        elif file_type == "csv":
+            df = pd.read_csv(file_path, encoding_errors="replace", header=None,
+                             dtype=str, skip_blank_lines=False, on_bad_lines="skip")
+            content = df.fillna("").astype(str).values.tolist()
+            result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
+
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
             except:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
-        elif file_type == "pdf":
-            with pdfplumber.open(file_path) as pdf:
-                text = "\n".join([page.extract_text() or "" for page in pdf.pages])
-            result = json.dumps({"filename": os.path.basename(file_path), "content": text.strip()})
-            with open(cache_path, "w", encoding="utf-8") as f:
-                f.write(result)
-            return result
+            content = df.fillna("").astype(str).values.tolist()
+            result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
+
         else:
             return json.dumps({"error": f"Unsupported file type: {file_type}"})
 
-        if df is None or df.empty:
-            return json.dumps({"warning": f"No data extracted from: {file_path}"})
-
-        df = df.fillna("")
-        content = df.astype(str).values.tolist()
-        result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
+
     except Exception as e:
-        return json.dumps({"error": f"Error reading {os.path.basename(file_path)}: {str(e)}"})
-
-def convert_files_to_json_parallel(uploaded_files: list) -> str:
-    extracted_text = []
-    with ThreadPoolExecutor(max_workers=4) as executor:
-        futures = []
-        for file in uploaded_files:
-            if not hasattr(file, 'name'):
-                continue
-            path = file.name
-            ext = path.split(".")[-1].lower()
-            futures.append(executor.submit(convert_file_to_json, path, ext))
-
-        for future in as_completed(futures):
-            extracted_text.append(sanitize_utf8(future.result()))
-    return "\n".join(extracted_text)
+        return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
+
+def full_pdf_processing(file_path: str, file_hash: str):
+    """Background full PDF processing"""
+    try:
+        cache_path = os.path.join(file_cache_dir, f"{file_hash}_full.json")
+        if os.path.exists(cache_path):
+            return
+
+        with pdfplumber.open(file_path) as pdf:
+            full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}"
+                                   for i, page in enumerate(pdf.pages)])
+
+        result = json.dumps({
+            "filename": os.path.basename(file_path),
+            "content": full_text,
+            "status": "complete"
+        })
+
+        with open(cache_path, "w", encoding="utf-8") as f:
+            f.write(result)
+    except Exception as e:
+        print(f"Background processing failed: {str(e)}")
 
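Two cache tiers now exist per file: the fast `{hash}.json` written synchronously and the `{hash}_full.json` written by the background thread, which nothing in this commit reads back yet. Note also that `full_pdf_processing`'s `file_hash` parameter shadows the `file_hash()` helper defined above (harmless here, since the function never hashes, but worth renaming). A hypothetical consumer that prefers the complete tier (a sketch, not part of the commit):

```python
# Hypothetical lookup for the two-tier PDF cache: prefer the complete
# "_full" entry, fall back to the fast "initial" one. file_cache_dir and
# the file-naming scheme follow the diff; this helper is illustrative.
import json
import os

def read_cached_extraction(file_cache_dir: str, h: str):
    for suffix in ("_full", ""):  # complete tier first, then initial
        path = os.path.join(file_cache_dir, f"{h}{suffix}.json")
        if os.path.exists(path):
            with open(path, "r", encoding="utf-8") as fp:
                return json.load(fp)
    return None  # nothing cached yet
```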
 def init_agent():
+    """Initialize TxAgent with medical analysis focus"""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
 
-    model_name = "mims-harvard/TxAgent-T1-Llama-3.1-8B"
-    rag_model_name = "mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B"
-
     agent = TxAgent(
-        model_name=model_name,
-        rag_model_name=rag_model_name,
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
         step_rag_num=8,
         seed=100,
-        additional_default_tools=[]
+        additional_default_tools=[],
+        device_map="auto"
     )
     agent.init_model()
     return agent
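Whether TxAgent forwards the new `device_map="auto"` argument to its underlying model loader is not visible from this diff, so it is worth verifying against the txagent source. Separately, `init_agent()` loads both models each time it is called; if it were ever invoked more than once per process, a memo guard would avoid the reload (a sketch assuming the `init_agent` above; not part of the commit):

```python
# Sketch: cache the heavyweight agent so repeated calls reuse one instance.
from functools import lru_cache  # already imported at the top of app.py

@lru_cache(maxsize=1)
def get_agent():
    return init_agent()  # loads the LLM and RAG model only once
```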
 def create_ui(agent: TxAgent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("<h1 style='text-align: center;'>📋 CPS: Clinical Patient Support System</h1>")
+        gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
+        gr.Markdown("<h3 style='text-align: center;'>Identify potential oversights in patient care</h3>")
 
-        chatbot = gr.Chatbot(label="CPS Assistant", height=600, type="messages")
+        chatbot = gr.Chatbot(label="Analysis", height=600)
         file_upload = gr.File(
-            label="Upload Medical File",
-            file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".csv", ".xls", ".xlsx"],
+            label="Upload Medical Records",
+            file_types=[".pdf", ".csv", ".xls", ".xlsx"],
             file_count="multiple"
         )
-        message_input = gr.Textbox(placeholder="Ask a biomedical question or just upload the files...", show_label=False)
-        send_button = gr.Button("Send", variant="primary")
+        msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
+        send_btn = gr.Button("Analyze", variant="primary")
         conversation_state = gr.State([])
 
-        def handle_chat(message: str, history: list, conversation: list, uploaded_files: list, progress=gr.Progress()):
+        def analyze_potential_oversights(message: str, history: list, conversation: list, files: list):
             start_time = time.time()
             try:
-                history.append({"role": "user", "content": message})
-                history.append({"role": "assistant", "content": "⏳ Processing your request..."})
+                history.append((message, "Analyzing records for potential oversights..."))
                 yield history
-
-                file_process_time = time.time()
-                extracted_text = ""
-                if uploaded_files and isinstance(uploaded_files, list):
-                    extracted_text = convert_files_to_json_parallel(uploaded_files)
-                    print(f"File processing took: {time.time() - file_process_time:.2f}s")
-
-                context = (
-                    "You are an expert clinical AI assistant. Review this patient's history, "
-                    "medications, and notes, and ONLY provide a final answer summarizing "
-                    "what the doctor might have missed."
-                )
-                chunked_prompt = f"{context}\n\n--- Patient Record ---\n{extracted_text}\n\n[Final Analysis]"
-
-                model_start = time.time()
-                generator = agent.run_gradio_chat(
-                    message=chunked_prompt,
+
+                # Process files
+                extracted_data = ""
+                if files:
+                    with ThreadPoolExecutor(max_workers=4) as executor:
+                        futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower())
+                                   for f in files if hasattr(f, 'name')]
+                        extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
+
+                # Medical oversight analysis prompt
+                analysis_prompt = """Review these medical records and identify EXACTLY what might have been missed:
+1. List potential missed diagnoses
+2. Flag any medication conflicts
+3. Note incomplete assessments
+4. Highlight abnormal results needing follow-up
+
+Medical Records:
+{records}
+
+Provide ONLY the potential oversights in this format:
+
+### Potential Oversights:
+1. [Missed diagnosis] - [Evidence from records]
+2. [Medication issue] - [Supporting data]
+3. [Assessment gap] - [Relevant findings]""".format(records=extracted_data[:15000])  # Limit input size
+
+                # Generate analysis
+                response = []
+                for chunk in agent.run_gradio_chat(
+                    message=analysis_prompt,
                     history=[],
-                    temperature=0.3,
-                    max_new_tokens=768,
+                    temperature=0.2,  # More deterministic
+                    max_new_tokens=1024,
                     max_token=4096,
                     call_agent=False,
-                    conversation=conversation,
-                    uploaded_files=uploaded_files,
-                    max_round=10
-                )
-
-                final_response = ""
-                for update in generator:
-                    if not update:
-                        continue
-                    if isinstance(update, list):
-                        for msg in update:
-                            if hasattr(msg, 'content'):
-                                final_response += msg.content
-                    elif isinstance(update, str):
-                        final_response += update
-
-                cleaned = final_response.strip().replace("[TOOL_CALLS]", "")
-                history[-1] = {"role": "assistant", "content": cleaned or "❌ No response."}
-                yield history
-
-                print("Final model response:\n", final_response)
-                history[-1] = {"role": "assistant", "content": final_response.strip() or "❌ No response."}
-                print(f"Model processing took: {time.time() - model_start:.2f}s")
+                    conversation=conversation
+                ):
+                    if isinstance(chunk, str):
+                        response.append(chunk)
+                    elif isinstance(chunk, list):
+                        response.extend([c.content for c in chunk if hasattr(c, 'content')])
+
+                    if len(response) % 3 == 0:  # Update every 3 chunks
+                        history[-1] = (message, "".join(response).strip())
+                        yield history
+
+                # Finalize output
+                final_output = "".join(response).strip()
+                if not final_output:
+                    final_output = "No clear oversights identified. Recommend comprehensive review."
+
+                # Format as bullet points if not already
+                if not final_output.startswith(("1.", "-", "*", "#")):
+                    final_output = "• " + final_output.replace("\n", "\n• ")
+
+                history[-1] = (message, f"### Potential Clinical Oversights:\n{final_output}")
+                print(f"Analysis completed in {time.time() - start_time:.2f}s")
                 yield history
 
-            except Exception as chat_error:
-                print(f"Chat handling error: {chat_error}")
-                history[-1] = {"role": "assistant", "content": "❌ An error occurred while processing your request."}
+            except Exception as e:
+                history.append((message, f"Analysis failed: {str(e)}"))
                 yield history
-            finally:
-                print(f"Total request time: {time.time() - start_time:.2f}s")
 
-        inputs = [message_input, chatbot, conversation_state, file_upload]
-        send_button.click(fn=handle_chat, inputs=inputs, outputs=chatbot)
-        message_input.submit(fn=handle_chat, inputs=inputs, outputs=chatbot)
+        # UI event handlers
+        inputs = [msg_input, chatbot, conversation_state, file_upload]
+        send_btn.click(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
+        msg_input.submit(analyze_potential_oversights, inputs=inputs, outputs=chatbot)
 
         gr.Examples([
-            ["Upload your medical form and ask what the doctor might've missed."],
-            ["This patient was treated with antibiotics for UTI. What else should we check?"],
-            ["Is there anything abnormal in the attached blood work report?"]
-        ], inputs=message_input)
+            ["What might have been missed in this patient's treatment?"],
+            ["Are there any medication conflicts in these records?"],
+            ["What abnormal results require follow-up?"]
+        ], inputs=msg_input)
 
     return demo
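Dropping `type="messages"` from `gr.Chatbot` matches the handler's switch from role dicts to `(user, bot)` tuples, so the two stay consistent. (One quirk: `len(response) % 3 == 0` is also true while `response` is still empty, so the first iterations can repaint an empty bubble.) Stripped of the medical logic, the streaming shape of `analyze_potential_oversights` is mutate-the-last-tuple-and-yield; a self-contained sketch with invented chunk strings:

```python
# Sketch of the streaming pattern: keep one (user, bot) tuple per turn and
# yield the whole history so the Chatbot re-renders on every update.
def stream_reply(message: str, history: list):
    history.append((message, ""))  # placeholder assistant turn
    parts = []
    for token in ["Reviewing ", "records", "..."]:  # stands in for model chunks
        parts.append(token)
        history[-1] = (message, "".join(parts))
        yield history  # each yield repaints the chat

for snapshot in stream_reply("What was missed?", []):
    print(snapshot[-1][1])
```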
 if __name__ == "__main__":
-    print("Initializing agent...")
+    print("Initializing medical analysis agent...")
     agent = init_agent()
-
-    print("Performing warm-up call...")
-    try:
-        warm_up = agent.run_gradio_chat(
-            message="Warm up",
-            history=[],
-            temperature=0.1,
-            max_new_tokens=10,
-            max_token=100,
-            call_agent=False,
-            conversation=[]
-        )
-        for _ in warm_up:
-            pass
-    except:
-        pass
-
+
     print("Launching interface...")
     demo = create_ui(agent)
-    demo.queue().launch(
+    demo.queue(concurrency_count=2).launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
 
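`queue(concurrency_count=2)` is the Gradio 3.x spelling; Gradio 4 removed that argument in favor of `default_concurrency_limit`, so this line breaks if the Space's Gradio pin is ever bumped. A version-tolerant sketch, assuming `demo` from `create_ui` and only these two major versions:

```python
# Sketch: version-tolerant queue setup for the launch block above.
import gradio as gr

if gr.__version__.startswith("3."):
    demo.queue(concurrency_count=2)           # Gradio 3.x API
else:
    demo.queue(default_concurrency_limit=2)   # Gradio 4.x replacement
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
```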