CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 16

Commit

1ba0100

verified ·

1 Parent(s): f640ef8

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -117

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import sys
 import os
-import pandas as pd
 import pdfplumber
 import json
 import gradio as gr
 from typing import List
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import hashlib
-import shutil
 import re
 import psutil
 import subprocess
@@ -17,12 +15,11 @@ persistent_dir = "/data/hf_cache"
 os.makedirs(persistent_dir, exist_ok=True)
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
-tool_cache_dir = os.path.join(persistent_dir, "tool_cache")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
 vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
-for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
@@ -47,15 +44,23 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
-def extract_priority_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
                 if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
-                    text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
-        return "\n\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
@@ -70,18 +75,6 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
         if file_type == "pdf":
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
-        elif file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
-                             skip_blank_lines=False, on_bad_lines="skip")
-            content = df.fillna("").astype(str).values.tolist()
-            result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
-        elif file_type in ["xls", "xlsx"]:
-            try:
-                df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
-            except Exception:
-                df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
-            content = df.fillna("").astype(str).values.tolist()
-            result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
@@ -107,34 +100,25 @@ def log_system_usage(tag=""):
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-    # Remove tool calls, JSON data, and repetitive phrases
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
-    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)  # Remove tool names
-    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)  # Remove JSON
-    text = re.sub(r"To analyze the medical records for clinical oversights.*?begin by reviewing.*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
-    # Only keep text under analysis headings or relevant content
-    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
         return ""
     return text
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=2,
         seed=100,
-        additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
@@ -145,14 +129,13 @@ def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
-        file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
-        download_output = gr.File(label="Download Full Report")
         def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
-            history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
             yield history, None
             extracted = ""
@@ -164,101 +147,64 @@ def create_ui(agent):
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
-            # Split extracted text into chunks of ~4,000 characters
-            chunk_size = 4000
-            chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            combined_response = ""
-            prompt_template = f"""
-Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
-1. **Missed Diagnoses**:
-   - Identify inconsistencies in history, symptoms, or tests.
-   - Consider psychiatric, neurological, infectious, autoimmune, genetic conditions, family history, trauma, and developmental factors.
-2. **Medication Conflicts**:
-   - Check for contraindications, interactions, or unjustified off-label use.
-   - Assess if medications worsen diagnoses or cause adverse effects.
-3. **Incomplete Assessments**:
-   - Note missing or superficial cognitive, psychiatric, social, or family assessments.
-   - Highlight gaps in medical history, substance use, or lab/imaging documentation.
-4. **Urgent Follow-up**:
-   - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
-Medical Records (Chunk {0} of {1}):
-{{chunk}}
-Begin analysis:
 """
             try:
-                if history and history[-1]["content"].startswith("⏳"):
-                    history.pop()
-                # Process each chunk and stream cleaned results
-                for chunk_idx, chunk in enumerate(chunks, 1):
-                    # Update UI with progress
-                    history.append({"role": "assistant", "content": f"🔄 Processing Chunk {chunk_idx} of {len(chunks)}..."})
-                    yield history, None
-                    prompt = prompt_template.format(chunk_idx, len(chunks), chunk=chunk)
-                    chunk_response = ""
-                    for chunk_output in agent.run_gradio_chat(
-                        message=prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=1024,
-                        max_token=4096,
-                        call_agent=False,
-                        conversation=[],
-                    ):
-                        if chunk_output is None:
-                            continue
-                        if isinstance(chunk_output, list):
-                            for m in chunk_output:
-                                if hasattr(m, 'content') and m.content:
-                                    cleaned = clean_response(m.content)
-                                    if cleaned:
-                                        chunk_response += cleaned + "\n"
-                                        # Stream partial response to UI
-                                        if history[-1]["content"].startswith("🔄"):
-                                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                                        else:
-                                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                                        yield history, None
-                        elif isinstance(chunk_output, str) and chunk_output.strip():
-                            cleaned = clean_response(chunk_output)
-                            if cleaned:
-                                chunk_response += cleaned + "\n"
-                                # Stream partial response to UI
-                                if history[-1]["content"].startswith("🔄"):
-                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                                else:
-                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                                yield history, None
-                    # Append completed chunk response to combined response
-                    if chunk_response:
-                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
-                # Finalize UI with complete response
-                if combined_response:
-                    history[-1]["content"] = combined_response.strip()
-                else:
-                    history.append({"role": "assistant", "content": "No oversights identified."})
-                # Generate report file with cleaned response
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-                if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(combined_response)
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e:
                 print("🚨 ERROR:", e)
-                history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
         send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])

 import sys
 import os
 import pdfplumber
 import json
 import gradio as gr
 from typing import List
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import hashlib
 import re
 import psutil
 import subprocess
 os.makedirs(persistent_dir, exist_ok=True)
 model_cache_dir = os.path.join(persistent_dir, "txagent_models")
 file_cache_dir = os.path.join(persistent_dir, "cache")
 report_dir = os.path.join(persistent_dir, "reports")
 vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
+for directory in [model_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
+def extract_priority_pages(file_path: str, max_chars: int = 6000) -> str:
     try:
         text_chunks = []
+        total_chars = 0
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
                 if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    page_chunk = f"=== Page {i+1} ===\n{page_text.strip()}\n"
+                    if total_chars + len(page_chunk) <= max_chars:
+                        text_chunks.append(page_chunk)
+                        total_chars += len(page_chunk)
+                    else:
+                        remaining = max_chars - total_chars
+                        text_chunks.append(page_chunk[:remaining])
+                        break
+        return "".join(text_chunks).strip()
     except Exception as e:
         return f"PDF processing error: {str(e)}"
         if file_type == "pdf":
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
+    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
+    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
+    text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
+    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
         return ""
     return text
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         force_finish=True,
         enable_checker=True,
+        step_rag_num=1,
         seed=100,
     )
     agent.init_model()
     log_system_usage("After Load")
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
+        file_upload = gr.File(file_types=[".pdf"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
+        download_output = gr.File(label="Download Report")
         def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
             yield history, None
             extracted = ""
                     extracted = "\n".join(results)
                     file_hash_value = file_hash(files[0].name) if files else ""
+            prompt = f"""
+Analyze the medical records and list potential doctor oversights under these headings only, with brief details:
+**Missed Diagnoses**: Inconsistencies or unaddressed conditions.
+**Medication Conflicts**: Contraindications or risky prescriptions.
+**Incomplete Assessments**: Missing or shallow evaluations.
+**Urgent Follow-up**: Issues needing immediate attention.
+Records:
+{extracted[:6000]}
+Respond concisely.
 """
             try:
+                history.append({"role": "assistant", "content": "🔄 Analyzing..."})
+                yield history, None
+                response = ""
+                for output in agent.run_gradio_chat(
+                    message=prompt,
+                    history=[],
+                    temperature=0.1,
+                    max_new_tokens=512,
+                    max_token=4096,
+                    call_agent=False,
+                    conversation=[],
+                ):
+                    if output is None:
+                        continue
+                    if isinstance(output, list):
+                        for m in output:
+                            if hasattr(m, 'content') and m.content:
+                                cleaned = clean_response(m.content)
+                                if cleaned:
+                                    response += cleaned + "\n"
+                                    history[-1]["content"] = response.strip()
+                                    yield history, None
+                    elif isinstance(output, str) and output.strip():
+                        cleaned = clean_response(output)
+                        if cleaned:
+                            response += cleaned + "\n"
+                            history[-1]["content"] = response.strip()
+                            yield history, None
+                if not response:
+                    history[-1]["content"] = "No oversights identified."
+                yield history, None
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
+                if report_path and response:
                     with open(report_path, "w", encoding="utf-8") as f:
+                        f.write(response.strip())
                 yield history, report_path if report_path and os.path.exists(report_path) else None
             except Exception as e:
                 print("🚨 ERROR:", e)
+                history[-1]["content"] = f"❌ Error: {str(e)}"
                 yield history, None
         send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])