CPS-Test-Mobile

Paused

App Files

xet

Community

Ali2206 committed on Apr 13

Commit

6af3907

verified ·

1 Parent(s): f858e79

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -105

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import pandas as pd
 import pdfplumber
 import json
@@ -8,10 +9,10 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 import hashlib
 import shutil
 import time
-from functools import lru_cache
-from threading import Thread
 import re
 import tempfile
 # Environment setup
 current_dir = os.path.dirname(os.path.abspath(__file__))
@@ -59,8 +60,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
             for i, page in enumerate(pdf.pages[:3]):
-                text_chunks.append(f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}")
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
@@ -74,18 +78,18 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
-            return open(cache_path, "r", encoding="utf-8").read()
         if file_type == "pdf":
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
             Thread(target=full_pdf_processing, args=(file_path, h)).start()
         elif file_type == "csv":
-            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str, skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
@@ -93,39 +97,40 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
-            return json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
-def full_pdf_processing(file_path: str, file_hash: str):
     try:
-        cache_path = os.path.join(file_cache_dir, f"{file_hash}_full.json")
         if os.path.exists(cache_path):
             return
         with pdfplumber.open(file_path) as pdf:
-            full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}" for i, page in enumerate(pdf.pages)])
         result = json.dumps({"filename": os.path.basename(file_path), "content": full_text, "status": "complete"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
-        with open(os.path.join(report_dir, f"{file_hash}_report.txt"), "w", encoding="utf-8") as out:
             out.write(full_text)
     except Exception as e:
         print(f"Background processing failed: {str(e)}")
 def init_agent():
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
-    agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         tool_files_dict={"new_tool": target_tool_path},
@@ -135,49 +140,68 @@ def init_agent():
         seed=100,
         additional_default_tools=[],
     )
-    agent.init_model()
-    return agent
-def create_ui(agent: TxAgent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
         <h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>
         <h3 style='text-align: center;'>Identify potential oversights in patient care</h3>
         """)
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
-        file_upload = gr.File(label="Upload Medical Records", file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
-        conversation_state = gr.State([])
         download_output = gr.File(label="Download Full Report")
-        def analyze_potential_oversights(message: str, history: list, conversation: list, files: list):
-            start_time = time.time()
-            try:
-                # Add initial user and temporary assistant messages to update UI immediately
-                history = history + [
-                    {"role": "user", "content": message},
-                    {"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."}
-                ]
                 yield history, None
-                extracted_data = ""
-                file_hash_value = ""
-                if files and isinstance(files, list):
-                    with ThreadPoolExecutor(max_workers=4) as executor:
-                        futures = [
-                            executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower())
-                            for f in files if hasattr(f, 'name')
-                        ]
-                        extracted_data = "\n".join([sanitize_utf8(f.result()) for f in as_completed(futures)])
-                        file_hash_value = file_hash(files[0].name) if hasattr(files[0], 'name') else ""
-                # Truncate extracted data to reduce overall token count (tune the character limit as needed)
-                max_extracted_chars = 12000
-                truncated_data = extracted_data[:max_extracted_chars]
-                analysis_prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
 1. List potential missed diagnoses
 2. Flag any medication conflicts
 3. Note incomplete assessments
@@ -188,68 +212,65 @@ Medical Records:
 ### Potential Oversights:
 """
-                response = ""
-                try:
-                    # Stream the agent responses; skip any None chunks
-                    for chunk in agent.run_gradio_chat(
-                        message=analysis_prompt,
-                        history=[],
-                        temperature=0.2,
-                        max_new_tokens=1024,
-                        max_token=4096,
-                        call_agent=False,
-                        conversation=conversation
-                    ):
-                        if chunk is None:
-                            continue
-                        if isinstance(chunk, str):
-                            response += chunk
-                        elif isinstance(chunk, list):
-                            response += "".join([c.content for c in chunk if hasattr(c, 'content')])
-                        # Yield partial response updates
-                        cleaned = response.replace("[TOOL_CALLS]", "").strip()
-                        yield history[:-1] + [{"role": "assistant", "content": cleaned}], None
-                except Exception as agent_error:
-                    history.append({"role": "assistant", "content": f"❌ Analysis failed during processing: {str(agent_error)}"})
-                    yield history, None
-                    return
-                final_output = response.replace("[TOOL_CALLS]", "").strip()
-                if not final_output:
-                    final_output = "No clear oversights identified. Recommend comprehensive review."
-                report_path = None
-                if file_hash_value:
-                    possible_report = os.path.join(report_dir, f"{file_hash_value}_report.txt")
-                    if os.path.exists(possible_report):
-                        report_path = possible_report
-                history = history[:-1] + [{"role": "assistant", "content": final_output}]
-                yield history, report_path
-            except Exception as e:
-                history.append({"role": "assistant", "content": f"❌ Analysis failed: {str(e)}"})
                 yield history, None
-        inputs = [msg_input, chatbot, conversation_state, file_upload]
-        outputs = [chatbot, download_output]
-        send_btn.click(analyze_potential_oversights, inputs=inputs, outputs=outputs)
-        msg_input.submit(analyze_potential_oversights, inputs=inputs, outputs=outputs)
-        gr.Examples([
-            ["What might have been missed in this patient's treatment?"],
-            ["Are there any medication conflicts in these records?"],
-            ["What abnormal results require follow-up?"]
-        ], inputs=msg_input)
     return demo
 if __name__ == "__main__":
-    print("Initializing medical analysis agent...")
-    agent = init_agent()
     print("Launching interface...")
-    demo = create_ui(agent)
     demo.queue(api_open=False).launch(
         server_name="0.0.0.0",
         server_port=7860,

+import sys
+import os
 import pandas as pd
 import pdfplumber
 import json
 import hashlib
 import shutil
 import time
+from threading import Thread, Lock
 import re
 import tempfile
+import threading
 # Environment setup
 current_dir = os.path.dirname(os.path.abspath(__file__))
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
+            # Process first three pages
             for i, page in enumerate(pdf.pages[:3]):
+                text = page.extract_text() or ""
+                text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
+            # Check for keywords on later pages and add if found
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
+            with open(cache_path, "r", encoding="utf-8") as f:
+                return f.read()
         if file_type == "pdf":
             text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
             Thread(target=full_pdf_processing, args=(file_path, h)).start()
         elif file_type == "csv":
+            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
+                             skip_blank_lines=False, on_bad_lines="skip")
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
+            result = json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
+def full_pdf_processing(file_path: str, file_hash_value: str):
     try:
+        cache_path = os.path.join(file_cache_dir, f"{file_hash_value}_full.json")
         if os.path.exists(cache_path):
             return
         with pdfplumber.open(file_path) as pdf:
+            full_text = "\n".join([f"=== Page {i+1} ===\n{(page.extract_text() or '').strip()}"
+                                   for i, page in enumerate(pdf.pages)])
         result = json.dumps({"filename": os.path.basename(file_path), "content": full_text, "status": "complete"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
+        with open(os.path.join(report_dir, f"{file_hash_value}_report.txt"), "w", encoding="utf-8") as out:
             out.write(full_text)
     except Exception as e:
         print(f"Background processing failed: {str(e)}")
+# Global agent and a lock for safe multi-threaded access
+agent = None
+agent_lock = Lock()
 def init_agent():
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
+    new_agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         tool_files_dict={"new_tool": target_tool_path},
         seed=100,
         additional_default_tools=[],
     )
+    new_agent.init_model()
+    return new_agent
+def load_agent_in_background():
+    global agent
+    with agent_lock:
+        if agent is None:
+            print("Initializing agent in background...")
+            agent = init_agent()
+            print("Agent initialization complete.")
+# Start background agent loading at startup
+threading.Thread(target=load_agent_in_background, daemon=True).start()
+def create_ui():
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
         <h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>
         <h3 style='text-align: center;'>Identify potential oversights in patient care</h3>
         """)
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
+        file_upload = gr.File(label="Upload Medical Records",
+                              file_types=[".pdf", ".csv", ".xls", ".xlsx"],
+                              file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
+        def analyze_potential_oversights(message: str, history: list, files: list):
+            global agent
+            # Append user and interim assistant message
+            history = history + [
+                {"role": "user", "content": message},
+                {"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."}
+            ]
+            yield history, None
+            if agent is None:
+                history.append({"role": "assistant",
+                                "content": "🕒 The model is still loading. Please wait a moment and try again."})
                 yield history, None
+                return
+            extracted_data = ""
+            file_hash_value = ""
+            if files and isinstance(files, list):
+                with ThreadPoolExecutor(max_workers=4) as executor:
+                    futures = [
+                        executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower())
+                        for f in files if hasattr(f, 'name')
+                    ]
+                    results = []
+                    for future in as_completed(futures):
+                        results.append(sanitize_utf8(future.result()))
+                    extracted_data = "\n".join(results)
+                    file_hash_value = file_hash(files[0].name) if hasattr(files[0], 'name') else ""
+            # Truncate the extracted data to avoid token overflows
+            max_extracted_chars = 12000
+            truncated_data = extracted_data[:max_extracted_chars]
+            analysis_prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
 1. List potential missed diagnoses
 2. Flag any medication conflicts
 3. Note incomplete assessments
 ### Potential Oversights:
 """
+            response = ""
+            try:
+                # Stream agent responses and update the last message in the conversation with each chunk.
+                for chunk in agent.run_gradio_chat(
+                    message=analysis_prompt,
+                    history=[],
+                    temperature=0.2,
+                    max_new_tokens=1024,
+                    max_token=4096,
+                    call_agent=False,
+                    conversation=[]
+                ):
+                    if chunk is None:
+                        continue
+                    if isinstance(chunk, str):
+                        response += chunk
+                    elif isinstance(chunk, list):
+                        response += "".join([c.content for c in chunk if hasattr(c, 'content')])
+                    cleaned = response.replace("[TOOL_CALLS]", "").strip()
+                    # Update the assistant message (last item in history) with the latest accumulated answer
+                    history[-1] = {"role": "assistant", "content": cleaned}
+                    yield history, None
+            except Exception as agent_error:
+                history[-1] = {"role": "assistant",
+                               "content": f"❌ Analysis failed during processing: {str(agent_error)}"}
                 yield history, None
+                return
+            final_output = response.replace("[TOOL_CALLS]", "").strip()
+            if not final_output:
+                final_output = "No clear oversights identified. Recommend comprehensive review."
+            # Update the assistant's message with the final output
+            history[-1] = {"role": "assistant", "content": final_output}
+            report_path = None
+            if file_hash_value:
+                possible_report = os.path.join(report_dir, f"{file_hash_value}_report.txt")
+                if os.path.exists(possible_report):
+                    report_path = possible_report
+            yield history, report_path
+        send_btn.click(analyze_potential_oversights,
+                       inputs=[msg_input, gr.State([]), file_upload],
+                       outputs=[chatbot, download_output])
+        msg_input.submit(analyze_potential_oversights,
+                         inputs=[msg_input, gr.State([]), file_upload],
+                         outputs=[chatbot, download_output])
+        gr.Examples([["What might have been missed in this patient's treatment?"],
+                     ["Are there any medication conflicts in these records?"],
+                     ["What abnormal results require follow-up?"]],
+                    inputs=msg_input)
     return demo
 if __name__ == "__main__":
     print("Launching interface...")
+    demo = create_ui()
     demo.queue(api_open=False).launch(
         server_name="0.0.0.0",
         server_port=7860,