CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 20

Commit

2416301

verified ·

1 Parent(s): b33bf6c

Update app.py

Browse files

Files changed (1) hide show

app.py +200 -91

app.py CHANGED Viewed

@@ -46,9 +46,9 @@ MEDICAL_KEYWORDS = {
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN = 2048
-TARGET_CHUNK_TOKENS = 1200
-PROMPT_RESERVE = 300
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
@@ -251,20 +251,49 @@ def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS)
     return chunks
-def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
     """Analyze complete document with strict token management"""
     chunks = split_content_by_tokens(content)
     analysis_results = []
     for i, chunk in enumerate(chunks):
         try:
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
             prompt_tokens = count_tokens(base_prompt)
-            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
@@ -278,6 +307,7 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
                         break
                 if not adjusted_chunk:
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
@@ -295,8 +325,8 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
             for output in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
-                temperature=0.1,
-                max_new_tokens=300,
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
@@ -317,78 +347,137 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
     return format_final_report(analysis_results, filename)
-def init_agent():
-    """Initialize the TxAgent with proper configuration."""
-    print("🔁 Initializing model...")
-    log_system_usage("Before Load")
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
-    agent = TxAgent(
-        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
-        force_finish=True,
-        enable_checker=True,
-        step_rag_num=2,
-        seed=100,
-        additional_default_tools=[],
-    )
-    agent.init_model()
-    log_system_usage("After Load")
-    print("✅ Agent Ready")
-    return agent
 def create_ui(agent):
-    """Create the Gradio interface."""
-    with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
         gr.Markdown("""
-        <h1 style='text-align: center;'>🩺 Comprehensive Clinical Oversight Assistant</h1>
-        <p style='text-align: center;'>Analyze complete medical records for potential oversights</p>
         """)
-        with gr.Row():
-            with gr.Column(scale=3):
-                file_upload = gr.File(
-                    file_types=[".pdf", ".csv", ".xls", ".xlsx"],
-                    file_count="multiple",
-                    label="Upload Medical Records"
-                )
-                msg_input = gr.Textbox(
-                    placeholder="Optional: Add specific focus areas or questions...",
-                    label="Analysis Focus"
                 )
                 with gr.Row():
-                    send_btn = gr.Button("Analyze Complete Documents", variant="primary")
-                    clear_btn = gr.Button("Clear")
-                status = gr.Textbox(label="Status", interactive=False)
-            with gr.Column(scale=7):
-                report_output = gr.Textbox(
-                    label="Clinical Oversight Report",
-                    lines=20,
-                    max_lines=50,
-                    interactive=False
-                )
-                download_output = gr.File(
-                    label="Download Full Report",
-                    visible=False
-                )
-        def analyze(files: List, message: str):
-            """Process files and generate analysis."""
             if not files:
-                yield "", None, "⚠️ Please upload at least one file to analyze."
                 return
-            yield "", None, "⏳ Processing documents (this may take several minutes for large files)..."
             file_contents = []
             filenames = []
-            total_tokens = 0
             with ThreadPoolExecutor(max_workers=4) as executor:
                 futures = []
@@ -403,30 +492,34 @@ def create_ui(agent):
                 results = []
                 for future in as_completed(futures):
                     result = sanitize_utf8(future.result())
-                    results.append(result)
                     try:
                         data = json.loads(result)
-                        if "total_tokens" in data:
-                            total_tokens += data["total_tokens"]
                     except:
                         pass
-                file_contents = results
-            combined_filename = " + ".join(filenames)
-            combined_content = "\n".join([
-                json.loads(fc).get("content", "") if "content" in json.loads(fc)
-                else str(json.loads(fc).get("rows", ""))
-                for fc in file_contents
-            ])
-            yield "", None, f"🔍 Analyzing content ({total_tokens//1000}k tokens)..."
             try:
                 full_report = analyze_complete_document(
                     combined_content,
-                    combined_filename,
-                    agent
                 )
                 file_hash_value = hashlib.md5(combined_content.encode()).hexdigest()
@@ -434,30 +527,46 @@ def create_ui(agent):
                 with open(report_path, "w", encoding="utf-8") as f:
                     f.write(full_report)
-                yield full_report, report_path if os.path.exists(report_path) else None, "✅ Analysis complete!"
             except Exception as e:
                 error_msg = f"❌ Error during analysis: {str(e)}"
                 print(error_msg)
-                yield "", None, error_msg
         send_btn.click(
             fn=analyze,
-            inputs=[file_upload, msg_input],
-            outputs=[report_output, download_output, status],
             api_name="analyze"
         )
         clear_btn.click(
-            fn=lambda: ("", None, ""),
             inputs=None,
-            outputs=[report_output, download_output, status]
         )
     return demo
 if __name__ == "__main__":
     print("🚀 Launching app...")
     try:
         import tiktoken
     except ImportError:

     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
+MAX_MODEL_LEN = 2048  # Matches your model's actual limit
+TARGET_CHUNK_TOKENS = 1200  # Leaves room for prompt and response
+PROMPT_RESERVE = 300  # Tokens reserved for prompt structure
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
     return chunks
+def init_agent():
+    """Initialize the TxAgent with proper configuration."""
+    print("🔁 Initializing model...")
+    log_system_usage("Before Load")
+    default_tool_path = os.path.abspath("data/new_tool.json")
+    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(target_tool_path):
+        shutil.copy(default_tool_path, target_tool_path)
+    agent = TxAgent(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": target_tool_path},
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=2,
+        seed=100,
+        additional_default_tools=[],
+    )
+    agent.init_model()
+    log_system_usage("After Load")
+    print("✅ Agent Ready")
+    return agent
+def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
     """Analyze complete document with strict token management"""
     chunks = split_content_by_tokens(content)
     analysis_results = []
     for i, chunk in enumerate(chunks):
         try:
+            # Ultra-minimal prompt to maximize content space
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
+            # Calculate available space for content
             prompt_tokens = count_tokens(base_prompt)
+            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100  # Response buffer
+            # Ensure chunk fits
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
+                # Find last paragraph that fits
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
                         break
                 if not adjusted_chunk:
+                    # If even one paragraph is too big, split sentences
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
             for output in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
+                temperature=temperature,
+                max_new_tokens=300,  # Keep responses very concise
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
     return format_final_report(analysis_results, filename)
 def create_ui(agent):
+    """Create the Gradio interface with enhanced design."""
+    with gr.Blocks(
+        theme=gr.themes.Soft(
+            primary_hue="indigo",
+            secondary_hue="blue",
+            neutral_hue="slate",
+            spacing_size="md",
+            radius_size="md"
+        ),
+        title="Clinical Oversight Assistant",
+        css="""
+        .report-box {
+            border: 1px solid #e0e0e0;
+            border-radius: 8px;
+            padding: 16px;
+            background-color: #f9f9f9;
+        }
+        .file-upload {
+            background-color: #f5f7fa;
+            padding: 16px;
+            border-radius: 8px;
+        }
+        .analysis-btn {
+            width: 100%;
+        }
+        .critical-finding {
+            color: #d32f2f;
+            font-weight: bold;
+        }
+        """
+    ) as demo:
+        # Header Section
         gr.Markdown("""
+        <div style='text-align: center; margin-bottom: 20px;'>
+            <h1 style='color: #2b3a67; margin-bottom: 8px;'>🩺 Clinical Oversight Assistant</h1>
+            <p style='color: #5a6a8a; font-size: 16px;'>
+                Analyze medical records for potential oversights and generate comprehensive reports
+            </p>
+        </div>
         """)
+        with gr.Row(equal_height=False):
+            # Left Column - Inputs
+            with gr.Column(scale=1, min_width=400):
+                with gr.Group(label="Document Upload", elem_classes="file-upload"):
+                    file_upload = gr.File(
+                        file_types=[".pdf", ".csv", ".xls", ".xlsx"],
+                        file_count="multiple",
+                        label="Upload Medical Records",
+                        elem_id="file-upload"
+                    )
+                    with gr.Row():
+                        clear_btn = gr.Button("Clear All", size="sm")
+                        send_btn = gr.Button(
+                            "Analyze Documents",
+                            variant="primary",
+                            elem_classes="analysis-btn"
+                        )
+                    with gr.Accordion("Additional Options", open=False):
+                        msg_input = gr.Textbox(
+                            placeholder="Enter specific focus areas or questions...",
+                            label="Analysis Focus",
+                            lines=3
+                        )
+                        temperature = gr.Slider(
+                            minimum=0.1,
+                            maximum=1.0,
+                            value=0.3,
+                            step=0.1,
+                            label="Analysis Strictness"
+                        )
+                status = gr.Textbox(
+                    label="Processing Status",
+                    interactive=False,
+                    visible=True
                 )
+            # Right Column - Outputs
+            with gr.Column(scale=2, min_width=600):
+                with gr.Tabs():
+                    with gr.TabItem("Analysis Report", id="report"):
+                        report_output = gr.Textbox(
+                            label="Clinical Oversight Findings",
+                            lines=25,
+                            max_lines=50,
+                            interactive=False,
+                            elem_classes="report-box"
+                        )
+                    with gr.TabItem("Raw Data Preview", id="preview"):
+                        data_preview = gr.Dataframe(
+                            headers=["Page", "Content"],
+                            datatype=["str", "str"],
+                            interactive=False,
+                            height=600
+                        )
                 with gr.Row():
+                    download_output = gr.File(
+                        label="Download Full Report",
+                        visible=True,
+                        interactive=False
+                    )
+                    gr.Button("Save to EHR", visible=False)
+        # Analysis function with UI updates
+        def analyze(files: List, message: str, temp: float):
             if not files:
+                yield (
+                    gr.Textbox.update(value="", visible=True),
+                    gr.File.update(value=None, visible=False),
+                    gr.Textbox.update(value="⚠️ Please upload at least one file to analyze.", visible=True),
+                    gr.Dataframe.update(value=None, visible=True)
+                )
                 return
+            # Update UI for processing state
+            yield (
+                gr.Textbox.update(value="", visible=True),
+                gr.File.update(value=None, visible=False),
+                gr.Textbox.update(value="⏳ Processing documents...", visible=True),
+                gr.Dataframe.update(value=None, visible=True)
+            )
+            # Process files
             file_contents = []
             filenames = []
+            preview_data = []
             with ThreadPoolExecutor(max_workers=4) as executor:
                 futures = []
                 results = []
                 for future in as_completed(futures):
                     result = sanitize_utf8(future.result())
                     try:
                         data = json.loads(result)
+                        results.append(result)
+                        if "content" in data:
+                            preview_data.append([data["filename"], data["content"][:500] + "..."])
                     except:
                         pass
+            # Update UI for analysis state
+            yield (
+                gr.Textbox.update(value="", visible=True),
+                gr.File.update(value=None, visible=False),
+                gr.Textbox.update(value=f"🔍 Analyzing {len(files)} documents...", visible=True),
+                gr.Dataframe.update(value=preview_data[:20], visible=True)
+            )
             try:
+                combined_content = "\n".join([
+                    json.loads(fc).get("content", "") if "content" in json.loads(fc)
+                    else str(json.loads(fc).get("rows", ""))
+                    for fc in results
+                ])
                 full_report = analyze_complete_document(
                     combined_content,
+                    " + ".join(filenames),
+                    agent,
+                    temperature=temp
                 )
                 file_hash_value = hashlib.md5(combined_content.encode()).hexdigest()
                 with open(report_path, "w", encoding="utf-8") as f:
                     f.write(full_report)
+                yield (
+                    gr.Textbox.update(value=full_report, visible=True),
+                    gr.File.update(value=report_path if os.path.exists(report_path) else None, visible=True),
+                    gr.Textbox.update(value="✅ Analysis complete!", visible=True),
+                    gr.Dataframe.update(value=preview_data[:20], visible=True)
+                )
             except Exception as e:
                 error_msg = f"❌ Error during analysis: {str(e)}"
                 print(error_msg)
+                yield (
+                    gr.Textbox.update(value="", visible=True),
+                    gr.File.update(value=None, visible=False),
+                    gr.Textbox.update(value=error_msg, visible=True),
+                    gr.Dataframe.update(value=None, visible=True)
+                )
+        # Event handlers
         send_btn.click(
             fn=analyze,
+            inputs=[file_upload, msg_input, temperature],
+            outputs=[report_output, download_output, status, data_preview],
             api_name="analyze"
         )
         clear_btn.click(
+            fn=lambda: (
+                None, None, "", None,
+                gr.Slider.update(value=0.3),
+                gr.Textbox.update(value="")
+            ),
             inputs=None,
+            outputs=[file_upload, download_output, status, data_preview, temperature, msg_input]
         )
     return demo
 if __name__ == "__main__":
     print("🚀 Launching app...")
+    # Install tiktoken if not available
     try:
         import tiktoken
     except ImportError: