CPS-Test-Mobile

Paused

App Files Community

Ali2206 committed on Apr 21

Commit

cc93544

verified ·

1 Parent(s): cf765da

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -370

app.py CHANGED Viewed

@@ -27,12 +27,14 @@ vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
@@ -46,11 +48,14 @@ MEDICAL_KEYWORDS = {
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN = 2048
 TARGET_CHUNK_TOKENS = 1200
-PROMPT_RESERVE = 300
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
@@ -66,6 +71,7 @@ def log_system_usage(tag=""):
     except Exception as e:
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
@@ -77,41 +83,33 @@ def count_tokens(text: str) -> int:
     encoding = tiktoken.get_encoding(TOKENIZER)
     return len(encoding.encode(text))
 def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
     try:
         text_chunks = []
         total_pages = 0
         total_tokens = 0
         with pdfplumber.open(file_path) as pdf:
             total_pages = len(pdf.pages)
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
                 lower_text = page_text.lower()
-                if any(re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS):
-                    section_header = f"\n{MEDICAL_SECTION_HEADER} (Page {i+1})\n"
-                    text_chunks.append(section_header + page_text.strip())
-                    total_tokens += count_tokens(section_header)
-                else:
-                    text_chunks.append(f"\n=== Page {i+1} ===\n{page_text.strip()}")
-                total_tokens += count_tokens(page_text)
         return "\n".join(text_chunks), total_pages, total_tokens
     except Exception as e:
         return f"PDF processing error: {str(e)}", 0, 0
 def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
-            with open(cache_path, "r", encoding="utf-8") as f:
-                return f.read()
         if file_type == "pdf":
             text, total_pages, total_tokens = extract_all_pages_with_token_count(file_path)
             result = json.dumps({
@@ -123,10 +121,12 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
             })
         elif file_type == "csv":
             chunks = []
-            for chunk in pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
-                                   skip_blank_lines=False, on_bad_lines="skip", chunksize=1000):
                 chunks.append(chunk.fillna("").astype(str).values.tolist())
-            content = [item for sublist in chunks for item in sublist]
             result = json.dumps({
                 "filename": os.path.basename(file_path),
                 "rows": content,
@@ -135,7 +135,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
-            except Exception:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({
@@ -145,109 +145,91 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
             })
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-    text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
-    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
-    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(r"To analyze the medical records for clinical oversights.*?begin by reviewing.*?\n", "", text, flags=re.DOTALL)
-    text = re.sub(r"\n{3,}", "\n\n", text).strip()
-    return text
 def format_final_report(analysis_results: List[str], filename: str) -> str:
-    report = []
-    report.append(f"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS")
-    report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
-    report.append(f"File: {filename}")
-    report.append("=" * 80)
-    sections = {
-        "CRITICAL FINDINGS": [],
-        "MISSED DIAGNOSES": [],
-        "MEDICATION ISSUES": [],
-        "ASSESSMENT GAPS": [],
-        "FOLLOW-UP RECOMMENDATIONS": []
-    }
-    for result in analysis_results:
-        for section in sections:
-            section_match = re.search(
-                rf"{re.escape(section)}:?\s*\n([^*]+?)(?=\n\*|\n\n|$)",
-                result,
-                re.IGNORECASE | re.DOTALL
             )
-            if section_match:
-                content = section_match.group(1).strip()
-                if content and content not in sections[section]:
-                    sections[section].append(content)
     if sections["CRITICAL FINDINGS"]:
         report.append("\n🚨 **CRITICAL FINDINGS** 🚨")
-        for content in sections["CRITICAL FINDINGS"]:
-            report.append(f"\n{content}")
-    for section, contents in sections.items():
-        if section != "CRITICAL FINDINGS" and contents:
-            report.append(f"\n**{section.upper()}**")
-            for content in contents:
-                report.append(f"\n{content}")
     if not any(sections.values()):
         report.append("\nNo significant clinical oversights identified.")
-    report.append("\n" + "=" * 80)
     report.append("END OF REPORT")
     return "\n".join(report)
-def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS) -> List[str]:
     paragraphs = re.split(r"\n\s*\n", content)
-    chunks = []
-    current_chunk = []
-    current_tokens = 0
     for para in paragraphs:
-        para_tokens = count_tokens(para)
-        if para_tokens > max_tokens:
-            sentences = re.split(r'(?<=[.!?])\s+', para)
-            for sent in sentences:
-                sent_tokens = count_tokens(sent)
-                if current_tokens + sent_tokens > max_tokens:
-                    chunks.append("\n\n".join(current_chunk))
-                    current_chunk = [sent]
-                    current_tokens = sent_tokens
                 else:
-                    current_chunk.append(sent)
-                    current_tokens += sent_tokens
-        elif current_tokens + para_tokens > max_tokens:
-            chunks.append("\n\n".join(current_chunk))
-            current_chunk = [para]
-            current_tokens = para_tokens
         else:
-            current_chunk.append(para)
-            current_tokens += para_tokens
-    if current_chunk:
-        chunks.append("\n\n".join(current_chunk))
     return chunks
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
@@ -256,293 +238,89 @@ def init_agent():
         enable_checker=True,
         step_rag_num=2,
         seed=100,
-        additional_default_tools=[],
     )
-    agent.init_model()
     log_system_usage("After Load")
     print("✅ Agent Ready")
     return agent
 def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
-    chunks = split_content_by_tokens(content)
-    analysis_results = []
     for i, chunk in enumerate(chunks):
         try:
-            base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
-            prompt_tokens = count_tokens(base_prompt)
-            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
-            chunk_tokens = count_tokens(chunk)
-            if chunk_tokens > max_content_tokens:
-                adjusted_chunk = ""
-                tokens_used = 0
-                paragraphs = re.split(r"\n\s*\n", chunk)
-                for para in paragraphs:
-                    para_tokens = count_tokens(para)
-                    if tokens_used + para_tokens <= max_content_tokens:
-                        adjusted_chunk += "\n\n" + para
-                        tokens_used += para_tokens
-                    else:
-                        break
-                if not adjusted_chunk:
-                    sentences = re.split(r'(?<=[.!?])\s+', chunk)
-                    for sent in sentences:
-                        sent_tokens = count_tokens(sent)
-                        if tokens_used + sent_tokens <= max_content_tokens:
-                            adjusted_chunk += " " + sent
-                            tokens_used += sent_tokens
-                        else:
-                            break
-                chunk = adjusted_chunk.strip()
             prompt = base_prompt + chunk
             response = ""
-            for output in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
                 temperature=temperature,
                 max_new_tokens=300,
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
-                conversation=[],
             ):
-                if output:
-                    if isinstance(output, list):
-                        for m in output:
-                            if hasattr(m, 'content'):
-                                response += clean_response(m.content)
-                    elif isinstance(output, str):
-                        response += clean_response(output)
             if response:
-                analysis_results.append(response)
         except Exception as e:
-            print(f"Error processing chunk {i}: {str(e)}")
-            continue
-    return format_final_report(analysis_results, filename)
 def create_ui(agent):
-    with gr.Blocks(
-        theme=gr.themes.Soft(
-            primary_hue="indigo",
-            secondary_hue="blue",
-            neutral_hue="slate",
-            spacing_size="md",
-            radius_size="md"
-        ),
-        title="Clinical Oversight Assistant",
-        css="""
-        .report-box {
-            border: 1px solid #e0e0e0;
-            border-radius: 8px;
-            padding: 16px;
-            background-color: #f9f9f9;
-        }
-        .file-upload {
-            background-color: #f5f7fa;
-            padding: 16px;
-            border-radius: 8px;
-        }
-        .analysis-btn {
-            width: 100%;
-        }
-        .critical-finding {
-            color: #d32f2f;
-            font-weight: bold;
-        }
-        .dataframe-container {
-            height: 600px;
-            overflow-y: auto;
-        }
-        """
-    ) as demo:
         gr.Markdown("""
-        <div style='text-align: center; margin-bottom: 20px;'>
-            <h1 style='color: #2b3a67; margin-bottom: 8px;'>🩺 Clinical Oversight Assistant</h1>
-            <p style='color: #5a6a8a; font-size: 16px;'>
-                Analyze medical records for potential oversights and generate comprehensive reports
-            </p>
-        </div>
         """)
-        with gr.Row(equal_height=False):
-            with gr.Column(scale=1, min_width=400):
-                with gr.Group(elem_classes="file-upload"):
-                    file_upload = gr.File(
-                        file_types=[".pdf", ".csv", ".xls", ".xlsx"],
-                        file_count="multiple",
-                        label="Upload Medical Records",
-                        elem_id="file-upload"
-                    )
-                    with gr.Row():
-                        clear_btn = gr.Button("Clear All", size="sm")
-                        send_btn = gr.Button(
-                            "Analyze Documents",
-                            variant="primary",
-                            elem_classes="analysis-btn"
-                        )
-                    with gr.Accordion("Additional Options", open=False):
-                        msg_input = gr.Textbox(
-                            placeholder="Enter specific focus areas or questions...",
-                            label="Analysis Focus",
-                            lines=3
-                        )
-                        temperature = gr.Slider(
-                            minimum=0.1,
-                            maximum=1.0,
-                            value=0.3,
-                            step=0.1,
-                            label="Analysis Strictness"
-                        )
-                status = gr.Textbox(
-                    label="Processing Status",
-                    interactive=False,
-                    visible=True
-                )
-            with gr.Column(scale=2, min_width=600):
-                with gr.Tabs():
-                    with gr.TabItem("Analysis Report", id="report"):
-                        report_output = gr.Textbox(
-                            label="Clinical Oversight Findings",
-                            lines=25,
-                            max_lines=50,
-                            interactive=False,
-                            elem_classes="report-box"
-                        )
-                    with gr.TabItem("Raw Data Preview", id="preview"):
-                        with gr.Column(elem_classes="dataframe-container"):
-                            data_preview = gr.Dataframe(
-                                headers=["Page", "Content"],
-                                datatype=["str", "str"],
-                                interactive=False
-                            )
-                with gr.Row():
-                    download_output = gr.File(
-                        label="Download Full Report",
-                        visible=True,
-                        interactive=False
-                    )
-                    gr.Button("Save to EHR", visible=False)
-        def analyze(files: List, message: str, temp: float):
             if not files:
-                return (
-                    {"value": "", "visible": True},
-                    None,
-                    {"value": "⚠️ Please upload at least one file to analyze.", "visible": True},
-                    {"value": None, "visible": True}
-                )
-            yield (
-                {"value": "", "visible": True},
-                None,
-                {"value": "⏳ Processing documents...", "visible": True},
-                {"value": None, "visible": True}
-            )
-            file_contents = []
-            filenames = []
-            preview_data = []
-            with ThreadPoolExecutor(max_workers=4) as executor:
-                futures = []
-                for f in files:
-                    file_path = f.name
-                    futures.append(executor.submit(
-                        convert_file_to_json,
-                        file_path,
-                        os.path.splitext(file_path)[1][1:].lower()
-                    ))
-                    filenames.append(os.path.basename(file_path))
-                results = []
-                for future in as_completed(futures):
-                    result = sanitize_utf8(future.result())
-                    try:
-                        data = json.loads(result)
-                        results.append(data)
-                        if "content" in data:
-                            preview_data.append([data["filename"], data["content"][:500] + "..."])
-                    except Exception as e:
-                        print(f"Error processing result: {e}")
-                        continue
-            yield (
-                {"value": "", "visible": True},
-                None,
-                {"value": f"🔍 Analyzing {len(files)} documents...", "visible": True},
-                {"value": preview_data[:20], "visible": True}
-            )
-            try:
-                combined_content = "\n".join([
-                    item.get("content", "") if isinstance(item, dict) and "content" in item
-                    else str(item.get("rows", "")) if isinstance(item, dict)
-                    else str(item)
-                    for item in results
-                ])
-                full_report = analyze_complete_document(
-                    combined_content,
-                    " + ".join(filenames),
-                    agent,
-                    temperature=temp
-                )
-                file_hash_value = hashlib.md5(combined_content.encode()).hexdigest()
-                report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
-                with open(report_path, "w", encoding="utf-8") as f:
-                    f.write(full_report)
-                yield (
-                    {"value": full_report, "visible": True},
-                    report_path if os.path.exists(report_path) else None,
-                    {"value": "✅ Analysis complete!", "visible": True},
-                    {"value": preview_data[:20], "visible": True}
-                )
-            except Exception as e:
-                error_msg = f"❌ Error during analysis: {str(e)}"
-                print(error_msg)
-                yield (
-                    {"value": "", "visible": True},
-                    None,
-                    {"value": error_msg, "visible": True},
-                    {"value": None, "visible": True}
-                )
-        send_btn.click(
-            fn=analyze,
-            inputs=[file_upload, msg_input, temperature],
-            outputs=[report_output, download_output, status, data_preview],
-            api_name="analyze"
-        )
-        clear_btn.click(
-            fn=lambda: (
-                None,
-                None,
-                "",
-                None,
-                {"value": 0.3},
-                {"value": ""}
-            ),
-            inputs=None,
-            outputs=[file_upload, download_output, status, data_preview, temperature, msg_input]
-        )
     return demo
 if __name__ == "__main__":
@@ -550,18 +328,7 @@ if __name__ == "__main__":
     try:
         import tiktoken
     except ImportError:
-        print("Installing tiktoken...")
-        subprocess.run([sys.executable, "-m", "pip", "install", "tiktoken"])
     agent = init_agent()
     demo = create_ui(agent)
-    demo.queue(
-        api_open=False,
-        max_size=20
-    ).launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        allowed_paths=[report_dir],
-        share=False
-    )

 for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
     os.makedirs(directory, exist_ok=True)
+# Environment variables
 os.environ["HF_HOME"] = model_cache_dir
 os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
 os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
+# Add src to path
 current_dir = os.path.dirname(os.path.abspath(__file__))
 src_path = os.path.abspath(os.path.join(current_dir, "src"))
 sys.path.insert(0, src_path)
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
+# Increase max model length to support larger contexts
+MAX_MODEL_LEN = 4096
+# Default chunk target tokens
 TARGET_CHUNK_TOKENS = 1200
+PROMPT_RESERVE = 100
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
     except Exception as e:
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
     encoding = tiktoken.get_encoding(TOKENIZER)
     return len(encoding.encode(text))
 def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
     try:
         text_chunks = []
         total_pages = 0
         total_tokens = 0
         with pdfplumber.open(file_path) as pdf:
             total_pages = len(pdf.pages)
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
                 lower_text = page_text.lower()
+                header = f"\n{MEDICAL_SECTION_HEADER} (Page {i+1})\n" if any(
+                    re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS
+                ) else f"\n=== Page {i+1} ===\n"
+                text_chunks.append(header + page_text.strip())
+                total_tokens += count_tokens(header) + count_tokens(page_text)
         return "\n".join(text_chunks), total_pages, total_tokens
     except Exception as e:
         return f"PDF processing error: {str(e)}", 0, 0
 def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
         if os.path.exists(cache_path):
+            return open(cache_path, "r", encoding="utf-8").read()
         if file_type == "pdf":
             text, total_pages, total_tokens = extract_all_pages_with_token_count(file_path)
             result = json.dumps({
             })
         elif file_type == "csv":
             chunks = []
+            for chunk in pd.read_csv(
+                file_path, encoding_errors="replace", header=None, dtype=str,
+                skip_blank_lines=False, on_bad_lines="skip", chunksize=1000
+            ):
                 chunks.append(chunk.fillna("").astype(str).values.tolist())
+            content = [item for sub in chunks for item in sub]
             result = json.dumps({
                 "filename": os.path.basename(file_path),
                 "rows": content,
         elif file_type in ["xls", "xlsx"]:
             try:
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
+            except:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
             content = df.fillna("").astype(str).values.tolist()
             result = json.dumps({
             })
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
     except Exception as e:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
+    patterns = [
+        r"\[TOOL_CALLS\].*", r"\['get_[^\]]+\']\n?", r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?",
+        r"To analyze the medical records for clinical oversights.*?\n"  # remove generic prompt
+    ]
+    for pat in patterns:
+        text = re.sub(pat, "", text, flags=re.DOTALL)
+    return re.sub(r"\n{3,}", "\n\n", text).strip()
 def format_final_report(analysis_results: List[str], filename: str) -> str:
+    report = [
+        "COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS",
+        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+        f"File: {filename}",
+        "=" * 80
+    ]
+    sections = {s: [] for s in [
+        "CRITICAL FINDINGS", "MISSED DIAGNOSES", "MEDICATION ISSUES",
+        "ASSESSMENT GAPS", "FOLLOW-UP RECOMMENDATIONS"
+    ]}
+    for res in analysis_results:
+        for sec in sections:
+            m = re.search(
+                rf"{re.escape(sec)}:?\s*\n(.+?)(?=\n\*|\n\n|$)",
+                res, re.IGNORECASE | re.DOTALL
             )
+            if m:
+                content = m.group(1).strip()
+                if content and content not in sections[sec]:
+                    sections[sec].append(content)
     if sections["CRITICAL FINDINGS"]:
         report.append("\n🚨 **CRITICAL FINDINGS** 🚨")
+        report.extend(f"\n{c}" for c in sections["CRITICAL FINDINGS"])
+    for sec, conts in sections.items():
+        if sec != "CRITICAL FINDINGS" and conts:
+            report.append(f"\n**{sec}**")
+            report.extend(f"\n{c}" for c in conts)
     if not any(sections.values()):
         report.append("\nNo significant clinical oversights identified.")
+    report.append("\n" + "="*80)
     report.append("END OF REPORT")
     return "\n".join(report)
+def split_content_by_tokens(content: str, max_tokens: int) -> List[str]:
     paragraphs = re.split(r"\n\s*\n", content)
+    chunks, current, curr_toks = [], [], 0
     for para in paragraphs:
+        toks = count_tokens(para)
+        if toks > max_tokens:
+            for sent in re.split(r'(?<=[.!?])\s+', para):
+                sent_toks = count_tokens(sent)
+                if curr_toks + sent_toks > max_tokens:
+                    chunks.append("\n\n".join(current))
+                    current, curr_toks = [sent], sent_toks
                 else:
+                    current.append(sent)
+                    curr_toks += sent_toks
+        elif curr_toks + toks > max_tokens:
+            chunks.append("\n\n".join(current))
+            current, curr_toks = [para], toks
         else:
+            current.append(para)
+            curr_toks += toks
+    if current:
+        chunks.append("\n\n".join(current))
     return chunks
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
         shutil.copy(default_tool_path, target_tool_path)
     agent = TxAgent(
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         enable_checker=True,
         step_rag_num=2,
         seed=100,
+        additional_default_tools=[]
     )
+    agent.init_model(max_model_len=MAX_MODEL_LEN)
     log_system_usage("After Load")
     print("✅ Agent Ready")
     return agent
 def analyze_complete_document(content: str, filename: str, agent: TxAgent, temperature: float = 0.3) -> str:
+    base_prompt = (
+        "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
+    )
+    prompt_toks = count_tokens(base_prompt)
+    max_chunk_toks = MAX_MODEL_LEN - prompt_toks - PROMPT_RESERVE
+    chunks = split_content_by_tokens(content, max_chunk_toks)
+    results = []
     for i, chunk in enumerate(chunks):
         try:
             prompt = base_prompt + chunk
             response = ""
+            for out in agent.run_gradio_chat(
                 message=prompt,
                 history=[],
                 temperature=temperature,
                 max_new_tokens=300,
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
+                conversation=[]
             ):
+                if out:
+                    if isinstance(out, list):
+                        for m in out:
+                            response += clean_response(m.content if hasattr(m, 'content') else str(m))
+                    else:
+                        response += clean_response(str(out))
             if response:
+                results.append(response)
         except Exception as e:
+            print(f"Error processing chunk {i}: {e}")
+    return format_final_report(results, filename)
 def create_ui(agent):
+    with gr.Blocks(title="Clinical Oversight Assistant") as demo:
         gr.Markdown("""
+        # 🩺 Clinical Oversight Assistant
+        Analyze medical records for potential oversights and generate comprehensive reports
         """)
+        with gr.Row():
+            with gr.Column():
+                file_upload = gr.File(label="Upload Medical Records", file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
+                msg_input = gr.Textbox(label="Analysis Focus (optional)")
+                temperature = gr.Slider(0.1, 1.0, value=0.3, label="Analysis Strictness")
+                send_btn = gr.Button("Analyze Documents", variant="primary")
+                clear_btn = gr.Button("Clear All")
+                status = gr.Textbox(label="Status", interactive=False)
+            with gr.Column():
+                report_output = gr.Textbox(label="Report", lines=20, interactive=False)
+                data_preview = gr.Dataframe(headers=["File", "Snippet"], interactive=False)
+                download_output = gr.File(label="Download Report")
+        def analyze(files, msg, temp):
             if not files:
+                yield "", None, "⚠️ Please upload files.", None
+                return
+            yield "", None, "⏳ Processing...", None
+            # convert files
+            previews = []
+            contents = []
+            for f in files:
+                res = json.loads(sanitize_utf8(convert_file_to_json(f.name, os.path.splitext(f.name)[1][1:].lower())))
+                if "content" in res:
+                    previews.append([res["filename"], res["content"][:200] + "..."])
+                    contents.append(res["content"])
+            yield "", None, f"🔍 Analyzing {len(contents)} docs...", previews
+            combined = "\n".join(contents)
+            report = analyze_complete_document(combined, "+".join([os.path.basename(f.name) for f in files]), agent, temp)
+            file_hash_val = hashlib.md5(combined.encode()).hexdigest()
+            path = os.path.join(report_dir, f"{file_hash_val}_report.txt")
+            with open(path, "w") as rd:
+                rd.write(report)
+            yield report, path, "✅ Analysis complete!", previews
+        send_btn.click(analyze, [file_upload, msg_input, temperature], [report_output, download_output, status, data_preview])
+        clear_btn.click(lambda: (None, None, "", None), None, [report_output, download_output, status, data_preview])
     return demo
 if __name__ == "__main__":
     try:
         import tiktoken
     except ImportError:
+        subprocess.run([sys.executable, "-m", "pip", "install", "tiktoken"] )
     agent = init_agent()
     demo = create_ui(agent)
+    demo.queue(api_open=False, max_size=20).launch(server_name="0.0.0.0", server_port=7860, show_error=True, share=False)