Ali2206 committed on
Commit e12aa83 · verified · 1 Parent(s): 99e7b0d

Update app.py

Files changed (1)
  1. app.py +326 -249
app.py CHANGED
@@ -5,7 +5,7 @@ import pdfplumber
5
  import json
6
  import gradio as gr
7
  from typing import List, Dict, Optional, Generator
8
- from concurrent.futures import ProcessPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
11
  import re
@@ -17,26 +17,20 @@ import gc
17
  from diskcache import Cache
18
  import time
19
  from transformers import AutoTokenizer
20
- import pyarrow as pa
21
- import pyarrow.csv as pc
22
- import pyarrow.parquet as pq
23
- from vllm import LLM, SamplingParams
24
- import asyncio
25
- import threading
26
 
27
  # Configure logging
28
  logging.basicConfig(level=logging.INFO)
29
  logger = logging.getLogger(__name__)
30
 
31
- # File handler for response logging
32
- response_log_file = os.path.join("/data/hf_cache", "response_log.txt")
33
- response_logger = logging.getLogger("ResponseLogger")
34
- response_handler = logging.FileHandler(response_log_file, mode="a")
35
- response_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
36
- response_logger.addHandler(response_handler)
37
- response_logger.setLevel(logging.INFO)
38
 
39
- # Persistent directory
40
  persistent_dir = "/data/hf_cache"
41
  os.makedirs(persistent_dir, exist_ok=True)
42
 
@@ -49,113 +43,129 @@ vllm_cache_dir = os.path.join(persistent_dir, "vllm_cache")
49
  for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
50
  os.makedirs(directory, exist_ok=True)
51
 
52
- os.environ["HF_HOME"] = model_cache_dir
53
- os.environ["TRANSFORMERS_CACHE"] = model_cache_dir
54
- os.environ["VLLM_CACHE_DIR"] = vllm_cache_dir
55
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
56
- os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
57
-
58
- current_dir = os.path.dirname(os.path.abspath(__file__))
59
- src_path = os.path.abspath(os.path.join(current_dir, "src"))
60
- sys.path.insert(0, src_path)
61
-
62
- from txagent.txagent import TxAgent
63
 
64
  # Initialize cache with 10GB limit
65
  cache = Cache(file_cache_dir, size_limit=10 * 1024**3)
66
 
67
- # Initialize tokenizer for precise chunking
68
- tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
69
 
70
  def sanitize_utf8(text: str) -> str:
 
71
  return text.encode("utf-8", "ignore").decode("utf-8")
72
 
73
  def file_hash(path: str) -> str:
74
  with open(path, "rb") as f:
75
- return hashlib.md5(f.read()).hexdigest()
76
 
77
- def extract_all_pages(file_path: str, progress_callback=None) -> str:
78
- cache_key = f"pdf_{file_hash(file_path)}"
79
- if cache_key in cache:
80
- return cache[cache_key]
81
 
82
  try:
83
  with pdfplumber.open(file_path) as pdf:
84
  total_pages = len(pdf.pages)
85
  if total_pages == 0:
86
  return ""
87
 
88
- batch_size = 5
89
- batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
90
- text_chunks = [""] * total_pages
91
- processed_pages = 0
92
-
93
- def extract_batch(start: int, end: int) -> List[tuple]:
94
- results = []
95
  with pdfplumber.open(file_path) as pdf:
96
- for page in pdf.pages[start:end]:
97
- page_num = start + pdf.pages.index(page)
98
- page_text = page.extract_text_simple() or ""
99
- results.append((page_num, f"=== Page {page_num + 1} ===\n{page_text.strip()}"))
100
- return results
101
-
102
- with ProcessPoolExecutor(max_workers=4) as executor:
103
- futures = [executor.submit(extract_batch, start, end) for start, end in batches]
104
- for future in as_completed(futures):
105
- for page_num, text in future.result():
106
- text_chunks[page_num] = text
107
- processed_pages += batch_size
108
- if progress_callback:
109
- progress_callback(min(processed_pages, total_pages), total_pages)
110
-
111
- result = "\n\n".join(filter(None, text_chunks))
112
- cache[cache_key] = result
113
- return result
114
  except Exception as e:
115
- logger.error("PDF processing error: %s", e)
116
  return f"PDF processing error: {str(e)}"
117
 
118
  def excel_to_json(file_path: str) -> List[Dict]:
119
- cache_key = f"excel_{file_hash(file_path)}"
120
- if cache_key in cache:
121
- return cache[cache_key]
122
-
123
  try:
124
- table = pq.read_table(file_path)
125
- df = table.to_pandas(use_threads=True, split_blocks=True)
126
- content = df.where(pd.notnull(df), "").astype(str).values.tolist()
127
- result = [{
128
- "filename": os.path.basename(file_path),
129
- "rows": content,
130
- "type": "excel"
131
- }]
132
- cache[cache_key] = result
133
- return result
134
  except Exception as e:
135
- logger.error(f"Error processing Excel file: {e}")
136
- return [{"error": f"Error processing Excel file: {str(e)}"}]
137
 
138
  def csv_to_json(file_path: str) -> List[Dict]:
139
- cache_key = f"csv_{file_hash(file_path)}"
140
- if cache_key in cache:
141
- return cache[cache_key]
142
-
143
  try:
144
- table = pc.read_csv(file_path, parse_options=pc.ParseOptions(invalid_row_handler=lambda x: "skip"))
145
- df = table.to_pandas(use_threads=True, split_blocks=True)
146
- content = df.where(pd.notnull(df), "").astype(str).values.tolist()
147
- result = [{
148
  "filename": os.path.basename(file_path),
149
- "rows": content,
150
  "type": "csv"
151
  }]
152
- cache[cache_key] = result
153
- return result
154
  except Exception as e:
155
- logger.error(f"Error processing CSV file: {e}")
156
- return [{"error": f"Error processing CSV file: {str(e)}"}]
157
 
158
- def process_file(file_path: str, file_type: str) -> List[Dict]:
159
  try:
160
  if file_type == "pdf":
161
  text = extract_all_pages(file_path)
@@ -172,248 +182,315 @@ def process_file(file_path: str, file_type: str) -> List[Dict]:
172
  else:
173
  return [{"error": f"Unsupported file type: {file_type}"}]
174
  except Exception as e:
175
- logger.error("Error processing %s: %s", os.path.basename(file_path), e)
176
  return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
177
 
178
- def tokenize_and_chunk(text: str, max_tokens: int = 800) -> List[str]:
179
- cache_key = f"tokens_{hashlib.md5(text.encode()).hexdigest()}"
180
- if cache_key in cache:
181
- return cache[cache_key]
182
-
183
  tokens = tokenizer.encode(text, add_special_tokens=False)
184
- chunks = []
185
- for i in range(0, len(tokens), max_tokens):
186
- chunk_tokens = tokens[i:i + max_tokens]
187
- chunks.append(tokenizer.decode(chunk_tokens, skip_special_tokens=True))
188
- cache[cache_key] = chunks
189
- return chunks
190
 
191
  def log_system_usage(tag=""):
 
192
  try:
193
- cpu = psutil.cpu_percent(interval=0.1)
194
  mem = psutil.virtual_memory()
195
- logger.info("[%s] CPU: %.1f%% | RAM: %dMB / %dMB", tag, cpu, mem.used // (1024**2), mem.total // (1024**2))
196
- result = subprocess.run(
197
- ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
198
- capture_output=True, text=True
199
- )
200
- if result.returncode == 0:
201
- used, total, util = result.stdout.strip().split(", ")
202
- logger.info("[%s] GPU: %sMB / %sMB | Utilization: %s%%", tag, used, total, util)
203
  except Exception as e:
204
- logger.error("[%s] GPU/CPU monitor failed: %s", tag, e)
205
 
206
  def clean_response(text: str) -> str:
207
- text = sanitize_utf8(text)
208
- text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
209
  diagnoses = []
210
- lines = text.splitlines()
211
- in_diagnoses_section = False
212
- for line in lines:
213
  line = line.strip()
214
  if not line:
215
  continue
216
- if re.match(r"###\s*Missed Diagnoses", line):
217
- in_diagnoses_section = True
218
- continue
219
- if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
220
- in_diagnoses_section = False
221
- continue
222
- if in_diagnoses_section and re.match(r"-\s*.+", line):
223
- diagnosis = re.sub(r"^\-\s*", "", line).strip()
224
- if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
225
- diagnoses.append(diagnosis)
226
- text = " ".join(diagnoses)
227
- text = re.sub(r"\s+", " ", text).strip()
228
- text = re.sub(r"[^\w\s\.\,\(\)\-]", "", text)
229
- return text if text else ""
230
-
231
- def summarize_findings(combined_response: str) -> str:
232
- chunks = combined_response.split("--- Analysis for Chunk")
233
- diagnoses = []
234
- for chunk in chunks:
235
- chunk = chunk.strip()
236
- if not chunk or "No oversights identified" in chunk:
237
  continue
238
- lines = chunk.splitlines()
239
- in_diagnoses_section = False
240
- for line in lines:
241
- line = line.strip()
242
- if not line:
243
- continue
244
- if re.match(r"###\s*Missed Diagnoses", line):
245
- in_diagnoses_section = True
246
- continue
247
- if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
248
- in_diagnoses_section = False
249
- continue
250
- if in_diagnoses_section and re.match(r"-\s*.+", line):
251
- diagnosis = re.sub(r"^\-\s*", "", line).strip()
252
- if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
253
  diagnoses.append(diagnosis)
254
-
255
  seen = set()
256
  unique_diagnoses = [d for d in diagnoses if not (d in seen or seen.add(d))]
257
 
258
- if not unique_diagnoses:
259
- return "No missed diagnoses were identified in the provided records."
260
-
261
  summary = "Missed diagnoses include " + ", ".join(unique_diagnoses[:-1])
262
- if len(unique_diagnoses) > 1:
263
- summary += f", and {unique_diagnoses[-1]}"
264
- elif len(unique_diagnoses) == 1:
265
- summary = "Missed diagnoses include " + unique_diagnoses[0]
266
  summary += ", all of which require urgent clinical review to prevent potential adverse outcomes."
267
 
268
- return summary.strip()
269
 
 
270
  def init_agent():
 
271
  logger.info("Initializing model...")
272
  log_system_usage("Before Load")
273
  default_tool_path = os.path.abspath("data/new_tool.json")
274
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
275
  if not os.path.exists(target_tool_path):
276
  shutil.copy(default_tool_path, target_tool_path)
277
 
278
- llm = LLM(
279
- model="mims-harvard/TxAgent-T1-Llama-3.1-8B",
280
- gpu_memory_utilization=0.8,
281
- max_model_len=2048,
282
- tensor_parallel_size=1,
283
- )
284
- sampling_params = SamplingParams(
285
- temperature=0.2,
286
- max_tokens=256,
287
- stop=["</s>", "[INST]"],
288
  )
289
  log_system_usage("After Load")
290
  logger.info("Agent Ready")
291
- return llm, sampling_params
292
 
293
- async def create_ui(llm, sampling_params):
294
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
295
- gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
296
- chatbot = gr.Chatbot(label="Detailed Analysis", height=600, type="messages")
297
- final_summary = gr.Markdown(label="Summary of Missed Diagnoses")
298
- file_upload = gr.File(file_types=["pdf", "csv", "xls", "xlsx"], file_count="multiple")
299
- msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
300
- send_btn = gr.Button("Analyze", variant="primary")
301
- download_output = gr.File(label="Download Full Report")
302
- progress_bar = gr.Progress()
303
-
304
- prompt_template = """
305
  Analyze the patient record excerpt for missed diagnoses only. Provide a concise, evidence-based summary as a single paragraph without headings or bullet points. Include specific clinical findings (e.g., 'elevated blood pressure (160/95) on page 10'), their potential implications (e.g., 'may indicate untreated hypertension'), and a recommendation for urgent review. Do not include other oversight categories like medication conflicts. If no missed diagnoses are found, state 'No missed diagnoses identified' in a single sentence.
306
  Patient Record Excerpt (Chunk {0} of {1}):
307
  {chunk}
308
  """
309
 
310
- def log_response_partial(text: str):
311
- response_logger.info(text)
312
 
313
- async def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
 
314
  history.append({"role": "user", "content": message})
315
  yield history, None, ""
316
 
 
317
  extracted = []
318
  file_hash_value = ""
319
 
320
  if files:
321
- with ProcessPoolExecutor(max_workers=4) as executor:
322
- futures = []
323
- for f in files:
324
- file_type = f.name.split(".")[-1].lower()
325
- futures.append(executor.submit(
326
- process_file,
327
- f.name,
328
- file_type
329
- ))
330
 
331
- for future in as_completed(futures):
332
- try:
333
- extracted.extend(future.result())
334
- except Exception as e:
335
- logger.error(f"File processing error: {e}")
336
- extracted.append({"error": f"Error processing file: {str(e)}"})
337
 
338
  file_hash_value = file_hash(files[0].name) if files else ""
339
  history.append({"role": "assistant", "content": "✅ File processing complete"})
340
  yield history, None, ""
341
 
342
- text_content = "\n".join(json.dumps(item) for item in extracted)
343
  chunks = tokenize_and_chunk(text_content)
344
  combined_response = ""
345
- batch_size = 1
346
-
347
  try:
348
- for batch_idx in range(0, len(chunks), batch_size):
349
- batch_chunks = chunks[batch_idx:batch_idx + batch_size]
350
  batch_prompts = [
351
- prompt_template.format(
352
  batch_idx + i + 1,
353
  len(chunks),
354
- chunk=chunk[:800]
355
  )
356
  for i, chunk in enumerate(batch_chunks)
357
  ]
358
 
359
- progress((batch_idx) / len(chunks),
360
- desc=f"Analyzing batch {(batch_idx // batch_size) + 1}/{(len(chunks) + batch_size - 1) // batch_size}")
361
 
362
- with torch.no_grad():
363
- for prompt in batch_prompts:
364
  chunk_response = ""
365
- current_response = ""
366
- stream = llm.generate([prompt], sampling_params, use_tqdm=False)
367
- for output in stream:
368
- for request_output in output:
369
- new_text = request_output.outputs[0].text[len(current_response):]
370
- if new_text:
371
- current_response += new_text
372
- cleaned = clean_response(current_response)
373
- if cleaned and cleaned != chunk_response:
374
- chunk_response = cleaned
375
- history[-1] = {"role": "assistant", "content": chunk_response}
376
- threading.Thread(target=log_response_partial, args=(chunk_response,)).start()
377
- yield history, None, ""
378
- await asyncio.sleep(0.01)
379
-
380
- if chunk_response:
381
- combined_response += f"--- Analysis for Chunk {batch_idx + 1} ---\n{chunk_response}\n"
382
-
383
- torch.cuda.empty_cache()
384
- gc.collect()
385
-
386
  summary = summarize_findings(combined_response)
387
- report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
388
- if report_path:
389
- with open(report_path, "w", encoding="utf-8") as f:
390
- f.write(combined_response + "\n\n" + summary)
391
- threading.Thread(target=log_response_partial, args=(summary,)).start()
392
 
393
- yield history, report_path if report_path and os.path.exists(report_path) else None, summary
394
 
395
  except Exception as e:
396
- logger.error("Analysis error: %s", e)
397
  history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
398
- threading.Thread(target=log_response_partial, args=(f"Error: {str(e)}",)).start()
399
  yield history, None, f"Error occurred during analysis: {str(e)}"
400
-
401
- send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
402
- msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
403
  return demo
404
 
405
  if __name__ == "__main__":
406
  try:
407
- logger.info("Launching app...")
408
- llm, sampling_params = init_agent()
409
- demo = asyncio.run(create_ui(llm, sampling_params))
410
- demo.queue(api_open=False).launch(
411
  server_name="0.0.0.0",
412
  server_port=7860,
413
  show_error=True,
414
  allowed_paths=[report_dir],
415
  share=False
416
  )
417
  finally:
418
  if torch.distributed.is_initialized():
419
  torch.distributed.destroy_process_group()
 
5
  import json
6
  import gradio as gr
7
  from typing import List, Dict, Optional, Generator
8
+ from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
11
  import re
 
17
  from diskcache import Cache
18
  import time
19
  from transformers import AutoTokenizer
20
+ from functools import lru_cache
21
+ import numpy as np
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO)
25
  logger = logging.getLogger(__name__)
26
 
27
+ # Constants
28
+ MAX_TOKENS = 1800
29
+ BATCH_SIZE = 2
30
+ MAX_WORKERS = 4
31
+ CHUNK_SIZE = 10 # For PDF processing
32
 
33
+ # Persistent directory setup
34
  persistent_dir = "/data/hf_cache"
35
  os.makedirs(persistent_dir, exist_ok=True)
36
 
 
43
  for directory in [model_cache_dir, tool_cache_dir, file_cache_dir, report_dir, vllm_cache_dir]:
44
  os.makedirs(directory, exist_ok=True)
45
 
46
+ os.environ.update({
47
+ "HF_HOME": model_cache_dir,
48
+ "TRANSFORMERS_CACHE": model_cache_dir,
49
+ "VLLM_CACHE_DIR": vllm_cache_dir,
50
+ "TOKENIZERS_PARALLELISM": "false",
51
+ "CUDA_LAUNCH_BLOCKING": "1"
52
+ })
53
 
54
  # Initialize cache with 10GB limit
55
  cache = Cache(file_cache_dir, size_limit=10 * 1024**3)
56
 
57
+ # Initialize tokenizer for precise chunking (with caching)
58
+ @lru_cache(maxsize=1)
59
+ def get_tokenizer():
60
+ return AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
61
 
62
  def sanitize_utf8(text: str) -> str:
63
+ """Optimized UTF-8 sanitization"""
64
  return text.encode("utf-8", "ignore").decode("utf-8")
65
 
66
  def file_hash(path: str) -> str:
67
+ """Optimized file hashing with buffer reading"""
68
+ hash_md5 = hashlib.md5()
69
  with open(path, "rb") as f:
70
+ for chunk in iter(lambda: f.read(4096), b""):
71
+ hash_md5.update(chunk)
72
+ return hash_md5.hexdigest()
73
 
74
+ def extract_pdf_page(page) -> str:
75
+ """Optimized single page extraction"""
76
+ try:
77
+ text = page.extract_text() or ""
78
+ return f"=== Page {page.page_number} ===\n{text.strip()}"
79
+ except Exception as e:
80
+ logger.warning(f"Error extracting page {page.page_number}: {str(e)}")
81
+ return ""
82
 
83
+ def extract_all_pages(file_path: str, progress_callback=None) -> str:
84
+ """Optimized PDF extraction with memory management"""
85
  try:
86
  with pdfplumber.open(file_path) as pdf:
87
  total_pages = len(pdf.pages)
88
  if total_pages == 0:
89
  return ""
90
 
91
+ # Process in chunks with memory cleanup
92
+ results = []
93
+ for chunk_start in range(0, total_pages, CHUNK_SIZE):
94
+ chunk_end = min(chunk_start + CHUNK_SIZE, total_pages)
95
+
96
  with pdfplumber.open(file_path) as pdf:
97
+ with ThreadPoolExecutor(max_workers=min(CHUNK_SIZE, 4)) as executor:
98
+ futures = [executor.submit(extract_pdf_page, pdf.pages[i])
99
+ for i in range(chunk_start, chunk_end)]
100
+
101
+ for future in as_completed(futures):
102
+ results.append(future.result())
103
+
104
+ if progress_callback:
105
+ progress_callback(min(chunk_end, total_pages), total_pages)
106
+
107
+ # Explicit cleanup
108
+ del pdf
109
+ gc.collect()
110
+
111
+ return "\n\n".join(filter(None, results))
112
  except Exception as e:
113
+ logger.error(f"PDF processing error: {e}")
114
  return f"PDF processing error: {str(e)}"
115
 
116
  def excel_to_json(file_path: str) -> List[Dict]:
117
+ """Optimized Excel processing with chunking"""
118
  try:
119
+ # Try fastest engines first
120
+ for engine in ['openpyxl', 'xlrd']:
121
+ try:
122
+ df = pd.read_excel(
123
+ file_path,
124
+ engine=engine,
125
+ header=None,
126
+ dtype=str,
127
+ na_filter=False
128
+ )
129
+ return [{
130
+ "filename": os.path.basename(file_path),
131
+ "rows": df.values.tolist(),
132
+ "type": "excel"
133
+ }]
134
+ except Exception:
135
+ continue
136
+ raise Exception("No suitable Excel engine found")
137
  except Exception as e:
138
+ logger.error(f"Excel processing error: {e}")
139
+ return [{"error": f"Excel processing error: {str(e)}"}]
140
 
141
  def csv_to_json(file_path: str) -> List[Dict]:
142
+ """Optimized CSV processing with chunking"""
143
  try:
144
+ chunks = []
145
+ for chunk in pd.read_csv(
146
+ file_path,
147
+ header=None,
148
+ dtype=str,
149
+ encoding_errors='replace',
150
+ on_bad_lines='skip',
151
+ chunksize=10000,
152
+ na_filter=False
153
+ ):
154
+ chunks.append(chunk)
155
+
156
+ df = pd.concat(chunks) if chunks else pd.DataFrame()
157
+ return [{
158
  "filename": os.path.basename(file_path),
159
+ "rows": df.values.tolist(),
160
  "type": "csv"
161
  }]
162
  except Exception as e:
163
+ logger.error(f"CSV processing error: {e}")
164
+ return [{"error": f"CSV processing error: {str(e)}"}]
165
 
166
+ @lru_cache(maxsize=100)
167
+ def process_file_cached(file_path: str, file_type: str) -> List[Dict]:
168
+ """Cached file processing with memory optimization"""
169
  try:
170
  if file_type == "pdf":
171
  text = extract_all_pages(file_path)
 
182
  else:
183
  return [{"error": f"Unsupported file type: {file_type}"}]
184
  except Exception as e:
185
+ logger.error(f"Error processing {os.path.basename(file_path)}: {e}")
186
  return [{"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"}]
187
 
188
+ def tokenize_and_chunk(text: str, max_tokens: int = MAX_TOKENS) -> List[str]:
189
+ """Optimized tokenization and chunking"""
190
+ tokenizer = get_tokenizer()
191
  tokens = tokenizer.encode(text, add_special_tokens=False)
192
+ return [
193
+ tokenizer.decode(tokens[i:i + max_tokens])
194
+ for i in range(0, len(tokens), max_tokens)
195
+ ]
196
 
197
  def log_system_usage(tag=""):
198
+ """Optimized system monitoring"""
199
  try:
200
+ cpu = psutil.cpu_percent(interval=0.5)
201
  mem = psutil.virtual_memory()
202
+ logger.info(f"[{tag}] CPU: {cpu:.1f}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
203
+
204
+ # GPU monitoring with timeout
205
+ try:
206
+ result = subprocess.run(
207
+ ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
208
+ capture_output=True,
209
+ text=True,
210
+ timeout=2
211
+ )
212
+ if result.returncode == 0:
213
+ used, total, util = result.stdout.strip().split(", ")
214
+ logger.info(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
215
+ except subprocess.TimeoutExpired:
216
+ logger.warning(f"[{tag}] GPU monitoring timed out")
217
  except Exception as e:
218
+ logger.error(f"[{tag}] Monitor failed: {e}")
219
 
220
  def clean_response(text: str) -> str:
221
+ """Optimized response cleaning with regex compilation"""
222
+ if not text:
223
+ return ""
224
+
225
+ # Pre-compiled regex patterns
226
+ patterns = [
227
+ (re.compile(r"\[.*?\]|\bNone\b"), ""),
228
+ (re.compile(r"To analyze the patient record excerpt.*?medications\."), ""),
229
+ (re.compile(r"Since the previous attempts.*?\."), ""),
230
+ (re.compile(r"I need to.*?medications\."), ""),
231
+ (re.compile(r"Retrieving tools.*?\."), ""),
232
+ (re.compile(r"\s+"), " "),
233
+ (re.compile(r"[^\w\s\.\,\(\)\-]"), "")
234
+ ]
235
+
236
+ for pattern, repl in patterns:
237
+ text = pattern.sub(repl, text)
238
+
239
+ return text.strip()
240
+
241
+ def summarize_findings(combined_response: str) -> str:
242
+ """Optimized findings summarization"""
243
+ if not combined_response:
244
+ return "No missed diagnoses were identified in the provided records."
245
+
246
+ # Pre-compiled regex patterns
247
+ diagnosis_pattern = re.compile(r"-\s*(.+)$")
248
+ section_pattern = re.compile(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)")
249
+ no_issues_pattern = re.compile(r"No issues identified", re.IGNORECASE)
250
+
251
  diagnoses = []
252
+ current_section = None
253
+
254
+ for line in combined_response.splitlines():
255
  line = line.strip()
256
  if not line:
257
  continue
258
+
259
+ # Check section headers
260
+ section_match = section_pattern.match(line)
261
+ if section_match:
262
+ current_section = "diagnoses" if section_match.group(1) == "Missed Diagnoses" else None
263
  continue
264
+
265
+ # Only process diagnosis lines in the correct section
266
+ if current_section == "diagnoses":
267
+ diagnosis_match = diagnosis_pattern.match(line)
268
+ if diagnosis_match and not no_issues_pattern.search(line):
269
+ diagnosis = diagnosis_match.group(1).strip()
270
+ if diagnosis:
271
  diagnoses.append(diagnosis)
272
+
273
+ if not diagnoses:
274
+ return "No missed diagnoses were identified in the provided records."
275
+
276
+ # Remove duplicates while preserving order
277
  seen = set()
278
  unique_diagnoses = [d for d in diagnoses if not (d in seen or seen.add(d))]
279
 
280
+ if len(unique_diagnoses) == 1:
281
+ return f"Missed diagnoses include {unique_diagnoses[0]}"
282
+
283
  summary = "Missed diagnoses include " + ", ".join(unique_diagnoses[:-1])
284
+ summary += f", and {unique_diagnoses[-1]}" if len(unique_diagnoses) > 1 else ""
285
  summary += ", all of which require urgent clinical review to prevent potential adverse outcomes."
286
 
287
+ return summary
288
 
289
+ @lru_cache(maxsize=1)
290
  def init_agent():
291
+ """Cached agent initialization with memory optimization"""
292
  logger.info("Initializing model...")
293
  log_system_usage("Before Load")
294
+
295
+ # Tool setup
296
  default_tool_path = os.path.abspath("data/new_tool.json")
297
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
298
  if not os.path.exists(target_tool_path):
299
  shutil.copy(default_tool_path, target_tool_path)
300
 
301
+ # Initialize with optimized settings
302
+ agent = TxAgent(
303
+ model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
304
+ rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
305
+ tool_files_dict={"new_tool": target_tool_path},
306
+ force_finish=True,
307
+ enable_checker=False,
308
+ step_rag_num=4,
309
+ seed=100,
310
+ additional_default_tools=[],
311
  )
312
+ agent.init_model()
313
+
314
  log_system_usage("After Load")
315
  logger.info("Agent Ready")
316
+ return agent
317
 
318
+ def create_ui(agent):
319
+ """Optimized UI creation with pre-compiled templates"""
320
+ PROMPT_TEMPLATE = """
321
  Analyze the patient record excerpt for missed diagnoses only. Provide a concise, evidence-based summary as a single paragraph without headings or bullet points. Include specific clinical findings (e.g., 'elevated blood pressure (160/95) on page 10'), their potential implications (e.g., 'may indicate untreated hypertension'), and a recommendation for urgent review. Do not include other oversight categories like medication conflicts. If no missed diagnoses are found, state 'No missed diagnoses identified' in a single sentence.
322
  Patient Record Excerpt (Chunk {0} of {1}):
323
  {chunk}
324
  """
325
 
326
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
327
+ gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
328
+
329
+ with gr.Row():
330
+ with gr.Column(scale=3):
331
+ chatbot = gr.Chatbot(label="Detailed Analysis", height=600)
332
+ msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
333
+ send_btn = gr.Button("Analyze", variant="primary")
334
+ file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
335
+
336
+ with gr.Column(scale=1):
337
+ final_summary = gr.Markdown(label="Summary of Missed Diagnoses")
338
+ download_output = gr.File(label="Download Full Report")
339
+ progress_bar = gr.Progress()
340
 
341
+ def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
342
+ """Optimized analysis pipeline with memory management"""
343
  history.append({"role": "user", "content": message})
344
  yield history, None, ""
345
 
346
+ # Process files with caching
347
  extracted = []
348
  file_hash_value = ""
349
 
350
  if files:
351
+ # Use cached results when possible
352
+ for f in files:
353
+ file_type = f.name.split(".")[-1].lower()
354
+ cache_key = f"{file_hash(f.name)}_{file_type}"
355
 
356
+ if cache_key in cache:
357
+ extracted.extend(cache[cache_key])
358
+ else:
359
+ result = process_file_cached(f.name, file_type)
360
+ cache[cache_key] = result
361
+ extracted.extend(result)
362
 
363
  file_hash_value = file_hash(files[0].name) if files else ""
364
  history.append({"role": "assistant", "content": "✅ File processing complete"})
365
  yield history, None, ""
366
 
367
+ # Convert to text with memory efficiency
368
+ text_content = "\n".join(json.dumps(item, ensure_ascii=False) for item in extracted)
369
+ del extracted
370
+ gc.collect()
371
+
372
+ # Tokenize and chunk
373
  chunks = tokenize_and_chunk(text_content)
374
+ del text_content
375
+ gc.collect()
376
+
377
  combined_response = ""
378
+ report_path = None
379
+
380
  try:
381
+ # Process in optimized batches
382
+ for batch_idx in range(0, len(chunks), BATCH_SIZE):
383
+ batch_chunks = chunks[batch_idx:batch_idx + BATCH_SIZE]
384
+
385
+ # Prepare prompts
386
  batch_prompts = [
387
+ PROMPT_TEMPLATE.format(
388
  batch_idx + i + 1,
389
  len(chunks),
390
+ chunk=chunk[:1800] # Conservative size
391
  )
392
  for i, chunk in enumerate(batch_chunks)
393
  ]
394
 
395
+ progress(batch_idx / len(chunks),
396
+ desc=f"Analyzing batch {(batch_idx // BATCH_SIZE) + 1}/{(len(chunks) + BATCH_SIZE - 1) // BATCH_SIZE}")
397
 
398
+ # Process batch
399
+ with ThreadPoolExecutor(max_workers=min(BATCH_SIZE, MAX_WORKERS)) as executor:
400
+ futures = {
401
+ executor.submit(
402
+ agent.run_gradio_chat,
403
+ prompt, [], 0.2, 512, 2048, False, []
404
+ ): idx
405
+ for idx, prompt in enumerate(batch_prompts)
406
+ }
407
+
408
+ for future in as_completed(futures):
409
+ chunk_idx = futures[future]
410
  chunk_response = ""
411
+
412
+ try:
413
+ for chunk_output in future.result():
414
+ if isinstance(chunk_output, (list, str)):
415
+ content = ""
416
+ if isinstance(chunk_output, list):
417
+ content = " ".join(
418
+ clean_response(m.content)
419
+ for m in chunk_output
420
+ if hasattr(m, 'content') and m.content
421
+ )
422
+ elif isinstance(chunk_output, str):
423
+ content = clean_response(chunk_output)
424
+
425
+ if content:
426
+ chunk_response += content + " "
427
+
428
+ if chunk_response:
429
+ combined_response += f"--- Analysis for Chunk {batch_idx + chunk_idx + 1} ---\n{chunk_response.strip()}\n"
430
+ history[-1] = {"role": "assistant", "content": combined_response.strip()}
431
+ yield history, None, ""
432
+ finally:
433
+ # Ensure cleanup
434
+ del future
435
+ torch.cuda.empty_cache()
436
+ gc.collect()
437
+
438
+ # Generate final outputs
439
  summary = summarize_findings(combined_response)
440
 
441
+ if file_hash_value:
442
+ report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
443
+ try:
444
+ with open(report_path, "w", encoding="utf-8") as f:
445
+ f.write(combined_response + "\n\n" + summary)
446
+ except Exception as e:
447
+ logger.error(f"Report save failed: {e}")
448
+ report_path = None
449
+
450
+ yield history, report_path, summary
451
 
452
  except Exception as e:
453
+ logger.error(f"Analysis error: {e}")
454
  history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
 
455
  yield history, None, f"Error occurred during analysis: {str(e)}"
456
+ finally:
457
+ # Final cleanup
458
+ torch.cuda.empty_cache()
459
+ gc.collect()
460
+
461
+ # Event handlers
462
+ send_btn.click(
463
+ analyze,
464
+ inputs=[msg_input, gr.State([]), file_upload],
465
+ outputs=[chatbot, download_output, final_summary]
466
+ )
467
+ msg_input.submit(
468
+ analyze,
469
+ inputs=[msg_input, gr.State([]), file_upload],
470
+ outputs=[chatbot, download_output, final_summary]
471
+ )
472
+
473
  return demo
474
 
475
  if __name__ == "__main__":
476
  try:
477
+ logger.info("Launching optimized app...")
478
+ agent = init_agent()
479
+ demo = create_ui(agent)
480
+ demo.queue(
481
+ api_open=False,
482
+ max_size=20,
483
+ concurrency_count=4
484
+ ).launch(
485
  server_name="0.0.0.0",
486
  server_port=7860,
487
  show_error=True,
488
  allowed_paths=[report_dir],
489
  share=False
490
  )
491
+ except Exception as e:
492
+ logger.error(f"Fatal error: {e}")
493
+ raise
494
  finally:
495
  if torch.distributed.is_initialized():
496
  torch.distributed.destroy_process_group()
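
For reference, the two helper patterns this commit switches to, buffered MD5 hashing and fixed-size token windows, can be exercised outside the app. A minimal standalone sketch, assuming only hashlib and the transformers tokenizer named in the diff; the example file name is hypothetical:

import hashlib
from transformers import AutoTokenizer

def file_hash(path: str, buf_size: int = 4096) -> str:
    # Stream the file through MD5 in fixed-size buffers instead of reading it into memory at once.
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(buf_size), b""):
            md5.update(block)
    return md5.hexdigest()

def tokenize_and_chunk(text: str, max_tokens: int = 1800) -> list:
    # Split text into windows of at most max_tokens tokens, mirroring the chunking in the updated app.py.
    tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
    tokens = tokenizer.encode(text, add_special_tokens=False)
    return [tokenizer.decode(tokens[i:i + max_tokens]) for i in range(0, len(tokens), max_tokens)]

if __name__ == "__main__":
    print(file_hash("example.pdf"))  # hypothetical input file
    print(len(tokenize_and_chunk("some extracted report text")))

Loading the tokenizer inside tokenize_and_chunk keeps the sketch self-contained; the committed code instead caches it behind an lru_cache-wrapped get_tokenizer().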