Ali2206 committed on
Commit 4cf6d2e · verified · 1 Parent(s): e57552a

Update app.py

Files changed (1)
  1. app.py +346 -889
app.py CHANGED
@@ -4,7 +4,7 @@ import pandas as pd
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
- from typing import List, Dict, Generator, Any
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
@@ -14,94 +14,50 @@ import subprocess
14
  import logging
15
  import torch
16
  import gc
17
- import atexit
18
- import signal
19
  from diskcache import Cache
20
  from transformers import AutoTokenizer
21
- from datetime import datetime
22
 
23
  # ==================== CONFIGURATION ====================
24
- # Configure logging
25
  logging.basicConfig(level=logging.INFO)
26
  logger = logging.getLogger(__name__)
27
 
28
- # Setup directories
29
- PERSISTENT_DIR = "/data/hf_cache"
30
  DIRECTORIES = {
31
- "models": os.path.join(PERSISTENT_DIR, "txagent_models"),
32
- "tools": os.path.join(PERSISTENT_DIR, "tool_cache"),
33
- "cache": os.path.join(PERSISTENT_DIR, "cache"),
34
- "reports": os.path.join(PERSISTENT_DIR, "reports"),
35
- "vllm": os.path.join(PERSISTENT_DIR, "vllm_cache")
36
  }
37
 
38
- # Create directories
39
  for dir_path in DIRECTORIES.values():
40
- os.makedirs(dir_path, exist_ok=True)
41
 
42
- # Environment variables
43
  os.environ.update({
44
- "HF_HOME": DIRECTORIES["models"],
45
- "TRANSFORMERS_CACHE": DIRECTORIES["models"],
46
- "VLLM_CACHE_DIR": DIRECTORIES["vllm"],
47
  "TOKENIZERS_PARALLELISM": "false",
48
  "CUDA_LAUNCH_BLOCKING": "1"
49
  })
50
 
51
- # Add src path for txagent
52
- current_dir = os.path.dirname(os.path.abspath(__file__))
53
- src_path = os.path.abspath(os.path.join(current_dir, "src"))
54
- sys.path.insert(0, src_path)
55
-
56
- from txagent.txagent import TxAgent
57
-
58
- # Log Gradio version for debugging
59
- logger.info(f"Gradio version: {gr.__version__}")
60
-
61
- # ==================== UTILITY FUNCTIONS ====================
62
- def sanitize_text(text: str) -> str:
63
- """Clean and sanitize text input"""
64
- return text.encode("utf-8", "ignore").decode("utf-8")
65
-
66
- def get_file_hash(file_path: str) -> str:
67
- """Generate MD5 hash of file content"""
68
- with open(file_path, "rb") as f:
69
- return hashlib.md5(f.read()).hexdigest()
70
-
71
- def log_system_resources(tag: str = "") -> None:
72
- """Log system resource usage"""
73
- try:
74
- cpu = psutil.cpu_percent(interval=1)
75
- mem = psutil.virtual_memory()
76
- logger.info(f"[{tag}] CPU: {cpu:.1f}% | RAM: {mem.used//(1024**2)}MB/{mem.total//(1024**2)}MB")
77
-
78
- gpu_info = subprocess.run(
79
- ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu",
80
- "--format=csv,nounits,noheader"],
81
- capture_output=True, text=True
82
- )
83
- if gpu_info.returncode == 0:
84
- used, total, util = gpu_info.stdout.strip().split(", ")
85
- logger.info(f"[{tag}] GPU: {used}MB/{total}MB | Util: {util}%")
86
- except Exception as e:
87
- logger.error(f"[{tag}] Resource monitoring failed: {e}")
88
-
89
- # ==================== FILE PROCESSING ====================
90
  class FileProcessor:
91
  @staticmethod
92
- def extract_pdf_text(file_path: str, cache: Cache) -> str:
93
- """Extract text from PDF with caching"""
94
- cache_key = f"pdf_{get_file_hash(file_path)}"
95
- if cache_key in cache:
96
- return cache[cache_key]
97
-
98
  try:
99
  with pdfplumber.open(file_path) as pdf:
100
  total_pages = len(pdf.pages)
101
  if not total_pages:
102
  return ""
103
 
104
- def process_page_range(start: int, end: int) -> List[tuple]:
105
  results = []
106
  with pdfplumber.open(file_path) as pdf:
107
  for page in pdf.pages[start:end]:
@@ -110,193 +66,180 @@ class FileProcessor:
110
  results.append((page_num, f"=== Page {page_num + 1} ===\n{text.strip()}"))
111
  return results
112
 
113
- batch_size = 10
114
- batches = [(i, min(i+batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
115
  text_chunks = [""] * total_pages
116
 
117
- with ThreadPoolExecutor(max_workers=2) as executor:
118
- futures = [executor.submit(process_page_range, start, end) for start, end in batches]
119
  for future in as_completed(futures):
120
  for page_num, text in future.result():
121
  text_chunks[page_num] = text
122
 
123
- result = "\n\n".join(filter(None, text_chunks))
124
- cache[cache_key] = result
125
- return result
126
  except Exception as e:
127
- logger.error(f"PDF processing error: {e}")
128
  return f"PDF processing error: {str(e)}"
129
 
130
  @staticmethod
131
- def excel_to_data(file_path: str, cache: Cache) -> List[Dict]:
132
- """Convert Excel file to structured data with caching"""
133
- cache_key = f"excel_{get_file_hash(file_path)}"
134
- if cache_key in cache:
135
- return cache[cache_key]
136
-
137
- try:
138
- df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
139
- content = df.where(pd.notnull(df), "").astype(str).values.tolist()
140
- result = [{"filename": os.path.basename(file_path), "rows": content, "type": "excel"}]
141
- cache[cache_key] = result
142
- return result
143
- except Exception as e:
144
- logger.error(f"Excel processing error: {e}")
145
- return [{"error": f"Excel processing error: {str(e)}"}]
146
-
147
- @staticmethod
148
- def csv_to_data(file_path: str, cache: Cache) -> List[Dict]:
149
- """Convert CSV file to structured data with caching"""
150
- cache_key = f"csv_{get_file_hash(file_path)}"
151
- if cache_key in cache:
152
- return cache[cache_key]
153
-
154
  try:
155
- chunks = []
156
- for chunk in pd.read_csv(
157
- file_path, header=None, dtype=str,
158
- encoding_errors='replace', on_bad_lines='skip', chunksize=10000
159
- ):
160
- chunks.append(chunk)
161
 
162
- df = pd.concat(chunks) if chunks else pd.DataFrame()
163
- content = df.where(pd.notnull(df), "").astype(str).values.tolist()
164
- result = [{"filename": os.path.basename(file_path), "rows": content, "type": "csv"}]
165
- cache[cache_key] = result
166
- return result
167
  except Exception as e:
168
- logger.error(f"CSV processing error: {e}")
169
- return [{"error": f"CSV processing error: {str(e)}"}]
170
 
171
  @classmethod
172
- def process_file(cls, file_path: str, file_type: str, cache: Cache) -> List[Dict]:
173
  """Route file processing based on type"""
174
- processors = {
175
- "pdf": cls.extract_pdf_text,
176
- "xls": cls.excel_to_data,
177
- "xlsx": cls.excel_to_data,
178
- "csv": cls.csv_to_data
179
  }
180
 
181
- if file_type not in processors:
182
  return [{"error": f"Unsupported file type: {file_type}"}]
183
 
184
  try:
185
- result = processors[file_type](file_path, cache)
186
  if file_type == "pdf":
187
  return [{
188
  "filename": os.path.basename(file_path),
189
  "content": result,
190
- "status": "initial",
191
  "type": "pdf"
192
  }]
193
  return result
194
  except Exception as e:
195
- logger.error(f"Error processing {file_type} file: {e}")
196
- return [{"error": f"Error processing file: {str(e)}"}]
197
 
198
- # ==================== TEXT PROCESSING ====================
199
- class TextProcessor:
 
200
  def __init__(self):
201
  self.tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
202
  self.cache = Cache(DIRECTORIES["cache"], size_limit=10*1024**3)
203
-
204
- def chunk_text(self, text: str, max_tokens: int = 1200) -> List[str]:
205
  """Split text into token-limited chunks"""
206
  tokens = self.tokenizer.encode(text)
207
  return [
208
  self.tokenizer.decode(tokens[i:i+max_tokens])
209
  for i in range(0, len(tokens), max_tokens)
210
  ]
211
-
212
- def clean_response(self, text: str) -> str:
213
  """Clean and format model response"""
214
- text = sanitize_text(text)
215
- text = re.sub(r"\[.*?\]|\bNone\b", "", text)
216
 
217
  diagnoses = []
218
- in_diagnoses = False
219
 
220
  for line in text.splitlines():
221
  line = line.strip()
222
  if not line:
223
  continue
224
  if re.match(r"###\s*Missed Diagnoses", line):
225
- in_diagnoses = True
226
  continue
227
  if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
228
- in_diagnoses = False
229
  continue
230
- if in_diagnoses and re.match(r"-\s*.+", line):
231
  diagnosis = re.sub(r"^\-\s*", "", line).strip()
232
  if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
233
  diagnoses.append(diagnosis)
234
 
235
  return " ".join(diagnoses) if diagnoses else ""
236
-
237
- def summarize_results(self, analysis: str) -> str:
238
- """Generate concise summary from full analysis"""
239
- chunks = analysis.split("--- Analysis for Chunk")
240
- diagnoses = []
241
-
242
- for chunk in chunks:
243
  chunk = chunk.strip()
244
  if not chunk or "No oversights identified" in chunk:
245
  continue
246
 
247
- in_diagnoses = False
248
  for line in chunk.splitlines():
249
  line = line.strip()
250
  if not line:
251
  continue
252
  if re.match(r"###\s*Missed Diagnoses", line):
253
- in_diagnoses = True
254
  continue
255
  if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
256
- in_diagnoses = False
257
  continue
258
- if in_diagnoses and re.match(r"-\s*.+", line):
259
- diagnosis = re.sub(r"^\-\s*", "", line).strip()
260
- if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
261
- diagnoses.append(diagnosis)
262
 
263
- unique_diagnoses = list(dict.fromkeys(diagnoses))
264
 
265
- if not unique_diagnoses:
266
- return "No missed diagnoses were identified in the provided records."
267
 
268
- if len(unique_diagnoses) > 1:
269
- summary = "Missed diagnoses include " + ", ".join(unique_diagnoses[:-1])
270
- summary += f", and {unique_diagnoses[-1]}"
271
  else:
272
- summary = "Missed diagnoses include " + unique_diagnoses[0]
273
 
274
- return summary + ", all requiring urgent clinical review."
275
 
276
- # ==================== CORE APPLICATION ====================
277
- class ClinicalOversightApp:
 
278
  def __init__(self):
279
- self.agent = self._initialize_agent()
280
- self.text_processor = TextProcessor()
281
  self.file_processor = FileProcessor()
282
- atexit.register(self.cleanup_resources)
283
- signal.signal(signal.SIGTERM, self._signal_handler)
284
- signal.signal(signal.SIGINT, self._signal_handler)
285
-
286
- def _initialize_agent(self):
287
- """Initialize the TxAgent with proper configuration"""
288
- logger.info("Initializing AI model...")
289
- log_system_resources("Before Load")
290
 
291
- tool_path = os.path.join(DIRECTORIES["tools"], "new_tool.json")
292
- if not os.path.exists(tool_path):
293
- default_tools = os.path.abspath("data/new_tool.json")
294
- shutil.copy(default_tools, tool_path)
295
-
296
  agent = TxAgent(
297
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
298
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
299
- tool_files_dict={"new_tool": tool_path},
300
  force_finish=True,
301
  enable_checker=False,
302
  step_rag_num=4,
@@ -305,785 +248,299 @@ class ClinicalOversightApp:
305
  )
306
  agent.init_model()
307
 
308
- log_system_resources("After Load")
309
- logger.info("AI Agent Ready")
310
  return agent
311
-
312
- def _signal_handler(self, signum, frame):
313
- """Handle termination signals"""
314
- logger.info(f"Received signal {signum}, cleaning up...")
315
- self.cleanup_resources()
316
- sys.exit(0)
317
-
318
- def cleanup_resources(self):
319
- """Clean up GPU memory and collect garbage"""
320
- logger.info("Cleaning up resources...")
321
- log_system_resources("Before Cleanup")
322
- torch.cuda.empty_cache()
323
- gc.collect()
324
- for _ in range(2): # Retry to ensure cleanup
325
- try:
326
- if torch.distributed.is_initialized():
327
- logger.info("Destroying PyTorch distributed process group...")
328
- torch.distributed.destroy_process_group()
329
- break
330
- except Exception as e:
331
- logger.error(f"Cleanup error: {e}")
332
- log_system_resources("After Cleanup")
333
-
334
- def process_response_stream(self, prompt: str, history: List[dict]) -> Generator[dict, None, None]:
335
- """Stream the agent's response with proper formatting"""
336
  full_response = ""
337
  for chunk in self.agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
338
  if not chunk:
339
  continue
340
 
341
  if isinstance(chunk, list):
342
- for message in chunk:
343
- if hasattr(message, 'content') and message.content:
344
- cleaned = self.text_processor.clean_response(message.content)
345
  if cleaned:
346
  full_response += cleaned + " "
347
- yield {
348
- "role": "assistant",
349
- "content": f"✅ {cleaned} [{datetime.now().strftime('%H:%M:%S')}]"
350
- }
351
  elif isinstance(chunk, str) and chunk.strip():
352
- cleaned = self.text_processor.clean_response(chunk)
353
  if cleaned:
354
  full_response += cleaned + " "
355
- yield {
356
- "role": "assistant",
357
- "content": f"✅ {cleaned} [{datetime.now().strftime('%H:%M:%S')}]"
358
- }
359
-
360
- def analyze(self, message: str, history: List[dict], files: List) -> Generator[tuple, None, None]:
361
- """Main analysis pipeline with proper output formatting"""
362
- chatbot_output = history.copy()
363
- download_output = None
364
- final_summary = ""
365
- progress_text = {"value": "Starting analysis...", "visible": True}
366
 
367
  try:
368
- # Add user message to history
369
- chatbot_output.append({
370
- "role": "user",
371
- "content": f"{message} [{datetime.now().strftime('%H:%M:%S')}]"
372
- })
373
- yield (chatbot_output, download_output, final_summary, progress_text)
374
 
375
- # Process uploaded files
376
  extracted = []
377
- file_hash_value = ""
378
 
379
  if files:
380
- with ThreadPoolExecutor(max_workers=2) as executor:
381
  futures = []
382
  for f in files:
383
- file_type = f.name.split(".")[-1].lower()
384
- futures.append(executor.submit(self.file_processor.process_file, f.name, file_type, self.text_processor.cache))
385
 
386
  for i, future in enumerate(as_completed(futures), 1):
387
  try:
388
  extracted.extend(future.result())
389
- progress_text = self._update_progress(i, len(files), "Processing files")
390
- yield (chatbot_output, download_output, final_summary, progress_text)
391
  except Exception as e:
392
- logger.error(f"File processing error: {e}")
393
- extracted.append({"error": f"Error processing file: {str(e)}"})
394
 
395
- file_hash_value = get_file_hash(files[0].name) if files else ""
396
- chatbot_output.append({
397
- "role": "assistant",
398
- "content": f"✅ File processing complete [{datetime.now().strftime('%H:%M:%S')}]"
399
  })
400
- progress_text = self._update_progress(len(files), len(files), "Files processed")
401
- yield (chatbot_output, download_output, final_summary, progress_text)
402
 
403
  # Analyze content
404
  text_content = "\n".join(json.dumps(item) for item in extracted)
405
- chunks = self.text_processor.chunk_text(text_content)
406
- combined_response = ""
407
 
408
- for chunk_idx, chunk in enumerate(chunks, 1):
409
  prompt = f"""
410
- Analyze this patient record for missed diagnoses. Provide a concise, evidence-based summary
411
- as a single paragraph without headings or bullet points. Include specific clinical findings
412
- with their potential implications and urgent review recommendations. If no missed diagnoses
413
- are found, state 'No missed diagnoses identified'.
414
-
415
- Patient Record (Chunk {chunk_idx}/{len(chunks)}):
416
- {chunk[:1200]}
 
417
  """
418
- chatbot_output.append({"role": "assistant", "content": "⏳ Analyzing..."})
419
- progress_text = self._update_progress(chunk_idx, len(chunks), "Analyzing")
420
- yield (chatbot_output, download_output, final_summary, progress_text)
421
 
422
- # Stream response
423
  chunk_response = ""
424
- for update in self.process_response_stream(prompt, chatbot_output):
425
- chatbot_output[-1] = update
426
  chunk_response = update["content"]
427
- progress_text = self._update_progress(chunk_idx, len(chunks), "Analyzing")
428
- yield (chatbot_output, download_output, final_summary, progress_text)
429
 
430
- combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
431
- self.cleanup_resources()
 
432
 
433
- # Generate final outputs
434
- final_summary = self.text_processor.summarize_results(combined_response)
435
- report_path = os.path.join(DIRECTORIES["reports"], f"{file_hash_value}_report.txt") if file_hash_value else None
436
 
437
  if report_path:
438
  with open(report_path, "w", encoding="utf-8") as f:
439
- f.write(combined_response + "\n\n" + final_summary)
440
 
441
- download_output = report_path if report_path and os.path.exists(report_path) else None
442
- progress_text = {"visible": False}
443
- yield (chatbot_output, download_output, final_summary, progress_text)
444
 
445
  except Exception as e:
446
- logger.error(f"Analysis error: {e}")
447
- chatbot_output.append({
448
- "role": "assistant",
449
- "content": f"❌ Error: {str(e)} [{datetime.now().strftime('%H:%M:%S')}]"
450
  })
451
- final_summary = f"Error occurred: {str(e)}"
452
- progress_text = {"visible": False}
453
- yield (chatbot_output, download_output, final_summary, progress_text)
454
- finally:
455
- self.cleanup_resources()
456
-
457
- def _update_progress(self, current: int, total: int, stage: str = "") -> Dict[str, Any]:
458
  """Format progress update for UI"""
459
- progress = f"{stage} - {current}/{total}" if stage else f"{current}/{total}"
460
- return {"value": progress, "visible": True}
461
-
462
- def toggle_theme(self, theme_state: str) -> tuple[str, str]:
463
- """Toggle between light and dark themes"""
464
- new_theme = "dark" if theme_state == "light" else "light"
465
- button_text = "☀️ Light Mode" if new_theme == "dark" else "🌙 Dark Mode"
466
- return new_theme, button_text
467
-
468
- def toggle_sidebar(self, sidebar_state: bool) -> bool:
469
- """Toggle sidebar visibility"""
470
- return not sidebar_state
471
-
472
- def create_interface(self):
473
- """Create Gradio interface with refined ChatGPT-like design"""
474
- css = """
475
- /* ==================== BASE STYLES ==================== */
476
- :root {
477
- --primary-color: #007bff;
478
- --primary-dark: #0056b3;
479
- --border-radius: 12px;
480
- --transition: all 0.3s ease;
481
- --shadow: 0 4px 12px rgba(0,0,0,0.15);
482
- --font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
483
- --background: #ffffff;
484
- --text-color: #333333;
485
- --chat-bg: #f9fafb;
486
- --message-bg: #e5e5ea;
487
- --sidebar-bg: rgba(241, 243, 245, 0.9);
488
- --sidebar-dark-bg: rgba(42, 54, 80, 0.9);
489
- }
490
-
491
- [data-theme="dark"] {
492
- --background: #1e2a44;
493
- --text-color: #ffffff;
494
- --chat-bg: #2d3b55;
495
- --message-bg: #3e4c6a;
496
- --sidebar-bg: var(--sidebar-dark-bg);
497
- }
498
-
499
- body, .gradio-container {
500
- font-family: var(--font-family);
501
- background: var(--background);
502
- color: var(--text-color);
503
- margin: 0;
504
- padding: 0;
505
- transition: var(--transition);
506
- }
507
-
508
- /* ==================== LAYOUT ==================== */
509
- .gradio-container {
510
- max-width: 900px;
511
- margin: 0 auto;
512
- padding: 1.5rem;
513
- display: flex;
514
- flex-direction: column;
515
- gap: 1.5rem;
516
- }
517
-
518
- .chat-container {
519
- background: var(--chat-bg);
520
- border-radius: var(--border-radius);
521
- padding: 1.5rem;
522
- min-height: 60vh;
523
- max-height: 80vh;
524
- overflow-y: auto;
525
- box-shadow: var(--shadow);
526
- position: relative;
527
- margin-bottom: 5rem; /* Space for sticky input */
528
- }
529
-
530
- .header {
531
- text-align: center;
532
- margin-bottom: 1.5rem;
533
- }
534
-
535
- .header h1 {
536
- font-size: 1.8rem;
537
- margin: 0.5rem 0;
538
- }
539
-
540
- .header p {
541
- font-size: 1rem;
542
- opacity: 0.7;
543
- }
544
-
545
- /* ==================== COMPONENTS ==================== */
546
- .chat__message {
547
- margin: 0.75rem 0;
548
- padding: 0.75rem 1rem;
549
- border-radius: var(--border-radius);
550
- max-width: 85%;
551
- transition: var(--transition);
552
- background: var(--message-bg);
553
- border: 1px solid rgba(0,0,0,0.05);
554
- animation: messageFade 0.3s ease;
555
- }
556
-
557
- .chat__message:hover {
558
- transform: translateY(-2px);
559
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
560
- }
561
-
562
- .chat__message.user {
563
- background: linear-gradient(135deg, var(--primary-color), var(--primary-dark));
564
- color: white;
565
- margin-left: auto;
566
- }
567
-
568
- .chat__message.assistant {
569
- background: var(--message-bg);
570
- color: var(--text-color);
571
- }
572
-
573
- .chat__message-timestamp {
574
- font-size: 0.75rem;
575
- opacity: 0.7;
576
- margin-top: 0.25rem;
577
- text-align: right;
578
- }
579
-
580
- .input-container {
581
- display: flex;
582
- align-items: center;
583
- gap: 0.75rem;
584
- background: var(--chat-bg);
585
- padding: 0.75rem 1rem;
586
- border-radius: 1.5rem;
587
- box-shadow: var(--shadow);
588
- position: sticky;
589
- bottom: 1rem;
590
- z-index: 10;
591
- }
592
-
593
- .input-textbox {
594
- flex-grow: 1;
595
- border: none;
596
- background: transparent;
597
- color: var(--text-color);
598
- outline: none;
599
- font-size: 1rem;
600
- }
601
-
602
- .input-textbox:focus {
603
- border-bottom: 2px solid var(--primary-color);
604
- }
605
-
606
- .send-btn {
607
- background: linear-gradient(135deg, var(--primary-color), var(--primary-dark));
608
- color: white;
609
- border: none;
610
- border-radius: 1rem;
611
- padding: 0.5rem 1.25rem;
612
- font-size: 0.9rem;
613
- transition: var(--transition);
614
- }
615
-
616
- .send-btn:hover {
617
- transform: scale(1.05);
618
- }
619
-
620
- .send-btn:active {
621
- animation: glow 0.3s ease;
622
- }
623
-
624
- .sidebar {
625
- background: var(--sidebar-bg);
626
- padding: 1.5rem;
627
- border-radius: var(--border-radius);
628
- box-shadow: var(--shadow);
629
- transition: transform 0.4s ease, opacity 0.4s ease;
630
- position: fixed;
631
- right: 1rem;
632
- top: 5rem;
633
- width: 320px;
634
- max-height: calc(100vh - 6rem);
635
- overflow-y: auto;
636
- z-index: 1000;
637
- animation: fadeInUp 0.4s ease;
638
- }
639
-
640
- .sidebar-hidden {
641
- transform: translateX(100%);
642
- opacity: 0;
643
- }
644
-
645
- .sidebar-backdrop {
646
- position: fixed;
647
- top: 0;
648
- left: 0;
649
- width: 100%;
650
- height: 100%;
651
- background: rgba(0,0,0,0.4);
652
- z-index: 999;
653
- opacity: 0;
654
- transition: opacity 0.4s ease;
655
- pointer-events: none;
656
- }
657
-
658
- .sidebar:not(.sidebar-hidden) ~ .sidebar-backdrop {
659
- opacity: 1;
660
- pointer-events: auto;
661
- }
662
-
663
- .sidebar__tooltip, .file-tooltip {
664
- display: block;
665
- margin-bottom: 1rem;
666
- }
667
-
668
- .sidebar__tooltip:hover::after, .file-tooltip:hover::after {
669
- content: attr(data-tip);
670
- position: absolute;
671
- top: -2.5rem;
672
- left: 50%;
673
- transform: translateX(-50%);
674
- background: #333;
675
- color: white;
676
- padding: 0.4rem 0.8rem;
677
- border-radius: 0.4rem;
678
- font-size: 0.85rem;
679
- max-width: 200px;
680
- white-space: normal;
681
- text-align: center;
682
- z-index: 1000;
683
- animation: fadeIn 0.3s ease;
684
- }
685
-
686
- .theme-toggle {
687
- background: linear-gradient(135deg, var(--primary-color), var(--primary-dark));
688
- color: white;
689
- border: none;
690
- border-radius: 1rem;
691
- padding: 0.5rem 1rem;
692
- font-size: 0.9rem;
693
- position: fixed;
694
- top: 1rem;
695
- right: 1rem;
696
- z-index: 100;
697
- display: flex;
698
- align-items: center;
699
- gap: 0.5rem;
700
- }
701
-
702
- .tools-button {
703
- background: var(--message-bg);
704
- color: var(--text-color);
705
- border: none;
706
- border-radius: 1rem;
707
- padding: 0.5rem 1.25rem;
708
- font-size: 0.9rem;
709
- transition: var(--transition);
710
- }
711
-
712
- .tools-button:hover {
713
- background: var(--primary-color);
714
- color: white;
715
- }
716
-
717
- .loading-spinner {
718
- position: absolute;
719
- bottom: 4rem;
720
- left: 50%;
721
- transform: translateX(-50%);
722
- font-size: 1.2rem;
723
- animation: glow 1.5s ease infinite;
724
- }
725
-
726
- .typing-indicator {
727
- display: none;
728
- font-size: 0.9rem;
729
- color: var(--text-color);
730
- opacity: 0.7;
731
- margin: 0.75rem;
732
- }
733
-
734
- .typing-indicator.active {
735
- display: block;
736
- animation: blink 1s step-end infinite;
737
- }
738
-
739
- .progress-text {
740
- position: relative;
741
- padding: 0.5rem;
742
- background: var(--message-bg);
743
- border-radius: var(--border-radius);
744
- margin-top: 0.75rem;
745
- overflow: hidden;
746
- }
747
-
748
- .progress-text::before {
749
- content: '';
750
- position: absolute;
751
- top: 0;
752
- left: 0;
753
- height: 100%;
754
- width: 0;
755
- background: linear-gradient(to right, var(--primary-color), var(--primary-dark));
756
- opacity: 0.3;
757
- animation: progress 2s ease-in-out infinite;
758
- }
759
-
760
- /* ==================== ANIMATIONS ==================== */
761
- @keyframes glow {
762
- 0%, 100% { transform: translateX(-50%) scale(1); opacity: 1; color: var(--primary-color); }
763
- 50% { transform: translateX(-50%) scale(1.2); opacity: 0.7; color: var(--primary-dark); }
764
- }
765
-
766
- @keyframes blink {
767
- 50% { opacity: 0.3; }
768
- }
769
-
770
- @keyframes fadeIn {
771
- from { opacity: 0; }
772
- to { opacity: 1; }
773
- }
774
-
775
- @keyframes fadeInUp {
776
- from { opacity: 0; transform: translateY(20px); }
777
- to { opacity: 1; transform: translateY(0); }
778
- }
779
-
780
- @keyframes messageFade {
781
- from { opacity: 0; transform: translateY(10px) scale(0.95); }
782
- to { opacity: 1; transform: translateY(0) scale(1); }
783
- }
784
-
785
- @keyframes progress {
786
- 0% { width: 0; }
787
- 50% { width: 60%; }
788
- 100% { width: 0; }
789
- }
790
-
791
- /* ==================== MEDIA QUERIES ==================== */
792
- @media (max-width: 768px) {
793
- .gradio-container {
794
- padding: 1rem;
795
- }
796
-
797
- .chat-container {
798
- min-height: 50vh;
799
- max-height: 70vh;
800
- margin-bottom: 4rem;
801
- }
802
-
803
- .sidebar {
804
- width: 100%;
805
- right: 0;
806
- top: 4rem;
807
- max-height: calc(100vh - 4rem);
808
- }
809
-
810
- .theme-toggle {
811
- top: 0.5rem;
812
- right: 0.5rem;
813
- padding: 0.4rem 0.8rem;
814
- font-size: 0.85rem;
815
- }
816
-
817
- .input-container {
818
- gap: 0.5rem;
819
- padding: 0.5rem;
820
  }
821
-
822
- .send-btn {
823
- padding: 0.4rem 1rem;
824
  }
825
- }
826
-
827
- @media (max-width: 480px) {
828
  .chat-container {
829
- padding: 1rem;
830
- margin-bottom: 3.5rem;
831
- }
832
-
833
- .input-container {
834
- flex-direction: column;
835
- padding: 0.5rem;
836
- }
837
-
838
- .input-textbox {
839
- font-size: 0.9rem;
840
- }
841
-
842
- .send-btn {
843
- width: 100%;
844
- padding: 0.5rem;
845
- font-size: 0.85rem;
846
  }
847
-
848
- .chat__message {
849
- max-width: 90%;
850
- padding: 0.5rem 0.75rem;
851
- }
852
-
853
- .header h1 {
854
- font-size: 1.5rem;
855
- }
856
-
857
- .header p {
858
- font-size: 0.9rem;
859
- }
860
-
861
- .sidebar {
862
- top: 3.5rem;
863
- max-height: calc(100vh - 3.5rem);
864
- animation: fadeInUp 0.4s ease;
865
- }
866
-
867
- .sidebar__tooltip:hover::after, .file-tooltip:hover::after {
868
- top: auto;
869
- bottom: -2.5rem;
870
- max-width: 80vw;
871
- }
872
- }
873
- """
874
-
875
- js = """
876
- function applyTheme(theme) {
877
- document.documentElement.setAttribute('data-theme', theme);
878
- localStorage.setItem('theme', theme);
879
- document.querySelector('.theme-toggle').innerHTML = theme === 'dark' ? '☀️ Light Mode' : '🌙 Dark Mode';
880
- }
881
-
882
- function toggleSidebar() {
883
- const sidebar = document.querySelector('.sidebar');
884
- sidebar.classList.toggle('sidebar-hidden');
885
- if (!sidebar.classList.contains('sidebar-hidden')) {
886
- setTimeout(() => {
887
- if (window.innerWidth <= 600) {
888
- sidebar.classList.add('sidebar-hidden');
889
- }
890
- }, 5000);
891
- }
892
- }
893
-
894
- document.addEventListener('DOMContentLoaded', () => {
895
- const savedTheme = localStorage.getItem('theme') || 'light';
896
- applyTheme(savedTheme);
897
- document.querySelector('.sidebar').classList.add('sidebar-hidden');
898
- });
899
- """
900
-
901
- with gr.Blocks(theme=gr.themes.Default(), css=css, js=js, title="Clinical Oversight Assistant") as app:
902
- try:
903
- theme_state = gr.State(value="light")
904
- sidebar_state = gr.State(value=False)
905
-
906
- gr.HTML("""
907
- <div class='header'>
908
- <h1 style='color: var(--text-color);'>🩺 Clinical Oversight Assistant</h1>
909
- <p style='color: var(--text-color); opacity: 0.7;'>
910
- AI-powered analysis of patient records for missed diagnoses
911
- </p>
912
- </div>
913
- <div class='sidebar-backdrop'></div>
914
- """)
915
-
916
- theme_button = gr.Button("🌙 Dark Mode", elem_classes="theme-toggle")
917
-
918
- with gr.Column(elem_classes="chat-container"):
919
  chatbot = gr.Chatbot(
920
- label="Clinical Analysis",
921
- height="100%",
922
  show_copy_button=True,
923
  type="messages",
924
- elem_classes="chatbot",
925
- render_markdown=True
926
- )
927
- gr.HTML("<div class='loading-spinner' style='display: none;'>⏳</div>")
928
- gr.HTML("<div class='typing-indicator'>Typing...</div>")
929
-
930
- with gr.Row():
931
- tools_button = gr.Button("📂 Tools", variant="secondary", elem_classes="tools-button")
932
-
933
- with gr.Column(elem_classes="sidebar"):
934
- gr.Markdown(
935
- "<div class='sidebar__tooltip' data-tip='Upload patient records'>### 📎 Upload Records</div>",
936
- elem_classes="markdown-tooltip"
937
  )
938
- gr.HTML(
939
- "<div class='file-tooltip' data-tip='Select PDF, CSV, or Excel files'>"
940
- )
941
- file_upload = gr.File(
942
- file_types=[".pdf", ".csv", ".xls", ".xlsx"],
943
- file_count="multiple",
944
- label="Patient Records",
945
- elem_classes="file-input"
946
- )
947
- gr.HTML("</div>")
948
- gr.Markdown(
949
- "<div class='sidebar__tooltip' data-tip='Summary of findings'>### 📝 Analysis Summary</div>",
950
- elem_classes="markdown-tooltip"
951
- )
952
- final_summary = gr.Markdown(
953
- "<div class='sidebar__tooltip' data-tip='View analysis results'>Analysis results will appear here...</div>",
954
- elem_classes="markdown-tooltip"
955
- )
956
- gr.Markdown(
957
- "<div class='sidebar__tooltip' data-tip='Download full report'>### 📄 Full Report</div>",
958
- elem_classes="markdown-tooltip"
959
- )
960
- gr.HTML(
961
- "<div class='file-tooltip' data-tip='Download analysis report'>"
962
- )
963
- download_output = gr.File(
964
- label="Download Report",
965
- visible=False,
966
- interactive=False,
967
- elem_classes="file-output"
968
- )
969
- gr.HTML("</div>")
970
-
971
- with gr.Row(elem_classes="input-container"):
972
- msg_input = gr.Textbox(
973
- placeholder="Ask about potential oversights or upload files...",
974
- show_label=False,
975
- container=False,
976
- elem_classes="input-textbox",
977
- autofocus=True
978
- )
979
- send_btn = gr.Button(
980
- "Analyze",
981
- variant="primary",
982
- elem_classes="send-btn"
983
- )
984
-
985
- progress_text = gr.Textbox(
986
- label="Progress Status",
987
- visible=False,
988
- interactive=False,
989
- elem_classes="progress-text"
990
- )
991
 
992
- def show_loading(state: bool) -> dict:
993
- return {
994
- "value": "<div class='loading-spinner'>⏳</div>" if state else "<div class='loading-spinner' style='display: none;'>⏳</div>",
995
- "visible": state
996
- }
997
-
998
- def show_typing(state: bool) -> dict:
999
- return {
1000
- "value": f"<div class='typing-indicator{' active' if state else ''}'>Typing...</div>",
1001
- "visible": state
1002
- }
1003
-
1004
- # Theme toggle handler
1005
- theme_button.click(
1006
- fn=self.toggle_theme,
1007
- inputs=[theme_state],
1008
- outputs=[theme_state, theme_button]
1009
- )
1010
-
1011
- # Sidebar toggle handler
1012
- tools_button.click(
1013
- fn=self.toggle_sidebar,
1014
- inputs=[sidebar_state],
1015
- outputs=[sidebar_state]
1016
  )
1017
 
1018
- # Analysis handlers
1019
- send_btn.click(
1020
- fn=show_loading,
1021
- inputs=[gr.State(value=True)],
1022
- outputs=[chatbot]
1023
- ).then(
1024
- fn=show_typing,
1025
- inputs=[gr.State(value=True)],
1026
- outputs=[chatbot]
1027
- ).then(
1028
- fn=self.analyze,
1029
- inputs=[msg_input, chatbot, file_upload],
1030
- outputs=[chatbot, download_output, final_summary, progress_text],
1031
- show_progress="hidden"
1032
- ).then(
1033
- fn=show_loading,
1034
- inputs=[gr.State(value=False)],
1035
- outputs=[chatbot]
1036
- ).then(
1037
- fn=show_typing,
1038
- inputs=[gr.State(value=False)],
1039
- outputs=[chatbot]
1040
  )
1041
-
1042
- msg_input.submit(
1043
- fn=show_loading,
1044
- inputs=[gr.State(value=True)],
1045
- outputs=[chatbot]
1046
- ).then(
1047
- fn=show_typing,
1048
- inputs=[gr.State(value=True)],
1049
- outputs=[chatbot]
1050
- ).then(
1051
- fn=self.analyze,
1052
- inputs=[msg_input, chatbot, file_upload],
1053
- outputs=[chatbot, download_output, final_summary, progress_text],
1054
- show_progress="hidden"
1055
- ).then(
1056
- fn=show_loading,
1057
- inputs=[gr.State(value=False)],
1058
- outputs=[chatbot]
1059
- ).then(
1060
- fn=show_typing,
1061
- inputs=[gr.State(value=False)],
1062
- outputs=[chatbot]
1063
- )
1064
-
1065
- app.load(
1066
- fn=lambda: [
1067
- [], None, "<div class='sidebar__tooltip' data-tip='View analysis results'>Analysis results will appear here...</div>",
1068
- "", None, {"visible": False}, "light", False, "🌙 Dark Mode"
1069
- ],
1070
- outputs=[chatbot, download_output, final_summary, msg_input, file_upload, progress_text, theme_state, sidebar_state, theme_button],
1071
- queue=False
1072
  )
1073
 
1074
- except Exception as e:
1075
- logger.error(f"Interface creation failed: {e}")
1076
- self.cleanup_resources()
1077
- raise
1078
- return app
1079
 
1080
  # ==================== APPLICATION ENTRY POINT ====================
1081
  if __name__ == "__main__":
1082
- app = None
1083
  try:
1084
- logger.info("Starting Clinical Oversight Assistant...")
1085
- app = ClinicalOversightApp()
1086
- interface = app.create_interface()
1087
 
1088
  interface.queue(
1089
  api_open=False,
@@ -1092,12 +549,12 @@ if __name__ == "__main__":
1092
  server_name="0.0.0.0",
1093
  server_port=7860,
1094
  show_error=True,
1095
- allowed_paths=[DIRECTORIES["reports"]],
1096
  share=False
1097
  )
1098
  except Exception as e:
1099
  logger.error(f"Application failed to start: {e}")
1100
  raise
1101
  finally:
1102
- if app:
1103
- app.cleanup_resources()
 
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
+ from typing import List, Dict, Generator, Any, Optional
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
 
14
  import logging
15
  import torch
16
  import gc
17
  from diskcache import Cache
18
  from transformers import AutoTokenizer
19
+ from pathlib import Path
20
 
21
  # ==================== CONFIGURATION ====================
 
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
+ # Directory Setup
26
+ BASE_DIR = Path("/data/hf_cache")
27
  DIRECTORIES = {
28
+ "models": BASE_DIR / "txagent_models",
29
+ "tools": BASE_DIR / "tool_cache",
30
+ "cache": BASE_DIR / "cache",
31
+ "reports": BASE_DIR / "reports",
32
+ "vllm": BASE_DIR / "vllm_cache"
33
  }
34
 
 
35
  for dir_path in DIRECTORIES.values():
36
+ dir_path.mkdir(parents=True, exist_ok=True)
37
 
38
+ # Environment Configuration
39
  os.environ.update({
40
+ "HF_HOME": str(DIRECTORIES["models"]),
41
+ "TRANSFORMERS_CACHE": str(DIRECTORIES["models"]),
42
+ "VLLM_CACHE_DIR": str(DIRECTORIES["vllm"]),
43
  "TOKENIZERS_PARALLELISM": "false",
44
  "CUDA_LAUNCH_BLOCKING": "1"
45
  })
46
 
47
+ # ==================== CORE COMPONENTS ====================
48
  class FileProcessor:
49
+ """Handles all file processing operations"""
50
+
51
  @staticmethod
52
+ def extract_pdf_content(file_path: str) -> str:
53
+ """Extract text from PDF with parallel processing"""
54
  try:
55
  with pdfplumber.open(file_path) as pdf:
56
  total_pages = len(pdf.pages)
57
  if not total_pages:
58
  return ""
59
 
60
+ def process_batch(start: int, end: int) -> List[tuple]:
61
  results = []
62
  with pdfplumber.open(file_path) as pdf:
63
  for page in pdf.pages[start:end]:
 
66
  results.append((page_num, f"=== Page {page_num + 1} ===\n{text.strip()}"))
67
  return results
68
 
69
+ batch_size = min(10, total_pages)
70
+ batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
71
  text_chunks = [""] * total_pages
72
 
73
+ with ThreadPoolExecutor(max_workers=min(6, os.cpu_count() or 4)) as executor:
74
+ futures = [executor.submit(process_batch, start, end) for start, end in batches]
75
  for future in as_completed(futures):
76
  for page_num, text in future.result():
77
  text_chunks[page_num] = text
78
 
79
+ return "\n\n".join(filter(None, text_chunks))
80
  except Exception as e:
81
+ logger.error(f"PDF extraction failed: {e}")
82
  return f"PDF processing error: {str(e)}"
83
 
84
  @staticmethod
85
+ def process_tabular_data(file_path: str, file_type: str) -> List[Dict]:
86
+ """Process Excel or CSV files"""
87
  try:
88
+ if file_type == "csv":
89
+ chunks = pd.read_csv(
90
+ file_path,
91
+ header=None,
92
+ dtype=str,
93
+ encoding_errors='replace',
94
+ on_bad_lines='skip',
95
+ chunksize=10000
96
+ )
97
+ df = pd.concat(chunks) if chunks else pd.DataFrame()
98
+ else: # Excel
99
+ try:
100
+ df = pd.read_excel(file_path, engine='openpyxl', header=None, dtype=str)
101
+ except:
102
+ df = pd.read_excel(file_path, engine='xlrd', header=None, dtype=str)
103
 
104
+ return [{
105
+ "filename": os.path.basename(file_path),
106
+ "rows": df.where(pd.notnull(df), "").astype(str).values.tolist(),
107
+ "type": file_type
108
+ }]
109
  except Exception as e:
110
+ logger.error(f"{file_type.upper()} processing failed: {e}")
111
+ return [{"error": f"{file_type.upper()} processing error: {str(e)}"}]
112
 
113
  @classmethod
114
+ def handle_upload(cls, file_path: str, file_type: str) -> List[Dict]:
115
  """Route file processing based on type"""
116
+ processor_map = {
117
+ "pdf": cls.extract_pdf_content,
118
+ "xls": lambda x: cls.process_tabular_data(x, "excel"),
119
+ "xlsx": lambda x: cls.process_tabular_data(x, "excel"),
120
+ "csv": lambda x: cls.process_tabular_data(x, "csv")
121
  }
122
 
123
+ if file_type not in processor_map:
124
  return [{"error": f"Unsupported file type: {file_type}"}]
125
 
126
  try:
127
+ result = processor_map[file_type](file_path)
128
  if file_type == "pdf":
129
  return [{
130
  "filename": os.path.basename(file_path),
131
  "content": result,
 
132
  "type": "pdf"
133
  }]
134
  return result
135
  except Exception as e:
136
+ logger.error(f"File processing failed: {e}")
137
+ return [{"error": f"File processing error: {str(e)}"}]
138
 
139
+ class TextAnalyzer:
140
+ """Handles text processing and analysis"""
141
+
142
  def __init__(self):
143
  self.tokenizer = AutoTokenizer.from_pretrained("mims-harvard/TxAgent-T1-Llama-3.1-8B")
144
  self.cache = Cache(DIRECTORIES["cache"], size_limit=10*1024**3)
145
+
146
+ def chunk_content(self, text: str, max_tokens: int = 1800) -> List[str]:
147
  """Split text into token-limited chunks"""
148
  tokens = self.tokenizer.encode(text)
149
  return [
150
  self.tokenizer.decode(tokens[i:i+max_tokens])
151
  for i in range(0, len(tokens), max_tokens)
152
  ]
153
+
154
+ def clean_output(self, text: str) -> str:
155
  """Clean and format model response"""
156
+ text = text.encode("utf-8", "ignore").decode("utf-8")
157
+ text = re.sub(
158
+ r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\."
159
+ r"|Since the previous attempts.*?\.|I need to.*?medications\."
160
+ r"|Retrieving tools.*?\.", "", text, flags=re.DOTALL
161
+ )
162
 
163
  diagnoses = []
164
+ in_section = False
165
 
166
  for line in text.splitlines():
167
  line = line.strip()
168
  if not line:
169
  continue
170
  if re.match(r"###\s*Missed Diagnoses", line):
171
+ in_section = True
172
  continue
173
  if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
174
+ in_section = False
175
  continue
176
+ if in_section and re.match(r"-\s*.+", line):
177
  diagnosis = re.sub(r"^\-\s*", "", line).strip()
178
  if diagnosis and not re.match(r"No issues identified", diagnosis, re.IGNORECASE):
179
  diagnoses.append(diagnosis)
180
 
181
  return " ".join(diagnoses) if diagnoses else ""
182
+
183
+ def generate_summary(self, analysis: str) -> str:
184
+ """Create concise clinical summary"""
185
+ findings = []
186
+ for chunk in analysis.split("--- Analysis for Chunk"):
187
  chunk = chunk.strip()
188
  if not chunk or "No oversights identified" in chunk:
189
  continue
190
 
191
+ in_section = False
192
  for line in chunk.splitlines():
193
  line = line.strip()
194
  if not line:
195
  continue
196
  if re.match(r"###\s*Missed Diagnoses", line):
197
+ in_section = True
198
  continue
199
  if re.match(r"###\s*(Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line):
200
+ in_section = False
201
  continue
202
+ if in_section and re.match(r"-\s*.+", line):
203
+ finding = re.sub(r"^\-\s*", "", line).strip()
204
+ if finding and not re.match(r"No issues identified", finding, re.IGNORECASE):
205
+ findings.append(finding)
206
 
207
+ unique_findings = list(dict.fromkeys(findings))
208
 
209
+ if not unique_findings:
210
+ return "No clinical concerns identified in the provided records."
211
 
212
+ if len(unique_findings) > 1:
213
+ summary = "Potential concerns include: " + ", ".join(unique_findings[:-1])
214
+ summary += f", and {unique_findings[-1]}"
215
  else:
216
+ summary = "Potential concern identified: " + unique_findings[0]
217
 
218
+ return summary + ". Recommend urgent clinical review."
219
 
220
+ class ClinicalAgent:
221
+ """Main application controller"""
222
+
223
  def __init__(self):
224
+ self.agent = self._init_agent()
 
225
  self.file_processor = FileProcessor()
226
+ self.text_analyzer = TextAnalyzer()
227
+
228
+ def _init_agent(self) -> Any:
229
+ """Initialize the AI agent"""
230
+ logger.info("Initializing clinical agent...")
231
+ self._log_system_status("pre-init")
232
+
233
+ tool_path = DIRECTORIES["tools"] / "new_tool.json"
234
+ if not tool_path.exists():
235
+ default_tools = Path("data/new_tool.json")
236
+ if default_tools.exists():
237
+ shutil.copy(default_tools, tool_path)
238
 
239
  agent = TxAgent(
240
  model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
241
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
242
+ tool_files_dict={"new_tool": str(tool_path)},
243
  force_finish=True,
244
  enable_checker=False,
245
  step_rag_num=4,
 
248
  )
249
  agent.init_model()
250
 
251
+ self._log_system_status("post-init")
252
+ logger.info("Clinical agent ready")
253
  return agent
254
+
255
+ def _log_system_status(self, phase: str) -> None:
256
+ """Log system resource utilization"""
257
+ try:
258
+ cpu = psutil.cpu_percent(interval=1)
259
+ mem = psutil.virtual_memory()
260
+ logger.info(f"[{phase}] CPU: {cpu:.1f}% | RAM: {mem.used//(1024**2)}MB/{mem.total//(1024**2)}MB")
261
+
262
+ gpu_info = subprocess.run(
263
+ ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu",
264
+ "--format=csv,nounits,noheader"],
265
+ capture_output=True, text=True
266
+ )
267
+ if gpu_info.returncode == 0:
268
+ used, total, util = gpu_info.stdout.strip().split(", ")
269
+ logger.info(f"[{phase}] GPU: {used}MB/{total}MB | Util: {util}%")
270
+ except Exception as e:
271
+ logger.error(f"Resource monitoring failed: {e}")
272
+
273
+ def process_stream(self, prompt: str, history: List[Dict]) -> Generator[Dict, None, None]:
274
+ """Stream the agent's responses"""
275
  full_response = ""
276
  for chunk in self.agent.run_gradio_chat(prompt, [], 0.2, 512, 2048, False, []):
277
  if not chunk:
278
  continue
279
 
280
  if isinstance(chunk, list):
281
+ for msg in chunk:
282
+ if hasattr(msg, 'content') and msg.content:
283
+ cleaned = self.text_analyzer.clean_output(msg.content)
284
  if cleaned:
285
  full_response += cleaned + " "
286
+ yield {"role": "assistant", "content": full_response}
287
  elif isinstance(chunk, str) and chunk.strip():
288
+ cleaned = self.text_analyzer.clean_output(chunk)
289
  if cleaned:
290
  full_response += cleaned + " "
291
+ yield {"role": "assistant", "content": full_response}
292
+
293
+ def analyze_records(self, message: str, history: List[Dict], files: List) -> Generator[Dict[str, Any], None, None]:
294
+ """Main analysis workflow"""
295
+ outputs = {
296
+ "chatbot": history.copy(),
297
+ "download_output": None,
298
+ "final_summary": "",
299
+ "progress": {"value": "Initializing...", "visible": True}
300
+ }
301
+ yield outputs
302
 
303
  try:
304
+ # Add user message
305
+ history.append({"role": "user", "content": message})
306
+ outputs["chatbot"] = history
307
+ yield outputs
308
 
309
+ # Process files
310
  extracted = []
311
+ file_hash = ""
312
 
313
  if files:
314
+ with ThreadPoolExecutor(max_workers=4) as executor:
315
  futures = []
316
  for f in files:
317
+ file_type = Path(f.name).suffix[1:].lower()
318
+ futures.append(executor.submit(
319
+ self.file_processor.handle_upload,
320
+ f.name,
321
+ file_type
322
+ ))
323
 
324
  for i, future in enumerate(as_completed(futures), 1):
325
  try:
326
  extracted.extend(future.result())
327
+ outputs["progress"] = self._format_progress(i, len(files), "Processing files")
328
+ yield outputs
329
  except Exception as e:
330
+ logger.error(f"File processing failed: {e}")
331
+ extracted.append({"error": str(e)})
332
 
333
+ if files and os.path.exists(files[0].name):
334
+ file_hash = hashlib.md5(open(files[0].name, "rb").read()).hexdigest()
335
+
336
+ history.append({"role": "assistant", "content": "✅ Files processed successfully"})
337
+ outputs.update({
338
+ "chatbot": history,
339
+ "progress": self._format_progress(len(files), len(files), "Files processed")
340
  })
341
+ yield outputs
 
342
 
343
  # Analyze content
344
  text_content = "\n".join(json.dumps(item) for item in extracted)
345
+ chunks = self.text_analyzer.chunk_content(text_content)
346
+ full_analysis = ""
347
 
348
+ for idx, chunk in enumerate(chunks, 1):
349
  prompt = f"""
350
+ Analyze this clinical documentation for potential missed diagnoses. Provide:
351
+ 1. Specific clinical findings with references (e.g., "Elevated BP (160/95) on page 3")
352
+ 2. Their clinical significance
353
+ 3. Urgency of review
354
+ Use concise, continuous prose without bullet points. If no concerns, state "No missed diagnoses identified."
355
+
356
+ Document Excerpt (Part {idx}/{len(chunks)}):
357
+ {chunk[:1750]}
358
  """
359
+ history.append({"role": "assistant", "content": ""})
360
+ outputs.update({
361
+ "chatbot": history,
362
+ "progress": self._format_progress(idx, len(chunks), "Analyzing")
363
+ })
364
+ yield outputs
365
 
366
+ # Stream analysis
367
  chunk_response = ""
368
+ for update in self.process_stream(prompt, history):
369
+ history[-1] = update
370
  chunk_response = update["content"]
371
+ outputs.update({
372
+ "chatbot": history,
373
+ "progress": self._format_progress(idx, len(chunks), "Analyzing")
374
+ })
375
+ yield outputs
376
 
377
+ full_analysis += f"--- Analysis Part {idx} ---\n{chunk_response}\n"
378
+ torch.cuda.empty_cache()
379
+ gc.collect()
380
 
381
+ # Final outputs
382
+ summary = self.text_analyzer.generate_summary(full_analysis)
383
+ report_path = DIRECTORIES["reports"] / f"{file_hash}_report.txt" if file_hash else None
384
 
385
  if report_path:
386
  with open(report_path, "w", encoding="utf-8") as f:
387
+ f.write(full_analysis + "\n\nSUMMARY:\n" + summary)
388
 
389
+ outputs.update({
390
+ "download_output": str(report_path) if report_path and report_path.exists() else None,
391
+ "final_summary": summary,
392
+ "progress": {"visible": False}
393
+ })
394
+ yield outputs
395
 
396
  except Exception as e:
397
+ logger.error(f"Analysis failed: {e}")
398
+ history.append({"role": "assistant", "content": f"❌ Analysis error: {str(e)}"})
399
+ outputs.update({
400
+ "chatbot": history,
401
+ "final_summary": f"Error: {str(e)}",
402
+ "progress": {"visible": False}
403
  })
404
+ yield outputs
405
+
406
+ def _format_progress(self, current: int, total: int, stage: str = "") -> Dict[str, Any]:
407
  """Format progress update for UI"""
408
+ status = f"{stage} - {current}/{total}" if stage else f"{current}/{total}"
409
+ return {"value": status, "visible": True, "label": f"Progress: {status}"}
410
+
411
+ def create_interface(self) -> gr.Blocks:
412
+ """Build the Gradio interface"""
413
+ with gr.Blocks(
414
+ theme=gr.themes.Soft(
415
+ primary_hue="indigo",
416
+ secondary_hue="blue",
417
+ neutral_hue="slate"
418
+ ),
419
+ title="Clinical Oversight Assistant",
420
+ css="""
421
+ .summary-panel {
422
+ border-left: 4px solid #4f46e5;
423
+ padding: 16px;
424
+ background: #f8fafc;
425
+ border-radius: 8px;
426
+ margin-bottom: 16px;
427
  }
428
+ .upload-area {
429
+ border: 2px dashed #cbd5e1;
430
+ border-radius: 8px;
431
+ padding: 24px;
432
+ margin: 12px 0;
433
  }
434
  .chat-container {
435
+ border-radius: 8px;
436
+ border: 1px solid #e2e8f0;
437
  }
438
+ """
439
+ ) as app:
440
+ # Header
441
+ gr.Markdown("""
442
+ <div style='text-align: center; margin-bottom: 24px;'>
443
+ <h1 style='color: #4f46e5; margin-bottom: 8px;'>🩺 Clinical Oversight Assistant</h1>
444
+ <p style='color: #64748b;'>
445
+ AI-powered analysis for identifying potential missed diagnoses in patient records
446
+ </p>
447
+ </div>
448
+ """)
449
+
450
+ with gr.Row(equal_height=False):
451
+ # Main Chat Panel
452
+ with gr.Column(scale=3):
453
+ gr.Markdown("**Clinical Analysis Conversation**")
454
  chatbot = gr.Chatbot(
455
+ label="",
456
+ height=650,
457
  show_copy_button=True,
458
+ avatar_images=(
459
+ "assets/user.png",
460
+ "assets/assistant.png"
461
+ ) if Path("assets/user.png").exists() else None,
462
+ bubble_full_width=False,
463
  type="messages",
464
+ elem_classes=["chat-container"]
465
  )
466
 
467
+ # Results Panel
468
+ with gr.Column(scale=1):
469
+ with gr.Group():
470
+ gr.Markdown("**Clinical Summary**")
471
+ final_summary = gr.Markdown(
472
+ "Analysis results will appear here...",
473
+ elem_classes=["summary-panel"]
474
+ )
475
+
476
+ with gr.Group():
477
+ gr.Markdown("**Report Export**")
478
+ download_output = gr.File(
479
+ label="Download Full Analysis",
480
+ visible=False,
481
+ interactive=False
482
+ )
483
+
484
+ # Input Section
485
+ with gr.Row():
486
+ file_upload = gr.File(
487
+ file_types=[".pdf", ".csv", ".xls", ".xlsx"],
488
+ file_count="multiple",
489
+ label="Upload Patient Records",
490
+ elem_classes=["upload-area"]
491
  )
492
 
493
+ with gr.Row():
494
+ user_input = gr.Textbox(
495
+ placeholder="Enter your clinical query or analysis request...",
496
+ show_label=False,
497
+ container=False,
498
+ scale=7,
499
+ autofocus=True
500
  )
501
+ submit_btn = gr.Button(
502
+ "Analyze",
503
+ variant="primary",
504
+ scale=1,
505
+ min_width=120
506
  )
507
 
508
+ # Hidden progress tracker
509
+ progress_tracker = gr.Textbox(
510
+ label="Analysis Progress",
511
+ visible=False,
512
+ interactive=False
513
+ )
514
+
515
+ # Event handlers
516
+ submit_btn.click(
517
+ self.analyze_records,
518
+ inputs=[user_input, chatbot, file_upload],
519
+ outputs=[chatbot, download_output, final_summary, progress_tracker],
520
+ show_progress="hidden"
521
+ )
522
+
523
+ user_input.submit(
524
+ self.analyze_records,
525
+ inputs=[user_input, chatbot, file_upload],
526
+ outputs=[chatbot, download_output, final_summary, progress_tracker],
527
+ show_progress="hidden"
528
+ )
529
+
530
+ app.load(
531
+ lambda: [[], None, "", "", None, {"visible": False}],
532
+ outputs=[chatbot, download_output, final_summary, user_input, file_upload, progress_tracker],
533
+ queue=False
534
+ )
535
+
536
+ return app
537
 
538
  # ==================== APPLICATION ENTRY POINT ====================
539
  if __name__ == "__main__":
 
540
  try:
541
+ logger.info("Launching Clinical Oversight Assistant...")
542
+ clinical_app = ClinicalAgent()
543
+ interface = clinical_app.create_interface()
544
 
545
  interface.queue(
546
  api_open=False,
 
549
  server_name="0.0.0.0",
550
  server_port=7860,
551
  show_error=True,
552
+ allowed_paths=[str(DIRECTORIES["reports"])],
553
  share=False
554
  )
555
  except Exception as e:
556
  logger.error(f"Application failed to start: {e}")
557
  raise
558
  finally:
559
+ if torch.distributed.is_initialized():
560
+ torch.distributed.destroy_process_group()