Ali2206 committed · verified
Commit 02a4d5e · 1 Parent(s): b9a61ea

Update app.py

Files changed (1)
  1. app.py +141 -241
app.py CHANGED
@@ -11,15 +11,6 @@ import shutil
 import re
 import psutil
 import subprocess
-import logging
-import torch
-import gc
-from diskcache import Cache
-import time
-
-# Configure logging with a more specific logger name to avoid conflicts
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("ClinicalOversightApp")
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -46,8 +37,8 @@ sys.path.insert(0, src_path)
 
 from txagent.txagent import TxAgent
 
-# Initialize cache with 10GB limit
-cache = Cache(file_cache_dir, size_limit=10 * 1024**3)
+MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
+                    'allergies', 'summary', 'impression', 'findings', 'recommendations'}
 
 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
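
The MEDICAL_KEYWORDS set added above drives the page-priority filter in the next hunk: the first three pages are always kept (i < 3), and any later page is kept only if some keyword matches as a whole word, case-insensitively. A minimal standalone sketch of that test (the page text is invented):

    import re

    MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
                        'allergies', 'summary', 'impression', 'findings', 'recommendations'}

    page_text = "Impression: stable. Plan: continue current medications."
    # rf'\b{kw}\b' builds e.g. r'\bplan\b', so 'plan' matches but 'planned' would not
    keep = any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS)
    print(keep)  # True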
@@ -56,60 +47,28 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def extract_all_pages(file_path: str, progress_callback=None) -> str:
+def extract_priority_pages(file_path: str) -> str:
     try:
+        text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            total_pages = len(pdf.pages)
-            if total_pages == 0:
-                logger.error("No pages found in PDF: %s", file_path)
-                return ""
-
-            batch_size = 10
-            batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
-            text_chunks = [""] * total_pages
-            processed_pages = 0
-
-            def extract_batch(start: int, end: int) -> List[tuple]:
-                results = []
-                with pdfplumber.open(file_path) as pdf:
-                    for idx, page in enumerate(pdf.pages[start:end], start=start):
-                        page_text = page.extract_text() or ""
-                        results.append((idx, f"=== Page {idx + 1} ===\n{page_text.strip()}"))
-                        logger.debug("Extracted page %d, text length: %d chars", idx + 1, len(page_text))
-                return results
-
-            with ThreadPoolExecutor(max_workers=6) as executor:
-                futures = [executor.submit(extract_batch, start, end) for start, end in batches]
-                for future in as_completed(futures):
-                    for page_num, text in future.result():
-                        if page_num < len(text_chunks):
-                            text_chunks[page_num] = text
-                        else:
-                            logger.warning("Page number %d out of range for text_chunks (size %d)", page_num, len(text_chunks))
-                    processed_pages += batch_size
-                    if progress_callback:
-                        progress_callback(min(processed_pages, total_pages), total_pages)
-                    logger.info("Processed %d/%d pages for %s", min(processed_pages, total_pages), total_pages, file_path)
-
-            extracted_text = "\n\n".join(filter(None, text_chunks))
-            logger.info("Extracted %d pages from %s, total length: %d chars", total_pages, file_path, len(extracted_text))
-            return extracted_text
+            for i, page in enumerate(pdf.pages):
+                page_text = page.extract_text() or ""
+                if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
+        return "\n\n".join(text_chunks)
     except Exception as e:
-        logger.error("PDF processing error for %s: %s", file_path, e, exc_info=True)
         return f"PDF processing error: {str(e)}"
 
-def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
+def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
-        file_h = file_hash(file_path)
-        cache_key = f"{file_h}_{file_type}"
-        if cache_key in cache:
-            logger.info("Cache hit for %s (key: %s)", file_path, cache_key)
-            return cache[cache_key]
-        else:
-            logger.info("Cache miss for %s (key: %s), performing fresh extraction", file_path, cache_key)
+        h = file_hash(file_path)
+        cache_path = os.path.join(file_cache_dir, f"{h}.json")
+        if os.path.exists(cache_path):
+            with open(cache_path, "r", encoding="utf-8") as f:
+                return f.read()
 
         if file_type == "pdf":
-            text = extract_all_pages(file_path, progress_callback)
+            text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         elif file_type == "csv":
            df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
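
With diskcache gone, caching is now a flat directory of JSON files keyed on the MD5 of the file's bytes. A standalone sketch of the same read-through pattern (directory and payload are illustrative, not the app's actual values):

    import hashlib, json, os

    file_cache_dir = "/tmp/file_cache"  # illustrative; the app points this at its persistent cache dir
    os.makedirs(file_cache_dir, exist_ok=True)

    def cached_result(path: str) -> str:
        with open(path, "rb") as f:
            h = hashlib.md5(f.read()).hexdigest()
        cache_path = os.path.join(file_cache_dir, f"{h}.json")
        if os.path.exists(cache_path):                      # hit: identical bytes seen before
            with open(cache_path, "r", encoding="utf-8") as f:
                return f.read()
        result = json.dumps({"filename": os.path.basename(path)})  # stand-in for real extraction
        with open(cache_path, "w", encoding="utf-8") as f:          # miss: persist for next run
            f.write(result)
        return result

Note that where the removed diskcache key was f"{file_h}_{file_type}", the new key is content-only, so two byte-identical files with different extensions would share one cache entry.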
@@ -125,99 +84,42 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
-
-        cache[cache_key] = result
-        logger.info("Cached extraction for %s, size: %d bytes", file_path, len(result))
+        with open(cache_path, "w", encoding="utf-8") as f:
+            f.write(result)
         return result
     except Exception as e:
-        logger.error("Error processing %s: %s", os.path.basename(file_path), e, exc_info=True)
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
-        logger.info("[%s] CPU: %.1f%% | RAM: %dMB / %dMB", tag, cpu, mem.used // (1024**2), mem.total // (1024**2))
+        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
-            logger.info("[%s] GPU: %sMB / %sMB | Utilization: %s%%", tag, used, total, util)
+            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
-        logger.error("[%s] GPU/CPU monitor failed: %s", tag, e)
+        print(f"[{tag}] GPU/CPU monitor failed: {e}")
 
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-    text = re.sub(r"\[.*?\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
-    text = re.sub(r"\n{3,}", "\n\n", text)
-    text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
-
-    sections = {}
-    current_section = None
-    lines = text.splitlines()
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
-        if section_match:
-            current_section = section_match.group(1)
-            if current_section not in sections:
-                sections[current_section] = []
-            continue
-        finding_match = re.match(r"-\s*.+", line)
-        if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-            sections[current_section].append(line)
-
-    cleaned = []
-    for heading, findings in sections.items():
-        if findings:
-            cleaned.append(f"### {heading}\n" + "\n".join(findings))
-
-    text = "\n\n".join(cleaned).strip()
-    logger.debug("Cleaned response length: %d chars", len(text))
-    return text if text else ""
-
-def summarize_findings(combined_response: str) -> str:
-    if not combined_response or all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
-        logger.info("No clinical oversights identified in analysis")
-        return "### Summary of Clinical Oversights\nNo critical oversights identified in the provided records."
-
-    sections = {}
-    lines = combined_response.splitlines()
-    current_section = None
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
-        if section_match:
-            current_section = section_match.group(1)
-            if current_section not in sections:
-                sections[current_section] = []
-            continue
-        finding_match = re.match(r"-\s*(.+)", line)
-        if finding_match and current_section:
-            sections[current_section].append(finding_match.group(1))
-
-    summary_lines = []
-    for heading, findings in sections.items():
-        if findings:
-            summary = f"- **{heading}**: {'; '.join(findings[:2])}. Risks: {heading.lower()} may lead to adverse outcomes. Recommend: urgent review and specialist referral."
-            summary_lines.append(summary)
-
-    if not summary_lines:
-        logger.info("No clinical oversights identified after summarization")
-        return "### Summary of Clinical Oversights\nNo critical oversights identified."
-
-    summary = "### Summary of Clinical Oversights\n" + "\n".join(summary_lines)
-    logger.info("Summarized findings: %s", summary[:100])
-    return summary
+    # Remove tool calls, JSON data, and repetitive phrases
+    text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
+    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)  # Remove tool names
+    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)  # Remove JSON
+    text = re.sub(r"To analyze the medical records for clinical oversights.*?begin by reviewing.*?\n", "", text, flags=re.DOTALL)
+    text = re.sub(r"\n{3,}", "\n\n", text).strip()
+    # Only keep text under analysis headings or relevant content
+    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
+        return ""
+    return text
 
 def init_agent():
-    logger.info("Initializing model...")
+    print("🔍 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
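
The new clean_response is both a scrubber and a gate: tool-call residue is stripped, and any text that never mentions one of the four report headings is discarded outright. A small self-contained sketch of that gating behavior (the sample strings are invented):

    import re

    def gate(text: str) -> str:
        # Mirrors the two key steps above: drop tool-name echoes, then
        # require one of the four analysis headings before keeping anything.
        text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
        if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
            return ""
        return text.strip()

    print(gate("['get_drug_interactions']\n### Medication Conflicts\n- Warfarin plus aspirin: bleeding risk"))
    print(repr(gate("Retrieving tools...")))  # '' (never reaches the UI)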
@@ -229,150 +131,148 @@ def init_agent():
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
-        enable_checker=False,
-        step_rag_num=4,
+        enable_checker=True,
+        step_rag_num=2,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
-    logger.info("Agent Ready")
+    print("✅ Agent Ready")
     return agent
 
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
-        chatbot = gr.Chatbot(label="Detailed Analysis", height=600, type="messages")
-        final_summary = gr.Markdown(label="Summary of Clinical Oversights")
+        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
-        progress_bar = gr.Progress()
-
-        prompt_template = """
-Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under headings (e.g., 'Missed Diagnoses'). For each finding, include clinical context, risks, and recommendations. Output only markdown bullet points under headings. If no issues, state "No issues identified".
-Patient Record Excerpt (Chunk {0} of {1}):
-{chunk}
-"""
 
-        def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
+        def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
-            yield history, None, ""
-            logger.info("Starting analysis for message: %s", message[:100])
-
-            if not files:
-                logger.error("No files uploaded for analysis")
-                history.append({"role": "assistant", "content": "❌ Please upload a file to analyze."})
-                yield history, None, "### Summary of Clinical Oversights\nNo file uploaded for analysis."
-                return
+            history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
+            yield history, None
 
             extracted = ""
             file_hash_value = ""
-            logger.info("Processing %d uploaded files", len(files))
-            for f in files:
-                logger.info("Processing file: %s", f.name)
-
-            def update_extraction_progress(current, total):
-                progress(current / total, desc=f"Extracting text... Page {current}/{total}")
-                return history, None, ""
-
-            with ThreadPoolExecutor(max_workers=6) as executor:
-                futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
-                results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
-                extracted = "\n".join(results)
-                file_hash_value = file_hash(files[0].name) if files else ""
-            logger.info("Extraction complete for %d files", len(files))
-            history.append({"role": "assistant", "content": "✅ Text extraction complete."})
-            yield history, None, ""
-
-            logger.info("Extracted text length: %d chars", len(extracted))
-            if len(extracted.strip()) == 0:
-                logger.error("Extracted text is empty")
-                history.append({"role": "assistant", "content": "❌ Extracted text is empty. Please ensure the file contains readable content."})
-                yield history, None, "### Summary of Clinical Oversights\nExtracted text is empty."
-                return
-
-            chunk_size = 6000
+            if files:
+                with ThreadPoolExecutor(max_workers=6) as executor:
+                    futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
+                    results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
+                    extracted = "\n".join(results)
+                    file_hash_value = file_hash(files[0].name) if files else ""
+
+            # Split extracted text into chunks of ~4,000 characters
+            chunk_size = 4000
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            if not chunks:
-                chunks = [""]
-            logger.info("Created %d chunks", len(chunks))
             combined_response = ""
-            batch_size = 2
+
+            prompt_template = """
+Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
+
+1. **Missed Diagnoses**:
+   - Identify inconsistencies in history, symptoms, or tests.
+   - Consider psychiatric, neurological, infectious, autoimmune, genetic conditions, family history, trauma, and developmental factors.
+
+2. **Medication Conflicts**:
+   - Check for contraindications, interactions, or unjustified off-label use.
+   - Assess if medications worsen diagnoses or cause adverse effects.
+
+3. **Incomplete Assessments**:
+   - Note missing or superficial cognitive, psychiatric, social, or family assessments.
+   - Highlight gaps in medical history, substance use, or lab/imaging documentation.
+
+4. **Urgent Follow-up**:
+   - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
+
+Medical Records (Chunk {0} of {1}):
+{chunk}
+
+Begin analysis:
+"""
 
             try:
-                for batch_idx in range(0, len(chunks), batch_size):
-                    batch_chunks = chunks[batch_idx:batch_idx + batch_size]
-                    batch_prompts = [prompt_template.format(i + 1, len(chunks), chunk=chunk[:4000]) for i, chunk in enumerate(batch_chunks)]
-                    batch_responses = []
-
-                    progress((batch_idx + 1) / len(chunks), desc=f"Analyzing chunks {batch_idx + 1}-{min(batch_idx + batch_size, len(chunks))}/{len(chunks)}")
-
-                    with ThreadPoolExecutor(max_workers=len(batch_chunks)) as executor:
-                        futures = [executor.submit(agent.run_gradio_chat, prompt, [], 0.2, 512, 2048, False, []) for prompt in batch_prompts]
-                        for future in as_completed(futures):
-                            chunk_response = ""
-                            for chunk_output in future.result():
-                                if chunk_output is None:
-                                    continue
-                                if isinstance(chunk_output, list):
-                                    for m in chunk_output:
-                                        if hasattr(m, 'content') and m.content:
-                                            cleaned = clean_response(m.content)
-                                            if cleaned and re.search(r"###\s*\w+", cleaned):
-                                                chunk_response += cleaned + "\n\n"
-                                elif isinstance(chunk_output, str) and chunk_output.strip():
-                                    cleaned = clean_response(chunk_output)
-                                    if cleaned and re.search(r"###\s*\w+", cleaned):
-                                        chunk_response += cleaned + "\n\n"
-                            batch_responses.append(chunk_response)
-                            torch.cuda.empty_cache()
-                            gc.collect()
-                            logger.debug("Processed chunk response length: %d chars", len(chunk_response))
-
-                    for chunk_idx, chunk_response in enumerate(batch_responses, batch_idx + 1):
-                        if chunk_response:
-                            combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
-                        else:
-                            combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
-                    history[-1] = {"role": "assistant", "content": combined_response.strip()}
-                    yield history, None, ""
-
-                if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
+                if history and history[-1]["content"].startswith("⏳"):
+                    history.pop()
+
+                # Process each chunk and stream cleaned results
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    # Update UI with progress
+                    history.append({"role": "assistant", "content": f"🔄 Processing Chunk {chunk_idx} of {len(chunks)}..."})
+                    yield history, None
+
+                    prompt = prompt_template.format(chunk_idx, len(chunks), chunk=chunk)
+                    chunk_response = ""
+                    for chunk_output in agent.run_gradio_chat(
+                        message=prompt,
+                        history=[],
+                        temperature=0.2,
+                        max_new_tokens=1024,
+                        max_token=4096,
+                        call_agent=False,
+                        conversation=[],
+                    ):
+                        if chunk_output is None:
+                            continue
+                        if isinstance(chunk_output, list):
+                            for m in chunk_output:
+                                if hasattr(m, 'content') and m.content:
+                                    cleaned = clean_response(m.content)
+                                    if cleaned:
+                                        chunk_response += cleaned + "\n"
+                                        # Stream partial response to UI
+                                        if history[-1]["content"].startswith("🔄"):
+                                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                        else:
+                                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                        yield history, None
+                        elif isinstance(chunk_output, str) and chunk_output.strip():
+                            cleaned = clean_response(chunk_output)
+                            if cleaned:
+                                chunk_response += cleaned + "\n"
+                                # Stream partial response to UI
+                                if history[-1]["content"].startswith("🔄"):
+                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                else:
+                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                yield history, None
+
+                    # Append completed chunk response to combined response
+                    if chunk_response:
+                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+
+                # Finalize UI with complete response
+                if combined_response:
                     history[-1]["content"] = combined_response.strip()
                 else:
-                    history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
+                    history.append({"role": "assistant", "content": "No oversights identified."})
 
-                summary = summarize_findings(combined_response)
+                # Generate report file with cleaned response
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
                 if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(combined_response + "\n\n" + summary)
-                logger.info("Analysis complete, report saved at: %s", report_path if report_path else "None")
-                yield history, report_path if report_path and os.path.exists(report_path) else None, summary
+                        f.write(combined_response)
+                yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
-                logger.error("Analysis error: %s", e, exc_info=True)
+                print("🚨 ERROR:", e)
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
-                yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"
+                yield history, None
 
-        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
-        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
+        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
+        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
         return demo
 
 if __name__ == "__main__":
-    try:
-        logger.info("Launching app...")
-        agent = init_agent()
-        demo = create_ui(agent)
-        demo.queue(api_open=False).launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_error=True,
-            allowed_paths=[report_dir],
-            share=False
-        )
-    finally:
-        if torch.distributed.is_initialized():
-            torch.distributed.destroy_process_group()
+    print("🚀 Launching app...")
+    agent = init_agent()
+    demo = create_ui(agent)
+    demo.queue(api_open=False).launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        allowed_paths=[report_dir],
+        share=False
+    )
 
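One structural point worth noting: the new analyze is a generator, and Gradio runs generator event handlers as streams, re-rendering the outputs on every yield, which is what makes the per-chunk progress messages appear live. A toy standalone sketch of the same pattern (not the app's actual handler):

    import time
    import gradio as gr

    def respond(message, history):
        history = (history or []) + [{"role": "user", "content": message}]
        history.append({"role": "assistant", "content": "⏳ Working..."})
        yield history                      # placeholder renders immediately
        for step in range(1, 4):
            time.sleep(0.5)                # stand-in for one chunk of model output
            history[-1] = {"role": "assistant", "content": f"Chunk {step} of 3 done."}
            yield history                  # each yield re-renders the chatbot

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot(type="messages")
        box = gr.Textbox()
        box.submit(respond, inputs=[box, chatbot], outputs=[chatbot])

    # demo.queue().launch()

The app's analyze streams (history, report_path) pairs the same way, which is why both event bindings list the chatbot and the download component as outputs.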