Update app.py
app.py CHANGED
@@ -11,15 +11,6 @@ import shutil
 import re
 import psutil
 import subprocess
-import logging
-import torch
-import gc
-from diskcache import Cache
-import time
-
-# Configure logging with a more specific logger name to avoid conflicts
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger("ClinicalOversightApp")

 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -46,8 +37,8 @@ sys.path.insert(0, src_path)

 from txagent.txagent import TxAgent

-
-
+MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
+                    'allergies', 'summary', 'impression', 'findings', 'recommendations'}

 def sanitize_utf8(text: str) -> str:
     return text.encode("utf-8", "ignore").decode("utf-8")
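The new MEDICAL_KEYWORDS set feeds the page filter introduced in the next hunk: extract_priority_pages keeps a page only if it is among the first three or mentions one of these terms as a whole word. A quick sanity check of that whole-word regex; mentions_keyword and the sample strings are illustrative, not part of the commit:

import re

MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
                    'allergies', 'summary', 'impression', 'findings', 'recommendations'}

def mentions_keyword(page_text: str) -> bool:
    # The \b anchors keep 'plan' from matching inside 'planned' or 'transplant'.
    return any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS)

print(mentions_keyword("Assessment and Plan: continue metformin"))  # True
print(mentions_keyword("Surgery planned for next week"))            # False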
@@ -56,60 +47,28 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()

-def
+def extract_priority_pages(file_path: str) -> str:
     try:
+        text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-
-
-
-
-
-            batch_size = 10
-            batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
-            text_chunks = [""] * total_pages
-            processed_pages = 0
-
-            def extract_batch(start: int, end: int) -> List[tuple]:
-                results = []
-                with pdfplumber.open(file_path) as pdf:
-                    for idx, page in enumerate(pdf.pages[start:end], start=start):
-                        page_text = page.extract_text() or ""
-                        results.append((idx, f"=== Page {idx + 1} ===\n{page_text.strip()}"))
-                        logger.debug("Extracted page %d, text length: %d chars", idx + 1, len(page_text))
-                return results
-
-            with ThreadPoolExecutor(max_workers=6) as executor:
-                futures = [executor.submit(extract_batch, start, end) for start, end in batches]
-                for future in as_completed(futures):
-                    for page_num, text in future.result():
-                        if page_num < len(text_chunks):
-                            text_chunks[page_num] = text
-                        else:
-                            logger.warning("Page number %d out of range for text_chunks (size %d)", page_num, len(text_chunks))
-                    processed_pages += batch_size
-                    if progress_callback:
-                        progress_callback(min(processed_pages, total_pages), total_pages)
-            logger.info("Processed %d/%d pages for %s", min(processed_pages, total_pages), total_pages, file_path)
-
-        extracted_text = "\n\n".join(filter(None, text_chunks))
-        logger.info("Extracted %d pages from %s, total length: %d chars", total_pages, file_path, len(extracted_text))
-        return extracted_text
+            for i, page in enumerate(pdf.pages):
+                page_text = page.extract_text() or ""
+                if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
+        return "\n\n".join(text_chunks)
     except Exception as e:
-        logger.error("PDF processing error for %s: %s", file_path, e, exc_info=True)
         return f"PDF processing error: {str(e)}"

-def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
+def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
-
-
-        if
-
-
-        else:
-            logger.info("Cache miss for %s (key: %s), performing fresh extraction", file_path, cache_key)
+        h = file_hash(file_path)
+        cache_path = os.path.join(file_cache_dir, f"{h}.json")
+        if os.path.exists(cache_path):
+            with open(cache_path, "r", encoding="utf-8") as f:
+                return f.read()

         if file_type == "pdf":
-            text =
+            text = extract_priority_pages(file_path)
             result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
         elif file_type == "csv":
             df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
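convert_file_to_json now swaps the old diskcache layer for a plain JSON file cache keyed by the MD5 of the upload's bytes, so re-uploading an unchanged file skips extraction entirely. A minimal sketch of that round trip; convert_stub is a hypothetical stand-in for the real function, with a temporary directory in place of the app's file_cache_dir:

import hashlib, json, os, tempfile

def file_hash(path: str) -> str:
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

file_cache_dir = tempfile.mkdtemp()  # stand-in for the app's cache directory

def convert_stub(file_path: str) -> str:
    cache_path = os.path.join(file_cache_dir, f"{file_hash(file_path)}.json")
    if os.path.exists(cache_path):  # cache hit: reuse the prior extraction
        with open(cache_path, "r", encoding="utf-8") as f:
            return f.read()
    result = json.dumps({"filename": os.path.basename(file_path), "content": "..."})
    with open(cache_path, "w", encoding="utf-8") as f:  # cache miss: persist
        f.write(result)
    return result

record = os.path.join(file_cache_dir, "note.txt")
with open(record, "w") as f:
    f.write("patient record")
convert_stub(record)         # first call extracts and writes the cache file
print(convert_stub(record))  # second call is served from the cache

Because the key is a content hash, renaming a file reuses the cached JSON, while any edit to the file changes the hash and forces a fresh extraction.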
@@ -125,99 +84,42 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
-
-
-        logger.info("Cached extraction for %s, size: %d bytes", file_path, len(result))
+        with open(cache_path, "w", encoding="utf-8") as f:
+            f.write(result)
         return result
     except Exception as e:
-        logger.error("Error processing %s: %s", os.path.basename(file_path), e, exc_info=True)
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})

 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
-
+        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
-
+            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
-
+        print(f"[{tag}] GPU/CPU monitor failed: {e}")

 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-
-    text = re.sub(r"\
-    text = re.sub(r"[^\n
-
-
-
-
-
-
-
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
-        if section_match:
-            current_section = section_match.group(1)
-            if current_section not in sections:
-                sections[current_section] = []
-            continue
-        finding_match = re.match(r"-\s*.+", line)
-        if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-            sections[current_section].append(line)
-
-    cleaned = []
-    for heading, findings in sections.items():
-        if findings:
-            cleaned.append(f"### {heading}\n" + "\n".join(findings))
-
-    text = "\n\n".join(cleaned).strip()
-    logger.debug("Cleaned response length: %d chars", len(text))
-    return text if text else ""
-
-def summarize_findings(combined_response: str) -> str:
-    if not combined_response or all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
-        logger.info("No clinical oversights identified in analysis")
-        return "### Summary of Clinical Oversights\nNo critical oversights identified in the provided records."
-
-    sections = {}
-    lines = combined_response.splitlines()
-    current_section = None
-    for line in lines:
-        line = line.strip()
-        if not line:
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
-        if section_match:
-            current_section = section_match.group(1)
-            if current_section not in sections:
-                sections[current_section] = []
-            continue
-        finding_match = re.match(r"-\s*(.+)", line)
-        if finding_match and current_section:
-            sections[current_section].append(finding_match.group(1))
-
-    summary_lines = []
-    for heading, findings in sections.items():
-        if findings:
-            summary = f"- **{heading}**: {'; '.join(findings[:2])}. Risks: {heading.lower()} may lead to adverse outcomes. Recommend: urgent review and specialist referral."
-            summary_lines.append(summary)
-
-    if not summary_lines:
-        logger.info("No clinical oversights identified after summarization")
-        return "### Summary of Clinical Oversights\nNo critical oversights identified."
-
-    summary = "### Summary of Clinical Oversights\n" + "\n".join(summary_lines)
-    logger.info("Summarized findings: %s", summary[:100])
-    return summary
+    # Remove tool calls, JSON data, and repetitive phrases
+    text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
+    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)  # Remove tool names
+    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)  # Remove JSON
+    text = re.sub(r"To analyze the medical records for clinical oversights.*?begin by reviewing.*?\n", "", text, flags=re.DOTALL)
+    text = re.sub(r"\n{3,}", "\n\n", text).strip()
+    # Only keep text under analysis headings or relevant content
+    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
+        return ""
+    return text

 def init_agent():
-
+    print("🚀 Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
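clean_response is reduced to regex scrubbing: it strips [TOOL_CALLS] payloads, tool-name echoes, and inlined JSON, then keeps the text only if one of the four report headings survives. A small check of that pipeline on an invented raw model string (trimmed to three of the production regexes; the meta-JSON and boilerplate-phrase subs are omitted for brevity):

import re

def clean_response(text: str) -> str:
    text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)  # drop tool-call payloads
    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)                # drop echoed tool names
    text = re.sub(r"\n{3,}", "\n\n", text).strip()                 # collapse blank runs
    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
        return ""
    return text

raw = ("['get_drug_interactions']\n"
       "### Medication Conflicts\n"
       "- Warfarin plus ibuprofen raises bleeding risk.\n\n\n\n"
       "[TOOL_CALLS] {\"name\": \"get_drug_interactions\"}")
print(clean_response(raw))
# ### Medication Conflicts
# - Warfarin plus ibuprofen raises bleeding risk.
print(repr(clean_response("I could not find any problems.")))  # '' - no heading, dropped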
@@ -229,150 +131,148 @@ def init_agent():
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
-        enable_checker=
-        step_rag_num=
+        enable_checker=True,
+        step_rag_num=2,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
-
+    print("✅ Agent Ready")
     return agent

 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
-        chatbot = gr.Chatbot(label="
-        final_summary = gr.Markdown(label="Summary of Clinical Oversights")
+        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
-        progress_bar = gr.Progress()
-
-        prompt_template = """
-Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under headings (e.g., 'Missed Diagnoses'). For each finding, include clinical context, risks, and recommendations. Output only markdown bullet points under headings. If no issues, state "No issues identified".
-Patient Record Excerpt (Chunk {0} of {1}):
-{chunk}
-"""

-        def analyze(message: str, history: List[dict], files: List
+        def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
-
-
-
-            if not files:
-                logger.error("No files uploaded for analysis")
-                history.append({"role": "assistant", "content": "❌ Please upload a file to analyze."})
-                yield history, None, "### Summary of Clinical Oversights\nNo file uploaded for analysis."
-                return
+            history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
+            yield history, None

             extracted = ""
             file_hash_value = ""
-
-
-
-
-
-
-
-
-
-                futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
-                results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
-                extracted = "\n".join(results)
-                file_hash_value = file_hash(files[0].name) if files else ""
-                logger.info("Extraction complete for %d files", len(files))
-                history.append({"role": "assistant", "content": "✅ Text extraction complete."})
-                yield history, None, ""
-
-            logger.info("Extracted text length: %d chars", len(extracted))
-            if len(extracted.strip()) == 0:
-                logger.error("Extracted text is empty")
-                history.append({"role": "assistant", "content": "❌ Extracted text is empty. Please ensure the file contains readable content."})
-                yield history, None, "### Summary of Clinical Oversights\nExtracted text is empty."
-                return
-
-            chunk_size = 6000
+            if files:
+                with ThreadPoolExecutor(max_workers=6) as executor:
+                    futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
+                    results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
+                    extracted = "\n".join(results)
+                    file_hash_value = file_hash(files[0].name) if files else ""
+
+            # Split extracted text into chunks of ~4,000 characters
+            chunk_size = 4000
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            if not chunks:
-                chunks = [""]
-            logger.info("Created %d chunks", len(chunks))
             combined_response = ""
-
+
+            prompt_template = """
+Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
+
+1. **Missed Diagnoses**:
+   - Identify inconsistencies in history, symptoms, or tests.
+   - Consider psychiatric, neurological, infectious, autoimmune, genetic conditions, family history, trauma, and developmental factors.
+
+2. **Medication Conflicts**:
+   - Check for contraindications, interactions, or unjustified off-label use.
+   - Assess if medications worsen diagnoses or cause adverse effects.
+
+3. **Incomplete Assessments**:
+   - Note missing or superficial cognitive, psychiatric, social, or family assessments.
+   - Highlight gaps in medical history, substance use, or lab/imaging documentation.
+
+4. **Urgent Follow-up**:
+   - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
+
+Medical Records (Chunk {0} of {1}):
+{chunk}
+
+Begin analysis:
+"""

             try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                if history and history[-1]["content"].startswith("⏳"):
+                    history.pop()
+
+                # Process each chunk and stream cleaned results
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    # Update UI with progress
+                    history.append({"role": "assistant", "content": f"🔄 Processing Chunk {chunk_idx} of {len(chunks)}..."})
+                    yield history, None
+
+                    prompt = prompt_template.format(chunk_idx, len(chunks), chunk=chunk)
+                    chunk_response = ""
+                    for chunk_output in agent.run_gradio_chat(
+                        message=prompt,
+                        history=[],
+                        temperature=0.2,
+                        max_new_tokens=1024,
+                        max_token=4096,
+                        call_agent=False,
+                        conversation=[],
+                    ):
+                        if chunk_output is None:
+                            continue
+                        if isinstance(chunk_output, list):
+                            for m in chunk_output:
+                                if hasattr(m, 'content') and m.content:
+                                    cleaned = clean_response(m.content)
+                                    if cleaned:
+                                        chunk_response += cleaned + "\n"
+                                        # Stream partial response to UI
+                                        if history[-1]["content"].startswith("🔄"):
+                                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                        else:
+                                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                        yield history, None
+                        elif isinstance(chunk_output, str) and chunk_output.strip():
+                            cleaned = clean_response(chunk_output)
+                            if cleaned:
+                                chunk_response += cleaned + "\n"
+                                # Stream partial response to UI
+                                if history[-1]["content"].startswith("🔄"):
+                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
+                                else:
+                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
+                                yield history, None
+
+                    # Append completed chunk response to combined response
+                    if chunk_response:
+                        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+
+                # Finalize UI with complete response
+                if combined_response:
                     history[-1]["content"] = combined_response.strip()
                 else:
-                    history.append({"role": "assistant", "content": "No oversights identified
+                    history.append({"role": "assistant", "content": "No oversights identified."})

-
+                # Generate report file with cleaned response
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
                 if report_path:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(combined_response
-
-                yield history, report_path if report_path and os.path.exists(report_path) else None, summary
+                        f.write(combined_response)
+                yield history, report_path if report_path and os.path.exists(report_path) else None

             except Exception as e:
-
+                print("🚨 ERROR:", e)
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
-                yield history, None
+                yield history, None

-        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output
-        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output
+        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
+        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
     return demo

 if __name__ == "__main__":
-
-
-
-
-
-
-
-
-
-
-    )
-    finally:
-        if torch.distributed.is_initialized():
-            torch.distributed.destroy_process_group()
+    print("🚀 Launching app...")
+    agent = init_agent()
+    demo = create_ui(agent)
+    demo.queue(api_open=False).launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        allowed_paths=[report_dir],
+        share=False
+    )
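More broadly, analyze works because Gradio treats a generator event handler as a stream: every yield of (chatbot_history, file) repaints the outputs, which is how the placeholder, the per-chunk progress line, and the partial analyses appear mid-run. A stripped-down sketch of the same pattern with the TxAgent call replaced by a dummy token loop (the component wiring mirrors the app; the token list and delays are invented):

import time
from typing import List

import gradio as gr

def analyze(message: str, history: List[dict], files: List):
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": "⏳ Analyzing..."})
    yield history, None  # first repaint: placeholder message

    history.pop()  # swap the placeholder for streamed text
    partial = ""
    for token in ["Reviewing ", "records ", "for ", "oversights..."]:
        time.sleep(0.3)  # stand-in for model latency
        partial += token
        if history and history[-1]["role"] == "assistant":
            history[-1]["content"] = partial
        else:
            history.append({"role": "assistant", "content": partial})
        yield history, None  # each yield re-renders the Chatbot

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    file_upload = gr.File(file_count="multiple")
    msg_input = gr.Textbox()
    download_output = gr.File()
    msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload],
                     outputs=[chatbot, download_output])

# demo.queue().launch()  # uncomment to try it locally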