Update app.py
app.py
CHANGED
@@ -52,8 +52,8 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def …
-    return hashlib.md5((…
+def batch_hash(chunks: List[str], prompt: str) -> str:
+    return hashlib.md5(("".join(chunks) + prompt).encode("utf-8")).hexdigest()
 
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
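The new `batch_hash` keys the response cache on both the chunk contents and the prompt template, so editing the prompt automatically invalidates stale cache entries. A minimal standalone sketch of that behavior (the helper is re-declared so the snippet runs on its own):

```python
import hashlib
from typing import List

def batch_hash(chunks: List[str], prompt: str) -> str:
    # Same body as the diff: one MD5 over all chunks plus the template.
    return hashlib.md5(("".join(chunks) + prompt).encode("utf-8")).hexdigest()

chunks = ["Patient reports chest pain.", "BP 160/100, no follow-up noted."]
key_a = batch_hash(chunks, "Analyze for oversights:\n{chunks}")
key_b = batch_hash(chunks, "Summarize medications:\n{chunks}")
assert key_a != key_b  # a changed prompt template produces a new cache key
```

Note that the chunks are joined without a separator, so `["ab", "c"]` and `["a", "bc"]` hash identically; that is harmless for fixed-size slices of one document, but worth knowing if the chunking scheme ever changes.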
@@ -147,32 +147,21 @@ def log_system_usage(tag=""):
         logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
 
 def clean_response(text: str) -> str:
-    """Clean TxAgent response to group findings …
+    """Clean TxAgent response to group findings by section without tool names."""
     text = sanitize_utf8(text)
-…
+    # Remove tool tags, None, and reasoning
+    text = re.sub(r"\[TOOL:[^\]]+\]|\bNone\b|To analyze the patient record excerpt.*?medications\.|Since the previous attempts.*?\.|I need to.*?medications\.|Retrieving tools.*?\.", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text)
     text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
-
-    tool_to_heading = {
-        "get_abuse_info_by_drug_name": "Drugs",
-        "get_dependence_info_by_drug_name": "Drugs",
-        "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
-        "get_info_for_patients_by_drug_name": "Drugs",
-    }
-
+
     sections = {}
     current_section = None
-    current_tool = None
     lines = text.splitlines()
     for line in lines:
         line = line.strip()
         if not line:
             continue
-…
-        if tool_match:
-            current_tool = tool_match.group(1)
-            continue
-        section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up|Drugs)", line)
+        section_match = re.match(r"###\s*(Drugs|Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
         if section_match:
             current_section = section_match.group(1)
             if current_section not in sections:
@@ -180,13 +169,7 @@ def clean_response(text: str) -> str:
             continue
         finding_match = re.match(r"-\s*.+", line)
         if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
-…
-            heading = tool_to_heading[current_tool]
-            if heading not in sections:
-                sections[heading] = []
-            sections[heading].append(line)
-        else:
-            sections[current_section].append(line)
+            sections[current_section].append(line)
 
     cleaned = []
     for heading, findings in sections.items():
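The rewritten loop drops the tool-name bookkeeping and groups bullets purely by the `###` heading that precedes them. A self-contained walk-through of the grouping logic (same regexes as the hunk above; `sanitize_utf8` and the final reassembly are outside this sketch):

```python
import re

text = """### Drugs
- Opioid use disorder not addressed.
### Urgent Follow-up
- No issues identified
"""

sections, current_section = {}, None
for line in text.splitlines():
    line = line.strip()
    if not line:
        continue
    section_match = re.match(r"###\s*(Drugs|Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
    if section_match:
        current_section = section_match.group(1)
        sections.setdefault(current_section, [])  # mirrors the `not in sections` check
        continue
    if re.match(r"-\s*.+", line) and current_section and not re.match(r"-\s*No issues identified", line):
        sections[current_section].append(line)

print(sections)
# {'Drugs': ['- Opioid use disorder not addressed.'], 'Urgent Follow-up': []}
```

One caution on the new cleanup regex: with `re.DOTALL`, lazy alternatives such as `I need to.*?medications\.` can match across line boundaries, so legitimate text between a stray "I need to" and the next "medications." would also be deleted.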
@@ -212,97 +195,99 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=…
+        step_rag_num=1,  # Reduced for speed
         seed=100,
         additional_default_tools=[],
+        num_engine_threads=1,  # Limit VLLM threads for stability
     )
     agent.init_model()
     log_system_usage("After Load")
     logger.info("Agent Ready")
     return agent
 
-def …
-    """Process a …
-    if not chunk.strip():
-        logger.warning(…
-        return …
-…
-…
-…
-…
-    if os.path.exists(chunk_cache_path):
-        with open(chunk_cache_path, "r", encoding="utf-8") as f:
-            logger.info(f"Cache hit for chunk {chunk_idx}")
-            return chunk_idx, f.read()
-
-    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:1000])  # Truncate to avoid token limits
-    chunk_response = ""
+def process_batch(agent, chunks: List[str], cache_path: str, prompt_template: str) -> str:
+    """Process a batch of chunks in a single prompt."""
+    if not any(chunk.strip() for chunk in chunks):
+        logger.warning("All chunks are empty, skipping analysis...")
+        return "No oversights identified in the provided records."
+
+    batch_id = batch_hash(chunks, prompt_template)
+    batch_cache_path = os.path.join(cache_path, f"batch_{batch_id}.txt")
 
+    if os.path.exists(batch_cache_path):
+        with open(batch_cache_path, "r", encoding="utf-8") as f:
+            logger.info("Cache hit for batch")
+            return f.read()
+
+    # Combine chunks into one prompt
+    chunk_texts = [f"Chunk {i+1}:\n{chunk[:500]}" for i, chunk in enumerate(chunks) if chunk.strip()]
+    combined_text = "\n\n".join(chunk_texts)
+    prompt = prompt_template.format(chunks=combined_text)
+    response = ""
+
     try:
-        for …
+        for output in agent.run_gradio_chat(
             message=prompt,
             history=[],
             temperature=0.2,
-            max_new_tokens=…
-            max_token=…
+            max_new_tokens=256,  # Reduced for speed
+            max_token=1024,  # Reduced for speed
             call_agent=False,
             conversation=[],
         ):
-            if …
+            if output is None:
                 continue
-            if isinstance(…
-                for m in …
+            if isinstance(output, list):
+                for m in output:
                     if hasattr(m, 'content') and m.content:
                         cleaned = clean_response(m.content)
                         if cleaned and re.search(r"###\s*\w+", cleaned):
-…
-            elif isinstance(…
-                cleaned = clean_response(…
+                            response += cleaned + "\n\n"
+            elif isinstance(output, str) and output.strip():
+                cleaned = clean_response(output)
                 if cleaned and re.search(r"###\s*\w+", cleaned):
-…
+                    response += cleaned + "\n\n"
     except Exception as e:
-        logger.error(f"Error processing …
-        return …
-
-    if …
-        with open(…
-            f.write(…
-        return …
-    return …
+        logger.error(f"Error processing batch: {e}")
+        return f"Error occurred: {str(e)}"
+
+    if response:
+        with open(batch_cache_path, "w", encoding="utf-8") as f:
+            f.write(response)
+        return response
+    return "No oversights identified in the provided records."
 
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
         gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
         chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
         file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
-        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Max Chunks to Analyze")
         msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
         send_btn = gr.Button("Analyze", variant="primary")
         download_output = gr.File(label="Download Full Report")
 
         prompt_template = """
-You are a medical analysis assistant. Analyze the following patient record …
+You are a medical analysis assistant. Analyze the following patient record excerpts for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under the following headings: 'Drugs', 'Missed Diagnoses', 'Medication Conflicts', 'Incomplete Assessments', 'Urgent Follow-up'. For each finding, include:
 - Clinical context (why the issue was missed or relevant details from the record).
 - Potential risks if unaddressed (e.g., disease progression, adverse events).
 - Actionable recommendations (e.g., tests, referrals, medication adjustments).
-Output ONLY the markdown-formatted findings, with bullet points under each heading.
+Output ONLY the markdown-formatted findings, with bullet points under each heading. Do NOT include tool references, reasoning, or intermediate steps. If no issues are found for a section, omit that section. Ensure the output is specific to the provided text and avoids generic responses.
 
 Example Output:
 ### Drugs
-…
-- [Finding placeholder for drug-related issue]
+- Opioid use disorder not addressed. Missed due to lack of screening. Risks: overdose. Recommend: addiction specialist referral.
 ### Missed Diagnoses
-- …
+- Elevated BP noted without diagnosis. Missed due to inconsistent visits. Risks: stroke. Recommend: BP monitoring, antihypertensives.
 ### Incomplete Assessments
-- …
+- Chest pain not evaluated. Time constraints likely cause. Risks: cardiac issues. Recommend: ECG, stress test.
 ### Urgent Follow-up
-- …
+- Abnormal creatinine not addressed. Delayed lab review. Risks: renal failure. Recommend: nephrology referral.
 
-Patient Record Excerpt (Chunk {0} of {1}):
-{chunk}
+Patient Record Excerpts:
+{chunks}
 """
 
-        def analyze(message: str, history: List[dict], files: List…
+        def analyze(message: str, history: List[dict], files: List):
             history.append({"role": "user", "content": message})
             history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
            yield history, None
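Since `process_batch` now sends everything in one call, the template switched from positional `{0}`/`{1}` plus `{chunk}` to a single named `{chunks}` placeholder. A small sketch of the assembly step, with made-up excerpts and the template shortened to its closing lines:

```python
template = "Analyze the following.\n\nPatient Record Excerpts:\n{chunks}"
chunks = ["Pt reports chest pain, no ECG ordered.", "", "Creatinine 2.1 mg/dL, no follow-up."]

# Same comprehension as process_batch: truncate to 500 chars, skip empties.
chunk_texts = [f"Chunk {i+1}:\n{c[:500]}" for i, c in enumerate(chunks) if c.strip()]
prompt = template.format(chunks="\n\n".join(chunk_texts))
print(prompt)
# ...Patient Record Excerpts:
# Chunk 1:
# Pt reports chest pain, no ECG ordered.
#
# Chunk 3:
# Creatinine 2.1 mg/dL, no follow-up.
```

Because `enumerate` runs before the emptiness filter, labels keep their original positions (Chunk 1 and Chunk 3 here), which preserves traceability back to the source slices.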
@@ -330,39 +315,28 @@ Patient Record Excerpt (Chunk {0} of {1}):
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
 
-            chunk_size = …
+            chunk_size = 500  # Fixed for speed
+            max_chunks = 5  # Fixed for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-            chunks = chunks[:max_chunks]  # Limit to …
-            total_chunks = len(chunks)
-            combined_response = ""
-
+            chunks = chunks[:max_chunks]  # Limit to 5 chunks
             if not chunks:
                 history.append({"role": "assistant", "content": "No content to analyze."})
                 yield history, None
                 return
 
             try:
-…
-…
-…
-                    history.append({"role": "assistant", "content": f"Analyzing chunk {chunk_idx}/{total_chunks}... {animation}"})
-                    yield history, None
-
-                    _, chunk_response = process_chunk(agent, chunk, chunk_idx, total_chunks, file_cache_dir, prompt_template)
-                    combined_response += chunk_response
-
-                    history[-1] = {"role": "assistant", "content": combined_response.strip()}
-                    yield history, None
+                animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                history.append({"role": "assistant", "content": f"Analyzing chunks 1-5... {animation}"})
+                yield history, None
 
-…
-…
-…
-                    history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
+                response = process_batch(agent, chunks, file_cache_dir, prompt_template)
+                history[-1] = {"role": "assistant", "content": response.strip()}
+                yield history, None
 
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-                if report_path:
+                if report_path and response.strip() and "No oversights identified" not in response and "Error occurred" not in response:
                     with open(report_path, "w", encoding="utf-8") as f:
-                        f.write(…
+                        f.write(response)
                 yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
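`analyze` is a generator callback: every `yield history, report` streams an intermediate state to the `Chatbot` and `File` outputs, which is what makes the progress message appear live before the findings replace it. A stripped-down stub of the same pattern (no agent, no files, invented content):

```python
def analyze_stub(message, history, files):
    history = history + [{"role": "user", "content": message}]
    history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
    yield history, None  # first paint: progress message
    history[-1] = {"role": "assistant", "content": "### Drugs\n- example finding"}
    yield history, None  # second paint: result replaces the placeholder

for state, report in analyze_stub("check this record", [], []):
    print(state[-1]["content"], "| report:", report)
```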
@@ -370,8 +344,8 @@ Patient Record Excerpt (Chunk {0} of {1}):
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
 
-        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload…
-        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload…
+        send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
+        msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
     return demo
 
 if __name__ == "__main__":
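The diff ends at the `__main__` guard, whose body is unchanged and not shown. For context, a typical wiring for this kind of Gradio app would look like the following (hypothetical; the actual body may differ):

```python
if __name__ == "__main__":
    agent = init_agent()     # load TxAgent and its tool files
    demo = create_ui(agent)  # build the Gradio Blocks app
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)  # assumed host/port
```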