Update app.py
Browse files
app.py
CHANGED
@@ -63,7 +63,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
|
|
63 |
if total_pages == 0:
|
64 |
return ""
|
65 |
|
66 |
-
batch_size = 10
|
67 |
batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
|
68 |
text_chunks = [""] * total_pages
|
69 |
processed_pages = 0
|
@@ -143,25 +143,13 @@ def clean_response(text: str) -> str:
|
|
143 |
text = re.sub(r"\n{3,}", "\n\n", text)
|
144 |
text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
|
145 |
|
146 |
-
tool_to_heading = {
|
147 |
-
"get_abuse_info_by_drug_name": "Drugs",
|
148 |
-
"get_dependence_info_by_drug_name": "Drugs",
|
149 |
-
"get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
|
150 |
-
"get_info_for_patients_by_drug_name": "Drugs",
|
151 |
-
}
|
152 |
-
|
153 |
sections = {}
|
154 |
current_section = None
|
155 |
-
current_tool = None
|
156 |
lines = text.splitlines()
|
157 |
for line in lines:
|
158 |
line = line.strip()
|
159 |
if not line:
|
160 |
continue
|
161 |
-
tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
|
162 |
-
if tool_match:
|
163 |
-
current_tool = tool_match.group(1)
|
164 |
-
continue
|
165 |
section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
|
166 |
if section_match:
|
167 |
current_section = section_match.group(1)
|
@@ -170,13 +158,7 @@ def clean_response(text: str) -> str:
|
|
170 |
continue
|
171 |
finding_match = re.match(r"-\s*.+", line)
|
172 |
if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
|
173 |
-
|
174 |
-
heading = tool_to_heading[current_tool]
|
175 |
-
if heading not in sections:
|
176 |
-
sections[heading] = []
|
177 |
-
sections[heading].append(line)
|
178 |
-
else:
|
179 |
-
sections[current_section].append(line)
|
180 |
|
181 |
cleaned = []
|
182 |
for heading, findings in sections.items():
|
@@ -186,6 +168,38 @@ def clean_response(text: str) -> str:
|
|
186 |
text = "\n\n".join(cleaned).strip()
|
187 |
return text if text else ""
|
188 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
189 |
def init_agent():
|
190 |
logger.info("Initializing model...")
|
191 |
log_system_usage("Before Load")
|
@@ -199,7 +213,7 @@ def init_agent():
|
|
199 |
rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
|
200 |
tool_files_dict={"new_tool": target_tool_path},
|
201 |
force_finish=True,
|
202 |
-
enable_checker=False,
|
203 |
step_rag_num=4,
|
204 |
seed=100,
|
205 |
additional_default_tools=[],
|
@@ -212,7 +226,8 @@ def init_agent():
|
|
212 |
def create_ui(agent):
|
213 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
214 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
215 |
-
chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
|
|
|
216 |
file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
|
217 |
msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
|
218 |
send_btn = gr.Button("Analyze", variant="primary")
|
@@ -220,7 +235,7 @@ def create_ui(agent):
|
|
220 |
progress_bar = gr.Progress()
|
221 |
|
222 |
prompt_template = """
|
223 |
-
Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under
|
224 |
|
225 |
Patient Record Excerpt (Chunk {0} of {1}):
|
226 |
{chunk}
|
@@ -228,14 +243,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
|
|
228 |
|
229 |
def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
|
230 |
history.append({"role": "user", "content": message})
|
231 |
-
yield history, None
|
232 |
|
233 |
extracted = ""
|
234 |
file_hash_value = ""
|
235 |
if files:
|
236 |
def update_extraction_progress(current, total):
|
237 |
progress(current / total, desc=f"Extracting text... Page {current}/{total}")
|
238 |
-
return history, None
|
239 |
|
240 |
with ThreadPoolExecutor(max_workers=6) as executor:
|
241 |
futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
|
@@ -244,7 +259,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
|
|
244 |
file_hash_value = file_hash(files[0].name) if files else ""
|
245 |
|
246 |
history.append({"role": "assistant", "content": "✅ Text extraction complete."})
|
247 |
-
yield history, None
|
248 |
|
249 |
chunk_size = 6000
|
250 |
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
|
@@ -273,7 +288,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
|
|
273 |
if cleaned and re.search(r"###\s*\w+", cleaned):
|
274 |
chunk_response += cleaned + "\n\n"
|
275 |
elif isinstance(chunk_output, str) and chunk_output.strip():
|
276 |
-
cleaned = clean_response(
|
277 |
if cleaned and re.search(r"###\s*\w+", cleaned):
|
278 |
chunk_response += cleaned + "\n\n"
|
279 |
batch_responses.append(chunk_response)
|
@@ -286,26 +301,27 @@ Patient Record Excerpt (Chunk {0} of {1}):
|
|
286 |
else:
|
287 |
combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
|
288 |
history[-1] = {"role": "assistant", "content": combined_response.strip()}
|
289 |
-
yield history, None
|
290 |
|
291 |
if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
|
292 |
history[-1]["content"] = combined_response.strip()
|
293 |
else:
|
294 |
history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
|
295 |
|
|
|
296 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
297 |
if report_path:
|
298 |
with open(report_path, "w", encoding="utf-8") as f:
|
299 |
-
f.write(combined_response)
|
300 |
-
yield history, report_path if report_path and os.path.exists(report_path) else None
|
301 |
|
302 |
except Exception as e:
|
303 |
logger.error("Analysis error: %s", e)
|
304 |
history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
|
305 |
-
yield history, None
|
306 |
|
307 |
-
send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
|
308 |
-
msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
|
309 |
return demo
|
310 |
|
311 |
if __name__ == "__main__":
|
|
|
63 |
if total_pages == 0:
|
64 |
return ""
|
65 |
|
66 |
+
batch_size = 10
|
67 |
batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
|
68 |
text_chunks = [""] * total_pages
|
69 |
processed_pages = 0
|
|
|
143 |
text = re.sub(r"\n{3,}", "\n\n", text)
|
144 |
text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
sections = {}
|
147 |
current_section = None
|
|
|
148 |
lines = text.splitlines()
|
149 |
for line in lines:
|
150 |
line = line.strip()
|
151 |
if not line:
|
152 |
continue
|
|
|
|
|
|
|
|
|
153 |
section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
|
154 |
if section_match:
|
155 |
current_section = section_match.group(1)
|
|
|
158 |
continue
|
159 |
finding_match = re.match(r"-\s*.+", line)
|
160 |
if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
|
161 |
+
sections[current_section].append(line)
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
cleaned = []
|
164 |
for heading, findings in sections.items():
|
|
|
168 |
text = "\n\n".join(cleaned).strip()
|
169 |
return text if text else ""
|
170 |
|
171 |
+
def summarize_findings(combined_response: str) -> str:
    """Condense the per-chunk analysis text into a short markdown summary.

    The input is scanned line by line for the four recognised ``###`` section
    headings; ``-`` bullets that follow a heading are collected under it. At
    most the first two findings of each section are quoted in the summary.

    Args:
        combined_response: Concatenated analysis text, with chunks introduced
            by ``--- Analysis for Chunk`` header lines.

    Returns:
        A markdown string starting with ``### Summary of Clinical Oversights``
        followed either by one bullet per non-empty section or by a
        "no oversights" sentence.
    """
    header = "### Summary of Clinical Oversights\n"
    chunk_separator = "--- Analysis for Chunk"
    section_pattern = (
        r"###\s*(Missed Diagnoses|Medication Conflicts|"
        r"Incomplete Assessments|Urgent Follow-up)"
    )

    # Splitting text that begins with the separator yields a leading empty
    # piece; drop blank pieces so they cannot defeat the all() check below.
    chunks = [
        piece
        for piece in (combined_response or "").split(chunk_separator)
        if piece.strip()
    ]
    if all("No oversights identified" in piece for piece in chunks):
        return header + "No critical oversights identified in the provided records."

    sections = {}
    current_section = None
    for raw_line in combined_response.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith(chunk_separator):
            # A chunk header starts with "-" and would otherwise be captured
            # as a bullet of the previous chunk's last section.
            current_section = None
            continue
        section_match = re.match(section_pattern, line)
        if section_match:
            current_section = section_match.group(1)
            sections.setdefault(current_section, [])
            continue
        if current_section is None:
            continue
        # Placeholder bullets are not findings (same rule as clean_response).
        if re.match(r"-\s*No issues identified", line):
            continue
        finding_match = re.match(r"-\s*(.+)", line)
        if finding_match:
            sections[current_section].append(finding_match.group(1))

    summary_lines = [
        f"- **{heading}**: {'; '.join(findings[:2])}. Risks: {heading.lower()} may lead to adverse outcomes. Recommend: urgent review and specialist referral."
        for heading, findings in sections.items()
        if findings
    ]

    if not summary_lines:
        return header + "No critical oversights identified."

    return header + "\n".join(summary_lines)
|
202 |
+
|
203 |
def init_agent():
|
204 |
logger.info("Initializing model...")
|
205 |
log_system_usage("Before Load")
|
|
|
213 |
rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
|
214 |
tool_files_dict={"new_tool": target_tool_path},
|
215 |
force_finish=True,
|
216 |
+
enable_checker=False,
|
217 |
step_rag_num=4,
|
218 |
seed=100,
|
219 |
additional_default_tools=[],
|
|
|
226 |
def create_ui(agent):
|
227 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
228 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
229 |
+
chatbot = gr.Chatbot(label="Detailed Analysis", height=600, type="messages")
|
230 |
+
final_summary = gr.Markdown(label="Summary of Clinical Oversights")
|
231 |
file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
|
232 |
msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
|
233 |
send_btn = gr.Button("Analyze", variant="primary")
|
|
|
235 |
progress_bar = gr.Progress()
|
236 |
|
237 |
prompt_template = """
|
238 |
+
Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under headings (e.g., 'Missed Diagnoses'). For each finding, include clinical context, risks, and recommendations. Output only markdown bullet points under headings. If no issues, state "No issues identified".
|
239 |
|
240 |
Patient Record Excerpt (Chunk {0} of {1}):
|
241 |
{chunk}
|
|
|
243 |
|
244 |
def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
|
245 |
history.append({"role": "user", "content": message})
|
246 |
+
yield history, None, ""
|
247 |
|
248 |
extracted = ""
|
249 |
file_hash_value = ""
|
250 |
if files:
|
251 |
def update_extraction_progress(current, total):
|
252 |
progress(current / total, desc=f"Extracting text... Page {current}/{total}")
|
253 |
+
return history, None, ""
|
254 |
|
255 |
with ThreadPoolExecutor(max_workers=6) as executor:
|
256 |
futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
|
|
|
259 |
file_hash_value = file_hash(files[0].name) if files else ""
|
260 |
|
261 |
history.append({"role": "assistant", "content": "✅ Text extraction complete."})
|
262 |
+
yield history, None, ""
|
263 |
|
264 |
chunk_size = 6000
|
265 |
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
|
|
|
288 |
if cleaned and re.search(r"###\s*\w+", cleaned):
|
289 |
chunk_response += cleaned + "\n\n"
|
290 |
elif isinstance(chunk_output, str) and chunk_output.strip():
|
291 |
+
cleaned = clean_response(m.content)
|
292 |
if cleaned and re.search(r"###\s*\w+", cleaned):
|
293 |
chunk_response += cleaned + "\n\n"
|
294 |
batch_responses.append(chunk_response)
|
|
|
301 |
else:
|
302 |
combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
|
303 |
history[-1] = {"role": "assistant", "content": combined_response.strip()}
|
304 |
+
yield history, None, ""
|
305 |
|
306 |
if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
|
307 |
history[-1]["content"] = combined_response.strip()
|
308 |
else:
|
309 |
history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
|
310 |
|
311 |
+
summary = summarize_findings(combined_response)
|
312 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
313 |
if report_path:
|
314 |
with open(report_path, "w", encoding="utf-8") as f:
|
315 |
+
f.write(combined_response + "\n\n" + summary)
|
316 |
+
yield history, report_path if report_path and os.path.exists(report_path) else None, summary
|
317 |
|
318 |
except Exception as e:
|
319 |
logger.error("Analysis error: %s", e)
|
320 |
history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
|
321 |
+
yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"
|
322 |
|
323 |
+
send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
|
324 |
+
msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
|
325 |
return demo
|
326 |
|
327 |
if __name__ == "__main__":
|