Ali2206 committed on
Commit
9277e15
·
verified ·
1 Parent(s): 0456412

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -33
app.py CHANGED
@@ -63,7 +63,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
63
  if total_pages == 0:
64
  return ""
65
 
66
- batch_size = 10 # Process 10 pages per thread
67
  batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
68
  text_chunks = [""] * total_pages
69
  processed_pages = 0
@@ -143,25 +143,13 @@ def clean_response(text: str) -> str:
143
  text = re.sub(r"\n{3,}", "\n\n", text)
144
  text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
145
 
146
- tool_to_heading = {
147
- "get_abuse_info_by_drug_name": "Drugs",
148
- "get_dependence_info_by_drug_name": "Drugs",
149
- "get_abuse_types_and_related_adverse_reactions_and_controlled_substance_status_by_drug_name": "Drugs",
150
- "get_info_for_patients_by_drug_name": "Drugs",
151
- }
152
-
153
  sections = {}
154
  current_section = None
155
- current_tool = None
156
  lines = text.splitlines()
157
  for line in lines:
158
  line = line.strip()
159
  if not line:
160
  continue
161
- tool_match = re.match(r"\[TOOL:\s*(\w+)\]", line)
162
- if tool_match:
163
- current_tool = tool_match.group(1)
164
- continue
165
  section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
166
  if section_match:
167
  current_section = section_match.group(1)
@@ -170,13 +158,7 @@ def clean_response(text: str) -> str:
170
  continue
171
  finding_match = re.match(r"-\s*.+", line)
172
  if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
173
- if current_tool and current_tool in tool_to_heading:
174
- heading = tool_to_heading[current_tool]
175
- if heading not in sections:
176
- sections[heading] = []
177
- sections[heading].append(line)
178
- else:
179
- sections[current_section].append(line)
180
 
181
  cleaned = []
182
  for heading, findings in sections.items():
@@ -186,6 +168,38 @@ def clean_response(text: str) -> str:
186
  text = "\n\n".join(cleaned).strip()
187
  return text if text else ""
188
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def init_agent():
190
  logger.info("Initializing model...")
191
  log_system_usage("Before Load")
@@ -199,7 +213,7 @@ def init_agent():
199
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
200
  tool_files_dict={"new_tool": target_tool_path},
201
  force_finish=True,
202
- enable_checker=False, # Disabled for speed
203
  step_rag_num=4,
204
  seed=100,
205
  additional_default_tools=[],
@@ -212,7 +226,8 @@ def init_agent():
212
  def create_ui(agent):
213
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
214
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
215
- chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
 
216
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
217
  msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
218
  send_btn = gr.Button("Analyze", variant="primary")
@@ -220,7 +235,7 @@ def create_ui(agent):
220
  progress_bar = gr.Progress()
221
 
222
  prompt_template = """
223
- Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under tool-derived headings (e.g., 'Drugs'). For each finding, include clinical context, risks, and recommendations. Precede findings with a tool tag (e.g., [TOOL: get_abuse_info_by_drug_name]). Output only markdown bullet points under headings. If no issues, state "No issues identified".
224
 
225
  Patient Record Excerpt (Chunk {0} of {1}):
226
  {chunk}
@@ -228,14 +243,14 @@ Patient Record Excerpt (Chunk {0} of {1}):
228
 
229
  def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
230
  history.append({"role": "user", "content": message})
231
- yield history, None
232
 
233
  extracted = ""
234
  file_hash_value = ""
235
  if files:
236
  def update_extraction_progress(current, total):
237
  progress(current / total, desc=f"Extracting text... Page {current}/{total}")
238
- return history, None
239
 
240
  with ThreadPoolExecutor(max_workers=6) as executor:
241
  futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
@@ -244,7 +259,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
244
  file_hash_value = file_hash(files[0].name) if files else ""
245
 
246
  history.append({"role": "assistant", "content": "✅ Text extraction complete."})
247
- yield history, None
248
 
249
  chunk_size = 6000
250
  chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
@@ -273,7 +288,7 @@ Patient Record Excerpt (Chunk {0} of {1}):
273
  if cleaned and re.search(r"###\s*\w+", cleaned):
274
  chunk_response += cleaned + "\n\n"
275
  elif isinstance(chunk_output, str) and chunk_output.strip():
276
- cleaned = clean_response(chunk_output)
277
  if cleaned and re.search(r"###\s*\w+", cleaned):
278
  chunk_response += cleaned + "\n\n"
279
  batch_responses.append(chunk_response)
@@ -286,26 +301,27 @@ Patient Record Excerpt (Chunk {0} of {1}):
286
  else:
287
  combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
288
  history[-1] = {"role": "assistant", "content": combined_response.strip()}
289
- yield history, None
290
 
291
  if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
292
  history[-1]["content"] = combined_response.strip()
293
  else:
294
  history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
295
 
 
296
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
297
  if report_path:
298
  with open(report_path, "w", encoding="utf-8") as f:
299
- f.write(combined_response)
300
- yield history, report_path if report_path and os.path.exists(report_path) else None
301
 
302
  except Exception as e:
303
  logger.error("Analysis error: %s", e)
304
  history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
305
- yield history, None
306
 
307
- send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
308
- msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
309
  return demo
310
 
311
  if __name__ == "__main__":
 
63
  if total_pages == 0:
64
  return ""
65
 
66
+ batch_size = 10
67
  batches = [(i, min(i + batch_size, total_pages)) for i in range(0, total_pages, batch_size)]
68
  text_chunks = [""] * total_pages
69
  processed_pages = 0
 
143
  text = re.sub(r"\n{3,}", "\n\n", text)
144
  text = re.sub(r"[^\n#\-\*\w\s\.\,\:\(\)]+", "", text)
145
 
 
 
 
 
 
 
 
146
  sections = {}
147
  current_section = None
 
148
  lines = text.splitlines()
149
  for line in lines:
150
  line = line.strip()
151
  if not line:
152
  continue
 
 
 
 
153
  section_match = re.match(r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", line)
154
  if section_match:
155
  current_section = section_match.group(1)
 
158
  continue
159
  finding_match = re.match(r"-\s*.+", line)
160
  if finding_match and current_section and not re.match(r"-\s*No issues identified", line):
161
+ sections[current_section].append(line)
 
 
 
 
 
 
162
 
163
  cleaned = []
164
  for heading, findings in sections.items():
 
168
  text = "\n\n".join(cleaned).strip()
169
  return text if text else ""
170
 
171
def summarize_findings(combined_response: str) -> str:
    """Condense the combined per-chunk analysis markdown into a short summary.

    Scans ``combined_response`` for the four known ``### <heading>`` sections,
    collects their bullet-point findings, and emits one condensed bullet per
    heading (first two findings only).  Returns a "no critical oversights"
    message when there is nothing to report.

    Args:
        combined_response: Markdown produced by the per-chunk analysis loop,
            with chunks delimited by ``--- Analysis for Chunk`` markers.

    Returns:
        A markdown string starting with ``### Summary of Clinical Oversights``.
    """
    no_findings = ("### Summary of Clinical Oversights\n"
                   "No critical oversights identified in the provided records.")

    # Split on the chunk delimiter and drop empty/whitespace pieces: the text
    # before the first delimiter is an empty prefix, and feeding it to the
    # all(...) check below made the early return fail even when every real
    # chunk reported "No oversights identified".
    chunks = [c for c in (combined_response or "").split("--- Analysis for Chunk")
              if c.strip()]
    if not chunks or all("No oversights identified" in chunk for chunk in chunks):
        return no_findings

    # Map heading -> list of finding texts (bullet text without the "- ").
    sections = {}
    current_section = None
    for raw_line in combined_response.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        section_match = re.match(
            r"###\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)",
            line,
        )
        if section_match:
            current_section = section_match.group(1)
            sections.setdefault(current_section, [])
            continue
        finding_match = re.match(r"-\s*(.+)", line)
        if finding_match and current_section:
            sections[current_section].append(finding_match.group(1))

    # One condensed bullet per heading; keep only the first two findings so
    # the summary stays short.
    summary_lines = []
    for heading, findings in sections.items():
        if findings:
            summary_lines.append(
                f"- **{heading}**: {'; '.join(findings[:2])}. "
                f"Risks: {heading.lower()} may lead to adverse outcomes. "
                f"Recommend: urgent review and specialist referral."
            )

    if not summary_lines:
        return "### Summary of Clinical Oversights\nNo critical oversights identified."

    return "### Summary of Clinical Oversights\n" + "\n".join(summary_lines)
202
+
203
  def init_agent():
204
  logger.info("Initializing model...")
205
  log_system_usage("Before Load")
 
213
  rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
214
  tool_files_dict={"new_tool": target_tool_path},
215
  force_finish=True,
216
+ enable_checker=False,
217
  step_rag_num=4,
218
  seed=100,
219
  additional_default_tools=[],
 
226
  def create_ui(agent):
227
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
228
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
229
+ chatbot = gr.Chatbot(label="Detailed Analysis", height=600, type="messages")
230
+ final_summary = gr.Markdown(label="Summary of Clinical Oversights")
231
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
232
  msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
233
  send_btn = gr.Button("Analyze", variant="primary")
 
235
  progress_bar = gr.Progress()
236
 
237
  prompt_template = """
238
+ Analyze the patient record excerpt for clinical oversights. Provide a concise, evidence-based summary in markdown with findings grouped under headings (e.g., 'Missed Diagnoses'). For each finding, include clinical context, risks, and recommendations. Output only markdown bullet points under headings. If no issues, state "No issues identified".
239
 
240
  Patient Record Excerpt (Chunk {0} of {1}):
241
  {chunk}
 
243
 
244
  def analyze(message: str, history: List[dict], files: List, progress=gr.Progress()):
245
  history.append({"role": "user", "content": message})
246
+ yield history, None, ""
247
 
248
  extracted = ""
249
  file_hash_value = ""
250
  if files:
251
  def update_extraction_progress(current, total):
252
  progress(current / total, desc=f"Extracting text... Page {current}/{total}")
253
+ return history, None, ""
254
 
255
  with ThreadPoolExecutor(max_workers=6) as executor:
256
  futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
 
259
  file_hash_value = file_hash(files[0].name) if files else ""
260
 
261
  history.append({"role": "assistant", "content": "✅ Text extraction complete."})
262
+ yield history, None, ""
263
 
264
  chunk_size = 6000
265
  chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
 
288
  if cleaned and re.search(r"###\s*\w+", cleaned):
289
  chunk_response += cleaned + "\n\n"
290
  elif isinstance(chunk_output, str) and chunk_output.strip():
291
+ cleaned = clean_response(chunk_output)
292
  if cleaned and re.search(r"###\s*\w+", cleaned):
293
  chunk_response += cleaned + "\n\n"
294
  batch_responses.append(chunk_response)
 
301
  else:
302
  combined_response += f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
303
  history[-1] = {"role": "assistant", "content": combined_response.strip()}
304
+ yield history, None, ""
305
 
306
  if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
307
  history[-1]["content"] = combined_response.strip()
308
  else:
309
  history.append({"role": "assistant", "content": "No oversights identified in the provided records."})
310
 
311
+ summary = summarize_findings(combined_response)
312
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
313
  if report_path:
314
  with open(report_path, "w", encoding="utf-8") as f:
315
+ f.write(combined_response + "\n\n" + summary)
316
+ yield history, report_path if report_path and os.path.exists(report_path) else None, summary
317
 
318
  except Exception as e:
319
  logger.error("Analysis error: %s", e)
320
  history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
321
+ yield history, None, f"### Summary of Clinical Oversights\nError occurred during analysis: {str(e)}"
322
 
323
+ send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
324
+ msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output, final_summary])
325
  return demo
326
 
327
  if __name__ == "__main__":