Ali2206 committed (verified)
Commit eab55d6 · Parent: 072b189

Update app.py

Files changed (1):
  app.py (+116 -91)
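The diff replaces the old clean_response() post-processing (which stripped [TOOL_CALLS] blocks and JSON fragments) with raw streamed output, and threads traceback-based logging through every failure path: catch the exception, print a tagged message, dump the stack with traceback.print_exc(), and hand back a sentinel value instead of crashing the Gradio stream. A minimal sketch of that recurring pattern, with an illustrative function name not taken from the repo:

import json
import traceback

def convert_sketch(file_path: str) -> str:
    # Stand-in for the app's convert_file_to_json: on failure, log
    # loudly but still return parseable JSON, so one bad file cannot
    # kill the whole ThreadPoolExecutor batch.
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.dumps({"content": f.read()})
    except Exception as e:
        error_msg = f"Error processing {file_path}: {e}"
        print(error_msg)
        traceback.print_exc()  # full stack trace to stdout for debugging
        return json.dumps({"error": error_msg})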
app.py CHANGED
@@ -11,6 +11,7 @@ import shutil
 import re
 import psutil
 import subprocess
+import traceback
 
 # Persistent directory setup
 persistent_dir = "/data/hf_cache"
@@ -37,6 +38,7 @@ sys.path.insert(0, src_path)
 
 from txagent.txagent import TxAgent
 
+# Medical Keywords used in the PDF processing
 MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
                     'allergies', 'summary', 'impression', 'findings', 'recommendations'}
 
@@ -55,14 +57,17 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
             for i, page in enumerate(pdf.pages[:3]):
                 text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
-            # Then include pages that mention one or more medical keywords
+            # Include further pages if they contain any medical keywords
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
                     text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
             return "\n\n".join(text_chunks)
     except Exception as e:
-        return f"PDF processing error: {str(e)}"
+        debug_msg = f"PDF processing error: {str(e)}"
+        print(debug_msg)
+        traceback.print_exc()
+        return debug_msg
 
 def convert_file_to_json(file_path: str, file_type: str) -> str:
     try:
@@ -93,7 +98,10 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
             f.write(result)
         return result
     except Exception as e:
-        return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
+        error_msg = f"Error processing {os.path.basename(file_path)}: {str(e)}"
+        print(error_msg)
+        traceback.print_exc()
+        return json.dumps({"error": error_msg})
 
 def log_system_usage(tag=""):
     try:
@@ -109,41 +117,35 @@ def log_system_usage(tag=""):
         print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
         print(f"[{tag}] GPU/CPU monitor failed: {e}")
+        traceback.print_exc()
 
 def init_agent():
-    print("🔍 Initializing model...")
-    log_system_usage("Before Load")
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
-
-    agent = TxAgent(
-        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
-        force_finish=True,
-        enable_checker=True,
-        step_rag_num=8,
-        seed=100,
-        additional_default_tools=[],
-    )
-    agent.init_model()
-    log_system_usage("After Load")
-    print("✅ Agent Ready")
-    return agent
-
-def clean_response(response: str) -> str:
-    """
-    Updated cleaner that removes the [TOOL_CALLS] tag and any JSON artifacts
-    while preserving the full analysis so that all identified oversights are displayed.
-    """
-    # Remove everything starting from the first [TOOL_CALLS] occurrence
-    cleaned = response.split("[TOOL_CALLS]")[0].strip()
-    # Remove any remaining JSON artifacts in case they appear
-    cleaned = re.sub(r'\{.*?\}', '', cleaned)
-    cleaned = re.sub(r'\[.*?\]', '', cleaned)
-    return cleaned
+    try:
+        print("🔍 Initializing model...")
+        log_system_usage("Before Load")
+        default_tool_path = os.path.abspath("data/new_tool.json")
+        target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+        if not os.path.exists(target_tool_path):
+            shutil.copy(default_tool_path, target_tool_path)
+
+        agent = TxAgent(
+            model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+            rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+            tool_files_dict={"new_tool": target_tool_path},
+            force_finish=True,
+            enable_checker=True,
+            step_rag_num=8,
+            seed=100,
+            additional_default_tools=[],
+        )
+        agent.init_model()
+        log_system_usage("After Load")
+        print("✅ Agent Ready")
+        return agent
+    except Exception as e:
+        print("❌ Error initializing agent:", str(e))
+        traceback.print_exc()
+        raise e
 
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft()) as demo:
@@ -155,20 +157,28 @@ def create_ui(agent):
         download_output = gr.File(label="Download Full Report")
 
         def analyze(message: str, history: list, files: list):
-            history.append({"role": "user", "content": message})
-            history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
-            yield history, None
-
-            extracted = ""
-            file_hash_value = ""
-            if files:
-                with ThreadPoolExecutor(max_workers=4) as executor:
-                    futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
-                    results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
-                    extracted = "\n".join(results)
-                    file_hash_value = file_hash(files[0].name)
-
-            prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
+            try:
+                history.append({"role": "user", "content": message})
+                history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
+                yield history, None
+
+                extracted = ""
+                file_hash_value = ""
+                if files:
+                    with ThreadPoolExecutor(max_workers=4) as executor:
+                        futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
+                        results = []
+                        for future in as_completed(futures):
+                            try:
+                                res = future.result()
+                                results.append(sanitize_utf8(res))
+                            except Exception as e:
+                                print("❌ Error in file processing:", str(e))
+                                traceback.print_exc()
+                        extracted = "\n".join(results)
+                        file_hash_value = file_hash(files[0].name)
+
+                prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
 1. List potential missed diagnoses
 2. Flag any medication conflicts
 3. Note incomplete assessments
@@ -180,10 +190,12 @@ Medical Records:
 ### Potential Oversights:
 """
 
-            try:
+                print("🔎 Generated prompt:")
+                print(prompt)
+
                 full_response = ""
                 finish_detected = False
-
+
                 for chunk in agent.run_gradio_chat(
                     message=prompt,
                     history=[],
@@ -193,42 +205,51 @@ Medical Records:
                     call_agent=False,
                     conversation=[]
                 ):
-                if chunk is None:
-                    continue
-
-                if isinstance(chunk, str):
-                    full_response += chunk
-                elif isinstance(chunk, list):
-                    chunk_content = "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
-                    full_response += chunk_content
-                    if '"name": "Finish"' in chunk_content:
-                        finish_detected = True
-
-                # Display intermediate response
-                current_cleaned = clean_response(full_response)
-                if current_cleaned:
-                    history[-1] = {"role": "assistant", "content": current_cleaned}
-                    yield history, None
-
-            # Final processing of the complete response
-            final_cleaned = clean_response(full_response)
-
-            if not final_cleaned:
-                final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
-
-            # Save report if a file was processed
+                    try:
+                        if chunk is None:
+                            continue
+                        if isinstance(chunk, str):
+                            print("DEBUG: Received string chunk")
+                            full_response += chunk
+                        elif isinstance(chunk, list):
+                            chunk_content = "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
+                            print("DEBUG: Received list chunk:", chunk_content)
+                            full_response += chunk_content
+                            if '"name": "Finish"' in chunk_content:
+                                finish_detected = True
+                        else:
+                            print("DEBUG: Received unknown type chunk", type(chunk))
+                    except Exception as e:
+                        print("❌ Error processing chunk:", str(e))
+                        traceback.print_exc()
+
+                    # Yield intermediate full (raw) response for debugging
+                    history[-1] = {"role": "assistant", "content": full_response}
+                    yield history, None
+
+                # Final processing
+                if not full_response:
+                    full_response = "⚠️ No clear oversights identified or model output was invalid."
+
+                # Save full report
                 report_path = None
                 if file_hash_value:
                     report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
-                with open(report_path, "w", encoding="utf-8") as f:
-                    f.write(final_cleaned)
+                    try:
+                        with open(report_path, "w", encoding="utf-8") as f:
+                            f.write(full_response)
+                    except Exception as e:
+                        print("❌ Error saving report:", str(e))
+                        traceback.print_exc()
 
-            history[-1] = {"role": "assistant", "content": final_cleaned}
+                history[-1] = {"role": "assistant", "content": full_response}
                 yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
-                print("❌ ERROR:", str(e))
-                history[-1] = {"role": "assistant", "content": f"❌ An error occurred: {str(e)}"}
+                error_message = f"❌ An error occurred in analyze: {str(e)}"
+                print(error_message)
+                traceback.print_exc()
+                history[-1] = {"role": "assistant", "content": error_message}
                 yield history, None
 
         send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
@@ -236,13 +257,17 @@ Medical Records:
     return demo
 
 if __name__ == "__main__":
-    print("🚀 Launching app...")
-    agent = init_agent()
-    demo = create_ui(agent)
-    demo.queue(api_open=False).launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        show_error=True,
-        allowed_paths=[report_dir],
-        share=False
-    )
+    try:
+        print("🚀 Launching app...")
+        agent = init_agent()
+        demo = create_ui(agent)
+        demo.queue(api_open=False).launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            show_error=True,
+            allowed_paths=[report_dir],
+            share=False
+        )
+    except Exception as e:
+        print("❌ Fatal error during launch:", str(e))
+        traceback.print_exc()
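For reference, the page-selection logic that extract_priority_pages keeps throughout this commit: the first three pages are always included, and pages 4 through max_pages are included only when they contain a whole-word match for a medical keyword. A self-contained sketch of that strategy, assuming pdfplumber is installed and using a trimmed keyword set (the function name here is illustrative, not from the repo):

import re
import pdfplumber

KEYWORDS = {"diagnosis", "assessment", "plan", "medications"}

def priority_pages_sketch(pdf_path: str, max_pages: int = 20) -> str:
    # Keep the first three pages unconditionally, then keep pages
    # 4..max_pages only when they mention a keyword (whole-word match).
    chunks = []
    with pdfplumber.open(pdf_path) as pdf:
        for i, page in enumerate(pdf.pages[:3]):
            text = page.extract_text() or ""
            chunks.append(f"=== Page {i + 1} ===\n{text.strip()}")
        for i, page in enumerate(pdf.pages[3:max_pages], start=4):
            text = page.extract_text() or ""
            if any(re.search(rf"\b{kw}\b", text.lower()) for kw in KEYWORDS):
                chunks.append(f"=== Page {i} ===\n{text.strip()}")
    return "\n\n".join(chunks)

Note that the slice pdf.pages[3:max_pages] with the default max_pages=20 scans pages 4–20 at most; later pages are never considered.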