Ali2206 committed on
Commit
f4976e2
·
verified ·
1 Parent(s): 2943a5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -208
app.py CHANGED
@@ -4,26 +4,13 @@ import pandas as pd
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
- from typing import List, Tuple
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
11
  import re
12
  import psutil
13
  import subprocess
14
- import logging
15
- from datetime import datetime
16
-
17
- # Configure logging
18
- logging.basicConfig(
19
- level=logging.INFO,
20
- format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
21
- handlers=[
22
- logging.StreamHandler(),
23
- logging.FileHandler('clinical_oversight.log')
24
- ]
25
- )
26
- logger = logging.getLogger(__name__)
27
 
28
  # Persistent directory
29
  persistent_dir = "/data/hf_cache"
@@ -73,7 +60,6 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
73
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
74
  return "\n\n".join(text_chunks)
75
  except Exception as e:
76
- logger.error(f"Error extracting pages from PDF: {str(e)}")
77
  return f"PDF processing error: {str(e)}"
78
 
79
  def convert_file_to_json(file_path: str, file_type: str) -> str:
@@ -101,31 +87,29 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
101
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
102
  else:
103
  result = json.dumps({"error": f"Unsupported file type: {file_type}"})
104
-
105
  with open(cache_path, "w", encoding="utf-8") as f:
106
  f.write(result)
107
  return result
108
  except Exception as e:
109
- logger.error(f"Error converting {file_type} file to JSON: {str(e)}")
110
  return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
111
 
112
  def log_system_usage(tag=""):
113
  try:
114
  cpu = psutil.cpu_percent(interval=1)
115
  mem = psutil.virtual_memory()
116
- logger.info(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
117
  result = subprocess.run(
118
  ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
119
  capture_output=True, text=True
120
  )
121
  if result.returncode == 0:
122
  used, total, util = result.stdout.strip().split(", ")
123
- logger.info(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
124
  except Exception as e:
125
- logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
126
 
127
  def init_agent():
128
- logger.info("🔁 Initializing model...")
129
  log_system_usage("Before Load")
130
  default_tool_path = os.path.abspath("data/new_tool.json")
131
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
@@ -144,63 +128,33 @@ def init_agent():
144
  )
145
  agent.init_model()
146
  log_system_usage("After Load")
147
- logger.info("✅ Agent Ready")
148
  return agent
149
 
150
- def format_response_for_ui(response: str) -> str:
151
- """Formats the raw response for clean display in the UI"""
152
- # Remove any tool call metadata
153
- cleaned = response.split("[TOOL_CALLS]")[0].strip()
154
-
155
- # If we have a structured response, format it nicely
156
- if "Potential missed diagnoses" in cleaned or "Flagged medication conflicts" in cleaned:
157
- # Add markdown formatting for better readability
158
- formatted = []
159
- for line in cleaned.split("\n"):
160
- if line.startswith("Potential missed diagnoses"):
161
- formatted.append(f"### 🔍 Potential Missed Diagnoses")
162
- elif line.startswith("Flagged medication conflicts"):
163
- formatted.append(f"\n### ⚠️ Flagged Medication Conflicts")
164
- elif line.startswith("Incomplete assessments"):
165
- formatted.append(f"\n### 📋 Incomplete Assessments")
166
- elif line.startswith("Highlighted abnormal results"):
167
- formatted.append(f"\n### ❗ Abnormal Results Needing Follow-up")
168
- else:
169
- formatted.append(line)
170
- return "\n".join(formatted)
171
- return cleaned
172
-
173
- def analyze(message: str, history: List[Tuple[str, str]], files: list):
174
- start_time = datetime.now()
175
- logger.info(f"Starting analysis for message: {message[:100]}...")
176
- if files:
177
- logger.info(f"Processing {len(files)} uploaded files")
178
-
179
- # Initialize chat history in the correct format if empty
180
- if history is None:
181
- history = []
182
-
183
- # Add user message to history
184
- history.append([message, None])
185
- yield history, None
186
-
187
- extracted = ""
188
- file_hash_value = ""
189
- if files:
190
- try:
191
- with ThreadPoolExecutor(max_workers=4) as executor:
192
- futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
193
- results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
194
- extracted = "\n".join(results)
195
- file_hash_value = file_hash(files[0].name)
196
- logger.info(f"Processed {len(files)} files, extracted {len(extracted)} characters")
197
- except Exception as e:
198
- logger.error(f"Error processing files: {str(e)}")
199
- history[-1][1] = f"❌ Error processing files: {str(e)}"
200
  yield history, None
201
- return
202
 
203
- prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
 
 
 
 
 
 
 
 
 
204
  1. List potential missed diagnoses
205
  2. Flag any medication conflicts
206
  3. Note incomplete assessments
@@ -211,145 +165,54 @@ Medical Records:
211
 
212
  ### Potential Oversights:
213
  """
214
- logger.info(f"Generated prompt with {len(prompt)} characters")
215
-
216
- response_chunks = []
217
- try:
218
- logger.info("Starting model inference...")
219
- for chunk in agent.run_gradio_chat(
220
- message=prompt,
221
- history=[],
222
- temperature=0.2,
223
- max_new_tokens=1024,
224
- max_token=4096,
225
- call_agent=False,
226
- conversation=[]
227
- ):
228
- if not chunk:
229
- continue
230
- if isinstance(chunk, str):
231
- response_chunks.append(chunk)
232
- elif isinstance(chunk, list):
233
- response_chunks.extend([c.content for c in chunk if hasattr(c, 'content')])
234
-
235
- partial_response = "".join(response_chunks)
236
- formatted_partial = format_response_for_ui(partial_response)
237
-
238
- if formatted_partial:
239
- history[-1][1] = formatted_partial
240
- yield history, None
241
-
242
- full_response = "".join(response_chunks)
243
- logger.info(f"Full model response received: {full_response[:500]}...")
244
-
245
- final_output = format_response_for_ui(full_response)
246
- if not final_output or len(final_output) < 20: # Very short response
247
- final_output = "No clear oversights identified. Recommend comprehensive review."
248
- logger.info("No significant findings detected in analysis")
249
-
250
- history[-1][1] = final_output
251
-
252
- # Save report
253
- report_path = None
254
- if file_hash_value:
255
- report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
256
  try:
257
- with open(report_path, "w", encoding="utf-8") as f:
258
- f.write(final_output)
259
- logger.info(f"Saved report to {report_path}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  except Exception as e:
261
- logger.error(f"Error saving report: {str(e)}")
262
-
263
- elapsed = (datetime.now() - start_time).total_seconds()
264
- logger.info(f"Analysis completed in {elapsed:.2f} seconds")
265
- yield history, report_path if report_path and os.path.exists(report_path) else None
266
-
267
- except Exception as e:
268
- logger.error(f"Error during analysis: {str(e)}", exc_info=True)
269
- history[-1][1] = f"❌ Error during analysis: {str(e)}"
270
- yield history, None
271
-
272
- def create_ui(agent):
273
- with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
274
- gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
275
- gr.Markdown("""
276
- <div style='text-align: center; margin-bottom: 20px;'>
277
- Upload medical records and receive analysis of potential oversights, including:<br>
278
- - Missed diagnoses - Medication conflicts - Incomplete assessments - Abnormal results needing follow-up
279
- </div>
280
- """)
281
-
282
- with gr.Row():
283
- with gr.Column(scale=2):
284
- file_upload = gr.File(
285
- label="Upload Medical Records",
286
- file_types=[".pdf", ".csv", ".xls", ".xlsx"],
287
- file_count="multiple",
288
- interactive=True
289
- )
290
- msg_input = gr.Textbox(
291
- placeholder="Ask about potential oversights...",
292
- show_label=False,
293
- lines=3,
294
- max_lines=5
295
- )
296
- send_btn = gr.Button("Analyze", variant="primary")
297
-
298
- with gr.Column(scale=3):
299
- chatbot = gr.Chatbot(
300
- label="Analysis Results",
301
- height=600,
302
- bubble_full_width=False,
303
- show_copy_button=True
304
- )
305
- download_output = gr.File(
306
- label="Download Full Report",
307
- interactive=False
308
- )
309
-
310
- # Examples for quick testing
311
- examples = gr.Examples(
312
- examples=[
313
- ["Are there any potential missed diagnoses in these records?"],
314
- ["What medication conflicts should I be aware of?"],
315
- ["Are there any incomplete assessments in this case?"]
316
- ],
317
- inputs=[msg_input],
318
- label="Example Questions"
319
- )
320
 
321
- send_btn.click(
322
- analyze,
323
- inputs=[msg_input, gr.State([]), file_upload],
324
- outputs=[chatbot, download_output]
325
- )
326
- msg_input.submit(
327
- analyze,
328
- inputs=[msg_input, gr.State([]), file_upload],
329
- outputs=[chatbot, download_output]
330
- )
331
 
332
- # Add some footer text
333
- gr.Markdown("""
334
- <div style='text-align: center; margin-top: 20px; color: #666; font-size: 0.9em;'>
335
- Note: This tool provides preliminary analysis only. Always verify findings with complete clinical evaluation.
336
- </div>
337
- """)
338
 
 
 
339
  return demo
340
 
341
  if __name__ == "__main__":
342
- logger.info("🚀 Launching Clinical Oversight Assistant...")
343
- try:
344
- agent = init_agent()
345
- demo = create_ui(agent)
346
- demo.queue(api_open=False).launch(
347
- server_name="0.0.0.0",
348
- server_port=7860,
349
- show_error=True,
350
- allowed_paths=[report_dir],
351
- share=False
352
- )
353
- except Exception as e:
354
- logger.error(f"Failed to launch application: {str(e)}", exc_info=True)
355
- raise
 
4
  import pdfplumber
5
  import json
6
  import gradio as gr
7
+ from typing import List
8
  from concurrent.futures import ThreadPoolExecutor, as_completed
9
  import hashlib
10
  import shutil
11
  import re
12
  import psutil
13
  import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  # Persistent directory
16
  persistent_dir = "/data/hf_cache"
 
60
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
61
  return "\n\n".join(text_chunks)
62
  except Exception as e:
 
63
  return f"PDF processing error: {str(e)}"
64
 
65
  def convert_file_to_json(file_path: str, file_type: str) -> str:
 
87
  result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
88
  else:
89
  result = json.dumps({"error": f"Unsupported file type: {file_type}"})
 
90
  with open(cache_path, "w", encoding="utf-8") as f:
91
  f.write(result)
92
  return result
93
  except Exception as e:
 
94
  return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
95
 
96
  def log_system_usage(tag=""):
97
  try:
98
  cpu = psutil.cpu_percent(interval=1)
99
  mem = psutil.virtual_memory()
100
+ print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
101
  result = subprocess.run(
102
  ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
103
  capture_output=True, text=True
104
  )
105
  if result.returncode == 0:
106
  used, total, util = result.stdout.strip().split(", ")
107
+ print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
108
  except Exception as e:
109
+ print(f"[{tag}] GPU/CPU monitor failed: {e}")
110
 
111
  def init_agent():
112
+ print("🔁 Initializing model...")
113
  log_system_usage("Before Load")
114
  default_tool_path = os.path.abspath("data/new_tool.json")
115
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
 
128
  )
129
  agent.init_model()
130
  log_system_usage("After Load")
131
+ print("✅ Agent Ready")
132
  return agent
133
 
134
+ def create_ui(agent):
135
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
136
+ gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
137
+ chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
138
+ file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
139
+ msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
140
+ send_btn = gr.Button("Analyze", variant="primary")
141
+ download_output = gr.File(label="Download Full Report")
142
+
143
+ def analyze(message: str, history: list, files: list):
144
+ history = history + [{"role": "user", "content": message},
145
+ {"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  yield history, None
 
147
 
148
+ extracted = ""
149
+ file_hash_value = ""
150
+ if files:
151
+ with ThreadPoolExecutor(max_workers=4) as executor:
152
+ futures = [executor.submit(convert_file_to_json, f.name, f.name.split(".")[-1].lower()) for f in files]
153
+ results = [sanitize_utf8(f.result()) for f in as_completed(futures)]
154
+ extracted = "\n".join(results)
155
+ file_hash_value = file_hash(files[0].name)
156
+
157
+ prompt = f"""Review these medical records and identify EXACTLY what might have been missed:
158
  1. List potential missed diagnoses
159
  2. Flag any medication conflicts
160
  3. Note incomplete assessments
 
165
 
166
  ### Potential Oversights:
167
  """
168
+ response_chunks = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  try:
170
+ for chunk in agent.run_gradio_chat(
171
+ message=prompt,
172
+ history=[],
173
+ temperature=0.2,
174
+ max_new_tokens=1024,
175
+ max_token=4096,
176
+ call_agent=False,
177
+ conversation=[]
178
+ ):
179
+ if not chunk:
180
+ continue
181
+ if isinstance(chunk, str):
182
+ response_chunks.append(chunk)
183
+ elif isinstance(chunk, list):
184
+ response_chunks.extend([c.content for c in chunk if hasattr(c, 'content')])
185
+ partial_response = "".join(response_chunks)
186
+ cleaned_partial = partial_response.split("[TOOL_CALLS]")[0].strip()
187
+ if cleaned_partial:
188
+ history[-1] = {"role": "assistant", "content": cleaned_partial}
189
+ yield history, None
190
  except Exception as e:
191
+ history[-1] = {"role": "assistant", "content": f"❌ Error: {str(e)}"}
192
+ yield history, None
193
+ return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
+ full_response = "".join(response_chunks)
196
+ final_output = full_response.split("[TOOL_CALLS]")[0].strip()
197
+ if not final_output:
198
+ final_output = "No clear oversights identified. Recommend comprehensive review."
199
+ history[-1] = {"role": "assistant", "content": final_output}
 
 
 
 
 
200
 
201
+ report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
202
+ yield history, report_path if report_path and os.path.exists(report_path) else None
 
 
 
 
203
 
204
+ send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
205
+ msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
206
  return demo
207
 
208
  if __name__ == "__main__":
209
+ print("🚀 Launching app...")
210
+ agent = init_agent()
211
+ demo = create_ui(agent)
212
+ demo.queue(api_open=False).launch(
213
+ server_name="0.0.0.0",
214
+ server_port=7860,
215
+ show_error=True,
216
+ allowed_paths=[report_dir],
217
+ share=False
218
+ )