Ali2206 committed on
Commit 67dd49b · verified · 1 Parent(s): a58b5f7

Update app.py

Files changed (1): app.py (+87, -75)
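
Summary of changes: ad-hoc print statements are replaced by a module-level logging logger with error reporting in every except branch; chunk_hash now hashes the chunk text together with the prompt template, so editing the prompt invalidates stale cache entries; process_chunk takes the prompt template as a parameter, guards against empty chunks, catches its own exceptions, and returns (chunk_idx, text) tuples; prompt_template moves from process_chunk into create_ui; the ThreadPoolExecutor fan-out is replaced by a sequential loop (the in-code comment attributes this to a vLLM error); and limits are tightened: chunk size goes from 2000 to 1000 characters, and the default "Max Chunks to Analyze" from 10 to 5.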
app.py CHANGED
@@ -14,6 +14,11 @@ import subprocess
 import multiprocessing
 from functools import partial
 import time
+import logging
+
+# Setup logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(__name__)
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -47,8 +52,8 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def chunk_hash(chunk: str) -> str:
-    return hashlib.md5(chunk.encode("utf-8")).hexdigest()
+def chunk_hash(chunk: str, prompt: str) -> str:
+    return hashlib.md5((chunk + prompt).encode("utf-8")).hexdigest()
 
 def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
     """Extract text from a range of PDF pages."""
@@ -59,7 +64,8 @@ def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
             page_text = page.extract_text() or ""
             text_chunks.append(f"=== Page {start_page + pdf.pages.index(page) + 1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
-    except Exception:
+    except Exception as e:
+        logger.error(f"Error extracting pages {start_page}-{end_page}: {e}")
         return ""
 
 def extract_all_pages(file_path: str, progress_callback=None) -> str:
@@ -90,6 +96,7 @@ def extract_all_pages(file_path: str, progress_callback=None) -> str:
 
         return "\n\n".join(filter(None, results))
     except Exception as e:
+        logger.error(f"PDF processing error: {e}")
         return f"PDF processing error: {str(e)}"
 
 def convert_file_to_json(file_path: str, file_type: str, progress_callback=None) -> str:
@@ -121,22 +128,23 @@ def convert_file_to_json(file_path: str, file_type: str, progress_callback=None)
             f.write(result)
         return result
     except Exception as e:
+        logger.error(f"Error processing {file_path}: {e}")
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})
 
 def log_system_usage(tag=""):
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
-        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
+        logger.info(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
         result = subprocess.run(
             ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
             capture_output=True, text=True
         )
         if result.returncode == 0:
             used, total, util = result.stdout.strip().split(", ")
-            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
+            logger.info(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
     except Exception as e:
-        print(f"[{tag}] GPU/CPU monitor failed: {e}")
+        logger.error(f"[{tag}] GPU/CPU monitor failed: {e}")
 
 def clean_response(text: str) -> str:
     """Clean TxAgent response to group findings under tool-derived headings."""
@@ -191,7 +199,7 @@ def clean_response(text: str) -> str:
     return text
 
 def init_agent():
-    print("🔁 Initializing model...")
+    logger.info("Initializing model...")
     log_system_usage("Before Load")
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
@@ -204,25 +212,75 @@ def init_agent():
         tool_files_dict={"new_tool": target_tool_path},
         force_finish=True,
         enable_checker=True,
-        step_rag_num=2,  # Reduced for speed
+        step_rag_num=2,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     log_system_usage("After Load")
-    print("Agent Ready")
+    logger.info("Agent Ready")
     return agent
 
-def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str) -> str:
-    """Process a single chunk and cache the result."""
-    chunk_id = chunk_hash(chunk)
-    chunk_cache_path = os.path.join(file_cache_dir, f"chunk_{chunk_id}.txt")
+def process_chunk(agent, chunk: str, chunk_idx: int, total_chunks: int, cache_path: str, prompt_template: str) -> tuple:
+    """Process a single chunk with error handling and caching."""
+    if not chunk.strip():
+        logger.warning(f"Chunk {chunk_idx} is empty, skipping...")
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+
+    chunk_id = chunk_hash(chunk, prompt_template)
+    chunk_cache_path = os.path.join(cache_path, f"chunk_{chunk_id}.txt")
 
     if os.path.exists(chunk_cache_path):
         with open(chunk_cache_path, "r", encoding="utf-8") as f:
-            return f.read()
+            logger.info(f"Cache hit for chunk {chunk_idx}")
+            return chunk_idx, f.read()
+
+    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:1000])  # Truncate to avoid token limits
+    chunk_response = ""
 
-    prompt_template = """
+    try:
+        for chunk_output in agent.run_gradio_chat(
+            message=prompt,
+            history=[],
+            temperature=0.2,
+            max_new_tokens=512,
+            max_token=2048,
+            call_agent=False,
+            conversation=[],
+        ):
+            if chunk_output is None:
+                continue
+            if isinstance(chunk_output, list):
+                for m in chunk_output:
+                    if hasattr(m, 'content') and m.content:
+                        cleaned = clean_response(m.content)
+                        if cleaned and re.search(r"###\s*\w+", cleaned):
+                            chunk_response += cleaned + "\n\n"
+            elif isinstance(chunk_output, str) and chunk_output.strip():
+                cleaned = clean_response(chunk_output)
+                if cleaned and re.search(r"###\s*\w+", cleaned):
+                    chunk_response += cleaned + "\n\n"
+    except Exception as e:
+        logger.error(f"Error processing chunk {chunk_idx}: {e}")
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nError occurred: {str(e)}\n\n"
+
+    if chunk_response:
+        with open(chunk_cache_path, "w", encoding="utf-8") as f:
+            f.write(chunk_response)
+        return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+    return chunk_idx, f"--- Analysis for Chunk {chunk_idx} ---\nNo oversights identified for this chunk.\n\n"
+
+def create_ui(agent):
+    with gr.Blocks(theme=gr.themes.Soft()) as demo:
+        gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
+        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
+        file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
+        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=5, step=1, label="Max Chunks to Analyze")
+        msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
+        send_btn = gr.Button("Analyze", variant="primary")
+        download_output = gr.File(label="Download Full Report")
+
+        prompt_template = """
 You are a medical analysis assistant. Analyze the following patient record excerpt for clinical oversights and provide a concise, evidence-based summary in markdown format. Group findings under appropriate headings based on the tool used (e.g., drug-related findings under 'Drugs'). For each finding, include:
 - Clinical context (why the issue was missed or relevant details from the record).
 - Potential risks if unaddressed (e.g., disease progression, adverse events).
@@ -243,45 +301,6 @@ Example Output:
 Patient Record Excerpt (Chunk {0} of {1}):
 {chunk}
 """
-    prompt = prompt_template.format(chunk_idx, total_chunks, chunk=chunk[:2000])  # Truncate to avoid token limits
-    chunk_response = ""
-
-    for chunk_output in agent.run_gradio_chat(
-        message=prompt,
-        history=[],
-        temperature=0.2,
-        max_new_tokens=512,  # Reduced for speed
-        max_token=2048,  # Reduced for speed
-        call_agent=False,
-        conversation=[],
-    ):
-        if chunk_output is None:
-            continue
-        if isinstance(chunk_output, list):
-            for m in chunk_output:
-                if hasattr(m, 'content') and m.content:
-                    cleaned = clean_response(m.content)
-                    if cleaned and re.search(r"###\s*\w+", cleaned):
-                        chunk_response += cleaned + "\n\n"
-        elif isinstance(chunk_output, str) and chunk_output.strip():
-            cleaned = clean_response(chunk_output)
-            if cleaned and re.search(r"###\s*\w+", cleaned):
-                chunk_response += cleaned + "\n\n"
-
-    if chunk_response:
-        with open(chunk_cache_path, "w", encoding="utf-8") as f:
-            f.write(chunk_response)
-    return chunk_response
-
-def create_ui(agent):
-    with gr.Blocks(theme=gr.themes.Soft()) as demo:
-        gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
-        chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
-        file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
-        max_chunks_input = gr.Slider(minimum=1, maximum=50, value=10, step=1, label="Max Chunks to Analyze")
-        msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
-        send_btn = gr.Button("Analyze", variant="primary")
-        download_output = gr.File(label="Download Full Report")
 
         def analyze(message: str, history: List[dict], files: List, max_chunks: int):
             history.append({"role": "user", "content": message})
@@ -311,7 +330,7 @@ def create_ui(agent):
             history.append({"role": "assistant", "content": "✅ Text extraction complete."})
             yield history, None
 
-            chunk_size = 2000  # Reduced for speed
+            chunk_size = 1000  # Reduced for speed
             chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
             chunks = chunks[:max_chunks]  # Limit to max_chunks
             total_chunks = len(chunks)
@@ -323,24 +342,17 @@ def create_ui(agent):
                 return
 
             try:
-                with ThreadPoolExecutor(max_workers=4) as executor:  # Parallel processing
-                    futures = []
-                    for chunk_idx, chunk in enumerate(chunks, 1):
-                        futures.append(executor.submit(process_chunk, agent, chunk, chunk_idx, total_chunks, file_cache_dir))
-
-                    for idx, future in enumerate(as_completed(futures)):
-                        chunk_response = future.result()
-                        animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
-                        history.append({"role": "assistant", "content": f"Analyzing chunks... {animation} {idx + 1}/{total_chunks}"})
-                        yield history, None
-
-                        if chunk_response:
-                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\n{chunk_response}\n"
-                        else:
-                            combined_response += f"--- Analysis for Chunk {idx + 1} ---\nNo oversights identified for this chunk.\n\n"
-
-                        history[-1] = {"role": "assistant", "content": combined_response.strip()}
-                        yield history, None
+                # Sequential processing to avoid VLLM error
+                for chunk_idx, chunk in enumerate(chunks, 1):
+                    animation = ["🔍", "📊", "🧠", "🔎"][(int(time.time() * 2) % 4)]
+                    history.append({"role": "assistant", "content": f"Analyzing chunk {chunk_idx}/{total_chunks}... {animation}"})
+                    yield history, None
+
+                    _, chunk_response = process_chunk(agent, chunk, chunk_idx, total_chunks, file_cache_dir, prompt_template)
+                    combined_response += chunk_response
+
+                    history[-1] = {"role": "assistant", "content": combined_response.strip()}
+                    yield history, None
 
                 if combined_response.strip() and not all("No oversights identified" in chunk for chunk in combined_response.split("--- Analysis for Chunk")):
                     history[-1]["content"] = combined_response.strip()
@@ -354,7 +366,7 @@ def create_ui(agent):
                 yield history, report_path if report_path and os.path.exists(report_path) else None
 
             except Exception as e:
-                print("🚨 ERROR:", e)
+                logger.error(f"Analysis error: {e}")
                 history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
                 yield history, None
 
@@ -363,7 +375,7 @@ def create_ui(agent):
    return demo

if __name__ == "__main__":
-    print("🚀 Launching app...")
+    logger.info("Launching app...")
    agent = init_agent()
    demo = create_ui(agent)
    demo.queue(api_open=False).launch(
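
Review note on the cache key: chunk_hash now hashes the chunk together with the prompt template, so any edit to the prompt automatically invalidates previously cached analyses instead of serving stale ones. A minimal standalone sketch of the pattern; the analyze callable is a hypothetical stand-in for the agent call, not part of this commit:

import hashlib
import os

def chunk_hash(chunk: str, prompt: str) -> str:
    # Keying on chunk + prompt: changing the prompt changes every key,
    # so cached responses produced under the old prompt are never reused.
    return hashlib.md5((chunk + prompt).encode("utf-8")).hexdigest()

def cached_analyze(chunk: str, prompt: str, cache_dir: str, analyze) -> str:
    path = os.path.join(cache_dir, f"chunk_{chunk_hash(chunk, prompt)}.txt")
    if os.path.exists(path):          # cache hit: reuse the earlier result
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    result = analyze(chunk, prompt)   # cache miss: run the model (hypothetical stub)
    if result:
        with open(path, "w", encoding="utf-8") as f:
            f.write(result)
    return result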
 
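
Review note on the concurrency change: the removed path submitted chunks to a ThreadPoolExecutor but labeled results with idx + 1 from the as_completed loop, which yields futures in completion order rather than submission order, so a slow chunk's analysis could be attributed to the wrong chunk number. The sequential loop, together with process_chunk now returning (chunk_idx, text), fixes the attribution and, per the in-code comment, avoids a vLLM error (plausibly because a single in-process engine is not meant to serve concurrent calls; the diff itself says no more than the comment). A sketch of the two shapes, with process standing in for process_chunk:

from concurrent.futures import ThreadPoolExecutor, as_completed

def run_parallel(chunks, process):
    # Old shape: futures complete in arbitrary order, and all four
    # workers call into the same engine concurrently.
    with ThreadPoolExecutor(max_workers=4) as pool:
        futures = [pool.submit(process, i, c) for i, c in enumerate(chunks, 1)]
        return [f.result() for f in as_completed(futures)]

def run_sequential(chunks, process):
    # New shape: one request in flight at a time, results in chunk order.
    return [process(i, c) for i, c in enumerate(chunks, 1)]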