Ali2206 committed on
Commit b33bf6c · verified · 1 Parent(s): 67f566e

Update app.py

Files changed (1):
  1. app.py +46 -36
app.py CHANGED
@@ -46,11 +46,27 @@ MEDICAL_KEYWORDS = {
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN = 2048  # Matches your model's actual limit
-TARGET_CHUNK_TOKENS = 1200  # Reduced to ensure room for prompt and response
-PROMPT_RESERVE = 300  # Tokens reserved for prompt structure
+MAX_MODEL_LEN = 2048
+TARGET_CHUNK_TOKENS = 1200
+PROMPT_RESERVE = 300
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 
+def log_system_usage(tag=""):
+    """Log system resource usage."""
+    try:
+        cpu = psutil.cpu_percent(interval=1)
+        mem = psutil.virtual_memory()
+        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
+            capture_output=True, text=True
+        )
+        if result.returncode == 0:
+            used, total, util = result.stdout.strip().split(", ")
+            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
+    except Exception as e:
+        print(f"[{tag}] GPU/CPU monitor failed: {e}")
+
 def sanitize_utf8(text: str) -> str:
     """Ensure text is UTF-8 clean."""
     return text.encode("utf-8", "ignore").decode("utf-8")
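For reference, the `count_tokens` helper used throughout this diff is not shown in any hunk; given `TOKENIZER = "cl100k_base"`, it is presumably a thin wrapper over tiktoken, along the lines of this sketch (the name `_ENC` is illustrative):

```python
# Minimal sketch of a cl100k_base token counter, assuming the tiktoken package.
# The actual count_tokens in app.py is outside the hunks shown in this diff.
import tiktoken

_ENC = tiktoken.get_encoding("cl100k_base")  # matches the TOKENIZER constant above

def count_tokens(text: str) -> int:
    """Count tokens the same way the chunking logic budgets them."""
    return len(_ENC.encode(text))
```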
@@ -82,7 +98,6 @@ def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
         page_text = page.extract_text() or ""
         lower_text = page_text.lower()
 
-        # Mark medical sections
         if any(re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS):
            section_header = f"\n{MEDICAL_SECTION_HEADER} (Page {i+1})\n"
            text_chunks.append(section_header + page_text.strip())
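The keyword scan above relies on `\b` word boundaries, so a keyword only fires as a whole word; a standalone sketch of the same test, with the keyword set trimmed for illustration:

```python
# Sketch of the per-page medical-keyword scan; keyword set trimmed for brevity.
import re

MEDICAL_KEYWORDS = {'history', 'examination', 'progress', 'discharge'}
lower_text = "discharge summary: patient reviewed, progression noted."

# \b boundaries mean e.g. 'progress' does not fire on 'progression'.
is_medical = any(re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS)
print(is_medical)  # True, via 'discharge'
```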
@@ -236,31 +251,6 @@ def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS)
 
     return chunks
 
-def init_agent():
-    """Initialize the TxAgent with proper configuration."""
-    print("🔁 Initializing model...")
-    log_system_usage("Before Load")
-
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
-
-    agent = TxAgent(
-        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
-        force_finish=True,
-        enable_checker=True,
-        step_rag_num=2,
-        seed=100,
-        additional_default_tools=[],
-    )
-    agent.init_model()
-    log_system_usage("After Load")
-    print("✅ Agent Ready")
-    return agent
-
 def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
     """Analyze complete document with strict token management"""
     chunks = split_content_by_tokens(content)
@@ -268,17 +258,13 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
 
     for i, chunk in enumerate(chunks):
         try:
-            # Ultra-minimal prompt to maximize content space
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
 
-            # Calculate available space for content
             prompt_tokens = count_tokens(base_prompt)
-            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100  # Response buffer
+            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
 
-            # Ensure chunk fits
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
-                # Find last paragraph that fits
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
@@ -292,7 +278,6 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
                         break
 
                 if not adjusted_chunk:
-                    # If even one paragraph is too big, split sentences
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
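The two hunks above strip comments from the fit-to-budget logic without changing it: with `MAX_MODEL_LEN = 2048`, a short prompt, and a 100-token response buffer, roughly 1,900 tokens remain for content, and an oversized chunk is trimmed paragraph-first, then sentence-by-sentence (a safety net, since `TARGET_CHUNK_TOKENS = 1200` already keeps most chunks under budget). Restated as a self-contained sketch, not the verbatim app.py code: `fit_to_budget` is a hypothetical name, and `count_tokens` is the helper sketched earlier.

```python
# Sketch of the trimming strategy above; fit_to_budget is a hypothetical name,
# and count_tokens is assumed from the earlier tiktoken sketch.
import re

def fit_to_budget(chunk: str, max_content_tokens: int) -> str:
    """Greedily keep leading paragraphs; fall back to sentences if none fit."""
    kept, used = [], 0
    for para in re.split(r"\n\s*\n", chunk):            # paragraph-first pass
        t = count_tokens(para)
        if used + t > max_content_tokens:
            break
        kept.append(para)
        used += t
    if not kept:                                        # one paragraph is already too big
        for sent in re.split(r'(?<=[.!?])\s+', chunk):  # sentence fallback
            t = count_tokens(sent)
            if used + t > max_content_tokens:
                break
            kept.append(sent)
            used += t
    return "\n\n".join(kept)
```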
@@ -311,7 +296,7 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
                message=prompt,
                history=[],
                temperature=0.1,
-                max_new_tokens=300,  # Keep responses very concise
+                max_new_tokens=300,
                max_token=MAX_MODEL_LEN,
                call_agent=False,
                conversation=[],
@@ -332,6 +317,31 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
 
     return format_final_report(analysis_results, filename)
 
+def init_agent():
+    """Initialize the TxAgent with proper configuration."""
+    print("🔁 Initializing model...")
+    log_system_usage("Before Load")
+
+    default_tool_path = os.path.abspath("data/new_tool.json")
+    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(target_tool_path):
+        shutil.copy(default_tool_path, target_tool_path)
+
+    agent = TxAgent(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": target_tool_path},
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=2,
+        seed=100,
+        additional_default_tools=[],
+    )
+    agent.init_model()
+    log_system_usage("After Load")
+    print("✅ Agent Ready")
+    return agent
+
 def create_ui(agent):
     """Create the Gradio interface."""
     with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
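Net effect of the commit: the three tuning constants lose their inline comments, a `log_system_usage` helper is added near the top of the file, and `init_agent` moves from before `analyze_complete_document` to after it. For orientation, a typical launch sequence tying these pieces together might look like the following (hypothetical; the real entry point is outside this diff, and the port and `create_ui` return value are assumptions):

```python
# Hypothetical entry point; not part of this commit's diff.
if __name__ == "__main__":
    agent = init_agent()     # loads TxAgent and logs CPU/GPU usage before/after
    demo = create_ui(agent)  # assumes create_ui returns the gr.Blocks instance
    demo.launch(server_name="0.0.0.0", server_port=7860)  # port is an assumption
```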
 