Update app.py
app.py CHANGED
@@ -46,11 +46,27 @@ MEDICAL_KEYWORDS = {
     'conclusion', 'history', 'examination', 'progress', 'discharge'
 }
 TOKENIZER = "cl100k_base"
-MAX_MODEL_LEN = 2048
-TARGET_CHUNK_TOKENS = 1200
-PROMPT_RESERVE = 300
+MAX_MODEL_LEN = 2048
+TARGET_CHUNK_TOKENS = 1200
+PROMPT_RESERVE = 300
 MEDICAL_SECTION_HEADER = "=== MEDICAL SECTION ==="
 
+def log_system_usage(tag=""):
+    """Log system resource usage."""
+    try:
+        cpu = psutil.cpu_percent(interval=1)
+        mem = psutil.virtual_memory()
+        print(f"[{tag}] CPU: {cpu}% | RAM: {mem.used // (1024**2)}MB / {mem.total // (1024**2)}MB")
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=memory.used,memory.total,utilization.gpu", "--format=csv,nounits,noheader"],
+            capture_output=True, text=True
+        )
+        if result.returncode == 0:
+            used, total, util = result.stdout.strip().split(", ")
+            print(f"[{tag}] GPU: {used}MB / {total}MB | Utilization: {util}%")
+    except Exception as e:
+        print(f"[{tag}] GPU/CPU monitor failed: {e}")
+
 def sanitize_utf8(text: str) -> str:
     """Ensure text is UTF-8 clean."""
     return text.encode("utf-8", "ignore").decode("utf-8")
@@ -82,7 +98,6 @@ def extract_all_pages_with_token_count(file_path: str) -> Tuple[str, int, int]:
         page_text = page.extract_text() or ""
         lower_text = page_text.lower()
 
-        # Mark medical sections
         if any(re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS):
            section_header = f"\n{MEDICAL_SECTION_HEADER} (Page {i+1})\n"
            text_chunks.append(section_header + page_text.strip())
@@ -236,31 +251,6 @@ def split_content_by_tokens(content: str, max_tokens: int = TARGET_CHUNK_TOKENS)
 
     return chunks
 
-def init_agent():
-    """Initialize the TxAgent with proper configuration."""
-    print("🔁 Initializing model...")
-    log_system_usage("Before Load")
-
-    default_tool_path = os.path.abspath("data/new_tool.json")
-    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
-    if not os.path.exists(target_tool_path):
-        shutil.copy(default_tool_path, target_tool_path)
-
-    agent = TxAgent(
-        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
-        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
-        tool_files_dict={"new_tool": target_tool_path},
-        force_finish=True,
-        enable_checker=True,
-        step_rag_num=2,
-        seed=100,
-        additional_default_tools=[],
-    )
-    agent.init_model()
-    log_system_usage("After Load")
-    print("✅ Agent Ready")
-    return agent
-
 def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> str:
     """Analyze complete document with strict token management"""
     chunks = split_content_by_tokens(content)
@@ -268,17 +258,13 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
 
     for i, chunk in enumerate(chunks):
         try:
-            # Ultra-minimal prompt to maximize content space
             base_prompt = "Analyze for:\n1. Critical\n2. Missed DX\n3. Med issues\n4. Gaps\n5. Follow-up\n\nContent:\n"
 
-            # Calculate available space for content
             prompt_tokens = count_tokens(base_prompt)
-            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
+            max_content_tokens = MAX_MODEL_LEN - prompt_tokens - 100
 
-            # Ensure chunk fits
             chunk_tokens = count_tokens(chunk)
             if chunk_tokens > max_content_tokens:
-                # Find last paragraph that fits
                 adjusted_chunk = ""
                 tokens_used = 0
                 paragraphs = re.split(r"\n\s*\n", chunk)
@@ -292,7 +278,6 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
                         break
 
                 if not adjusted_chunk:
-                    # If even one paragraph is too big, split sentences
                     sentences = re.split(r'(?<=[.!?])\s+', chunk)
                     for sent in sentences:
                         sent_tokens = count_tokens(sent)
@@ -311,7 +296,7 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
                 message=prompt,
                 history=[],
                 temperature=0.1,
-                max_new_tokens=300,
+                max_new_tokens=300,
                 max_token=MAX_MODEL_LEN,
                 call_agent=False,
                 conversation=[],
@@ -332,6 +317,31 @@ def analyze_complete_document(content: str, filename: str, agent: TxAgent) -> st
 
     return format_final_report(analysis_results, filename)
 
+def init_agent():
+    """Initialize the TxAgent with proper configuration."""
+    print("🔁 Initializing model...")
+    log_system_usage("Before Load")
+
+    default_tool_path = os.path.abspath("data/new_tool.json")
+    target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
+    if not os.path.exists(target_tool_path):
+        shutil.copy(default_tool_path, target_tool_path)
+
+    agent = TxAgent(
+        model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
+        rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
+        tool_files_dict={"new_tool": target_tool_path},
+        force_finish=True,
+        enable_checker=True,
+        step_rag_num=2,
+        seed=100,
+        additional_default_tools=[],
+    )
+    agent.init_model()
+    log_system_usage("After Load")
+    print("✅ Agent Ready")
+    return agent
+
 def create_ui(agent):
     """Create the Gradio interface."""
     with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
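Note: the hunks call a count_tokens() helper next to the TOKENIZER = "cl100k_base" constant, but its body falls outside the diff. A minimal sketch of what that helper presumably looks like, assuming it wraps tiktoken (the library that ships the cl100k_base encoding); the module-level caching here is an assumption, not code from this commit:

import tiktoken

# Assumed helper: app.py calls count_tokens() but its definition is outside the hunks.
# "cl100k_base" matches the TOKENIZER constant defined at the top of the file.
_ENCODING = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    """Return the number of cl100k_base tokens in text."""
    return len(_ENCODING.encode(text))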
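The hunks in analyze_complete_document elide the middle of the trimming loop (old lines 285-291 and 299-310), so the paragraph-then-sentence fallback is easy to lose track of. A self-contained sketch of the overall strategy, reusing the count_tokens sketch above; the fit_to_budget name and the joining choices are illustrative assumptions, not the commit's code:

import re
import tiktoken

_ENCODING = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    return len(_ENCODING.encode(text))

def fit_to_budget(chunk: str, max_tokens: int) -> str:
    # Keep the longest prefix of `chunk` that fits the token budget:
    # whole paragraphs first, sentences only if no paragraph fits.
    if count_tokens(chunk) <= max_tokens:
        return chunk
    kept, used = [], 0
    for para in re.split(r"\n\s*\n", chunk):  # same paragraph split as the diff
        t = count_tokens(para)
        if used + t > max_tokens:
            break
        kept.append(para)
        used += t
    if kept:
        return "\n\n".join(kept)
    for sent in re.split(r"(?<=[.!?])\s+", chunk):  # same sentence regex as the diff
        t = count_tokens(sent)
        if used + t > max_tokens:
            break
        kept.append(sent)
        used += t
    return " ".join(kept)

The budget itself follows the diff's accounting: max_content_tokens = MAX_MODEL_LEN - count_tokens(base_prompt) - 100.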