Update app.py
app.py CHANGED
@@ -91,13 +91,13 @@ def log_system_usage(tag=""):
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
     # Strip all tool and reasoning text
-    text = re.sub(r"\[TOOL_CALLS\].*?\n|\[.*?\].*?\n|(?:get_|tool\s|retrieve\s|use\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
+    text = re.sub(r"\[TOOL_CALLS\].*?\n|\[.*?\].*?\n|(?:get_|tool\s|retrieve\s|use\s|rag\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
     text = re.sub(
         r"(?i)(to\s|analyze|will\s|since\s|no\s|none|previous|attempt|involve|check\s|explore|manually|"
         r"start|look|use|focus|retrieve|tool|based\s|overall|indicate|mention|consider|ensure|need\s|"
         r"provide|review|assess|identify|potential|records|patient|history|symptoms|medication|"
-        r"conflict|assessment|follow-up|issue|reasoning|step|prompt|address|rag|thought|try).*?\n",
+        r"conflict|assessment|follow-up|issue|reasoning|step|prompt|address|rag|thought|try|john\sdoe|nkma).*?\n",
         "", text, flags=re.DOTALL
     )
     text = re.sub(r"\n{2,}", "\n", text).strip()
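The two edits in this hunk widen the line filter: "rag\s" joins the tool-call alternation, and "john\sdoe|nkma" join the reasoning-keyword list. A minimal sketch of the updated keyword filter on a made-up input (the real clean_response first strips tool-call markers and result blobs, omitted here):

import re

# Made-up sample; only the finding bullet should survive the filter.
sample = "To analyze the records...\nJohn Doe reported NKMA.\n- Missed: possible anemia\n"
pattern = (
    r"(?i)(to\s|analyze|will\s|since\s|no\s|none|previous|attempt|involve|check\s|explore|manually|"
    r"start|look|use|focus|retrieve|tool|based\s|overall|indicate|mention|consider|ensure|need\s|"
    r"provide|review|assess|identify|potential|records|patient|history|symptoms|medication|"
    r"conflict|assessment|follow-up|issue|reasoning|step|prompt|address|rag|thought|try|john\sdoe|nkma).*?\n"
)
print(re.sub(pattern, "", sample, flags=re.DOTALL))  # prints "- Missed: possible anemia"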
@@ -116,11 +116,9 @@ def clean_response(text: str) -> str:
     return "\n".join(lines).strip()
 
 def normalize_text(text: str) -> str:
-    # Normalize for deduplication
     return re.sub(r"\s+", " ", text.lower().strip())
 
 def consolidate_findings(responses: List[str]) -> str:
-    # Merge unique findings
     findings = defaultdict(set)
     headings = ["Missed Diagnoses", "Medication Conflicts", "Incomplete Assessments", "Urgent Follow-up"]
 
@@ -137,12 +135,10 @@ def consolidate_findings(responses: List[str]) -> str:
         elif current_heading and line.startswith("-"):
             findings[current_heading].add(normalize_text(line))
 
-    # Format output
     output = []
     for heading in headings:
         if findings[heading]:
             output.append(f"**{heading}**:")
-            # Restore original case for display
             original_lines = {normalize_text(r): r for r in sum([r.split("\n") for r in responses], []) if r.startswith("-")}
             output.extend(sorted(original_lines.get(n, "- " + n) for n in findings[heading]))
     return "\n".join(output).strip() if output else "No oversights identified."
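consolidate_findings dedupes bullets across chunk responses by storing each line under its heading keyed on normalize_text, then mapping back to an original-cased line for display. A minimal sketch of that round trip, with made-up responses and the heading detection simplified away:

import re
from collections import defaultdict

def normalize_text(text: str) -> str:
    return re.sub(r"\s+", " ", text.lower().strip())

# Made-up chunk outputs: same finding, different case and spacing.
responses = [
    "**Missed Diagnoses**:\n- Possible anemia  not worked up",
    "**Missed Diagnoses**:\n- possible anemia not worked up",
]
findings = defaultdict(set)
for r in responses:
    for line in r.split("\n"):
        if line.startswith("-"):
            findings["Missed Diagnoses"].add(normalize_text(line))

# Map back to an original-cased line for display, as the app does.
original_lines = {normalize_text(l): l for r in responses for l in r.split("\n") if l.startswith("-")}
print(sorted(original_lines.get(n, "- " + n) for n in findings["Missed Diagnoses"]))
# ['- possible anemia not worked up'] -- one bullet, not two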
@@ -154,12 +150,11 @@ def init_agent():
         model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
         rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
         force_finish=True,
-        enable_checker=False,
-        enable_rag=False,
-        tool_files_dict=None,
+        enable_checker=False,
+        enable_rag=False,
+        tool_files_dict=None,
         step_rag_num=0,
         seed=100,
-        enforce_eager=True,  # No torch.compile
     )
     agent.init_model()
     log_system_usage("After Load")
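With this hunk the agent is constructed with the checker, RAG, and tool files all disabled, and enforce_eager is no longer passed, so the inference backend's default compilation behavior applies again. A sketch of the resulting call, assuming the TxAgent class is importable from the txagent package:

from txagent import TxAgent  # import path assumed

agent = TxAgent(
    model_name="mims-harvard/TxAgent-T1-Llama-3.1-8B",
    rag_model_name="mims-harvard/ToolRAG-T1-GTE-Qwen2-1.5B",
    force_finish=True,
    enable_checker=False,  # checker pass disabled
    enable_rag=False,      # retrieval disabled; step_rag_num=0 is consistent
    tool_files_dict=None,
    step_rag_num=0,
    seed=100,
    # enforce_eager no longer passed: backend default applies
)
agent.init_model()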
@@ -189,7 +184,6 @@ def create_ui(agent):
         extracted = "\n".join([json.loads(r).get("content", "") for r in results if "content" in json.loads(r)])
         file_hash_value = file_hash(files[0].name) if files else ""
 
-        # Smaller chunks
         chunk_size = 800
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
         chunk_responses = []
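The extracted text is sliced into fixed 800-character chunks with no overlap, so only the final chunk can be shorter. A quick check of the arithmetic:

extracted = "x" * 2000  # stand-in for the concatenated record text
chunk_size = 800
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
print([len(c) for c in chunks])  # [800, 800, 400]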
@@ -209,7 +203,6 @@ Records:
 """
 
         try:
-            # Batch process with progress
             for i in range(0, len(chunks), batch_size):
                 batch = chunks[i:i + batch_size]
                 batch_responses = []
@@ -239,18 +232,15 @@ Records:
                         chunk_response += cleaned + "\n"
                     if chunk_response:
                         batch_responses.append(chunk_response)
-                    # Progress update
                     processed = min(i + j + 1, total_chunks)
                     history[-1]["content"] = f"🔄 Analyzing... ({processed}/{total_chunks} chunks)"
                     yield history, None
                 chunk_responses.extend(batch_responses)
 
-            # Final result
             final_response = consolidate_findings(chunk_responses)
             history[-1]["content"] = final_response
             yield history, None
 
-            # Report
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
             if report_path and final_response != "No oversights identified.":
                 with open(report_path, "w", encoding="utf-8") as f:
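In the batching loop, the progress counter combines the batch offset i with the in-batch index j and clamps at total_chunks, so a short final batch cannot overshoot the displayed total. The bookkeeping in isolation, with made-up sizes:

total_chunks, batch_size = 7, 3
chunks = [f"chunk{k}" for k in range(total_chunks)]
for i in range(0, len(chunks), batch_size):
    batch = chunks[i:i + batch_size]
    for j, _ in enumerate(batch):
        processed = min(i + j + 1, total_chunks)
        print(f"🔄 Analyzing... ({processed}/{total_chunks} chunks)")
# Prints 1/7 through 7/7 exactly once each.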