Update app.py
app.py CHANGED
@@ -9,6 +9,7 @@ import hashlib
 import re
 import psutil
 import subprocess
+from collections import defaultdict
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -45,7 +46,7 @@ def extract_all_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            for
+            for page in pdf.pages:
                 page_text = page.extract_text() or ""
                 text_chunks.append(page_text.strip())
         return "\n".join(text_chunks)
@@ -88,15 +89,49 @@ def log_system_usage(tag=""):
 
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-
-    text = re.sub(r"\[
+    # Remove all tool-related and reasoning text
+    text = re.sub(r"\[TOOL_CALLS\].*|(?:get_|tool\s|retrieve\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(r"(?i)(to analyze
+    text = re.sub(r"(?i)(to address|analyze the|will (start|look|use|focus)|since the|no (drug|clinical|information)|none|previous|attempt|involve|check for|explore|manually).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
-
+    # Only keep text under specific headings
+    if not re.search(r"^(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.MULTILINE | re.IGNORECASE):
         return ""
     return text
 
+def consolidate_findings(responses: List[str]) -> str:
+    # Aggregate findings under each heading, removing duplicates
+    findings = defaultdict(set)
+    headings = ["Missed Diagnoses", "Medication Conflicts", "Incomplete Assessments", "Urgent Follow-up"]
+
+    for response in responses:
+        if not response:
+            continue
+        # Split response into sections by heading
+        current_heading = None
+        current_points = []
+        for line in response.split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+            if any(line.lower().startswith(h.lower()) for h in headings):
+                if current_heading and current_points:
+                    findings[current_heading].update(current_points)
+                current_heading = next(h for h in headings if line.lower().startswith(h.lower()))
+                current_points = []
+            elif current_heading and line.startswith("-"):
+                current_points.append(line)
+        if current_heading and current_points:
+            findings[current_heading].update(current_points)
+
+    # Format consolidated output
+    output = []
+    for heading in headings:
+        if findings[heading]:
+            output.append(f"**{heading}**:")
+            output.extend(sorted(findings[heading]))
+    return "\n".join(output).strip() if output else "No oversights identified."
+
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
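The heading gate added to clean_response is strict: a chunk answer that never opens a line with one of the four section headings is discarded wholesale. A self-contained sketch of just that gate (the sample strings are invented here, not from the commit):

```python
import re

# Same pattern the new guard uses to decide whether any heading is present
HEADINGS = r"^(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)"

kept = "Missed Diagnoses:\n- No follow-up on elevated A1c"
dropped = "To analyze the records, I will start by checking the labs."

for text in (kept, dropped):
    # clean_response returns "" whenever this search fails
    print(bool(re.search(HEADINGS, text, re.MULTILINE | re.IGNORECASE)))
# True
# False
```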
@@ -124,6 +159,7 @@ def create_ui(agent):
 
     def analyze(message: str, history: List[dict], files: List):
         history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": "🔄 Analyzing..."})
         yield history, None
 
         extracted = ""
@@ -135,28 +171,26 @@ def create_ui(agent):
             extracted = "\n".join(results)
         file_hash_value = file_hash(files[0].name) if files else ""
 
-        # Split into small chunks of
-        chunk_size =
+        # Split into small chunks of 1,500 characters
+        chunk_size = 1500
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-
+        chunk_responses = []
 
-        prompt_template =
-List doctor oversights
+        prompt_template = """
+List doctor oversights under these headings only, with one brief point each. No tools or reasoning steps.
 
-**Missed Diagnoses**:
-**Medication Conflicts**:
-**Incomplete Assessments**:
-**Urgent Follow-up**:
+**Missed Diagnoses**:
+**Medication Conflicts**:
+**Incomplete Assessments**:
+**Urgent Follow-up**:
 
 Records:
-{
+{chunk}
 """
 
         try:
-
-
-
-            for chunk_idx, chunk in enumerate(chunks, 1):
+            # Process all chunks, collecting responses
+            for chunk in chunks:
                 prompt = prompt_template.format(chunk=chunk)
                 chunk_response = ""
                 for output in agent.run_gradio_chat(
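For scale, the fixed-width slicing above yields ceil(len/1500) pieces; e.g. a 4,000-character extraction (length invented for illustration) splits 1500/1500/1000:

```python
extracted = "x" * 4000
chunk_size = 1500
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
print([len(c) for c in chunks])  # [1500, 1500, 1000]
```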
@@ -176,27 +210,23 @@ Records:
                             cleaned = clean_response(m.content)
                             if cleaned:
                                 chunk_response += cleaned + "\n"
-                                history[-1]["content"] = combined_response + chunk_response.strip()
-                                yield history, None
                     elif isinstance(output, str) and output.strip():
                         cleaned = clean_response(output)
                         if cleaned:
                             chunk_response += cleaned + "\n"
-                            history[-1]["content"] = combined_response + chunk_response.strip()
-                            yield history, None
-
                 if chunk_response:
-
+                    chunk_responses.append(chunk_response)
 
-
-
-
-
+            # Consolidate all responses into one final output
+            final_response = consolidate_findings(chunk_responses)
+            history[-1]["content"] = final_response
+            yield history, None
 
+            # Generate report file
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-            if report_path and
+            if report_path and final_response != "No oversights identified.":
                 with open(report_path, "w", encoding="utf-8") as f:
-                    f.write(
+                    f.write(final_response)
             yield history, report_path if report_path and os.path.exists(report_path) else None
 
         except Exception as e:
|