Update app.py
Browse files
app.py
CHANGED
@@ -34,9 +34,6 @@ sys.path.insert(0, src_path)
|
|
34 |
|
35 |
from txagent.txagent import TxAgent
|
36 |
|
37 |
-
MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
|
38 |
-
'allergies', 'summary', 'impression', 'findings', 'recommendations'}
|
39 |
-
|
40 |
def sanitize_utf8(text: str) -> str:
    """Strip from *text* any character that cannot be encoded as UTF-8.

    The value is encoded to UTF-8 with the ``ignore`` error handler and
    decoded again, so unencodable code points (such as lone surrogates)
    are silently dropped instead of raising ``UnicodeEncodeError``.
    """
    utf8_bytes = text.encode("utf-8", "ignore")
    return utf8_bytes.decode("utf-8")
|
42 |
|
@@ -44,23 +41,14 @@ def file_hash(path: str) -> str:
|
|
44 |
with open(path, "rb") as f:
|
45 |
return hashlib.md5(f.read()).hexdigest()
|
46 |
|
47 |
-
def
|
48 |
try:
|
49 |
text_chunks = []
|
50 |
-
total_chars = 0
|
51 |
with pdfplumber.open(file_path) as pdf:
|
52 |
for i, page in enumerate(pdf.pages):
|
53 |
page_text = page.extract_text() or ""
|
54 |
-
|
55 |
-
|
56 |
-
if total_chars + len(page_chunk) <= max_chars:
|
57 |
-
text_chunks.append(page_chunk)
|
58 |
-
total_chars += len(page_chunk)
|
59 |
-
else:
|
60 |
-
remaining = max_chars - total_chars
|
61 |
-
text_chunks.append(page_chunk[:remaining])
|
62 |
-
break
|
63 |
-
return "".join(text_chunks).strip()
|
64 |
except Exception as e:
|
65 |
return f"PDF processing error: {str(e)}"
|
66 |
|
@@ -73,7 +61,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
|
|
73 |
return f.read()
|
74 |
|
75 |
if file_type == "pdf":
|
76 |
-
text =
|
77 |
result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
|
78 |
else:
|
79 |
result = json.dumps({"error": f"Unsupported file type: {file_type}"})
|
@@ -103,7 +91,7 @@ def clean_response(text: str) -> str:
|
|
103 |
text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
|
104 |
text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
|
105 |
text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
|
106 |
-
text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)).*?\n", "", text, flags=re.DOTALL)
|
107 |
text = re.sub(r"\n{3,}", "\n\n", text).strip()
|
108 |
if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
|
109 |
return ""
|
@@ -147,59 +135,68 @@ def create_ui(agent):
|
|
147 |
extracted = "\n".join(results)
|
148 |
file_hash_value = file_hash(files[0].name) if files else ""
|
149 |
|
150 |
-
|
151 |
-
|
|
|
|
|
152 |
|
153 |
-
|
154 |
-
|
155 |
-
**Incomplete Assessments**: Missing or shallow evaluations.
|
156 |
-
**Urgent Follow-up**: Issues needing immediate attention.
|
157 |
|
158 |
-
|
159 |
-
|
|
|
|
|
160 |
|
161 |
-
|
|
|
162 |
"""
|
163 |
|
164 |
try:
|
165 |
history.append({"role": "assistant", "content": "🔄 Analyzing..."})
|
166 |
yield history, None
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
if
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
|
|
196 |
history[-1]["content"] = "No oversights identified."
|
197 |
-
|
|
|
198 |
|
199 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
200 |
-
if report_path and
|
201 |
with open(report_path, "w", encoding="utf-8") as f:
|
202 |
-
f.write(
|
203 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
204 |
|
205 |
except Exception as e:
|
|
|
34 |
|
35 |
from txagent.txagent import TxAgent
|
36 |
|
|
|
|
|
|
|
37 |
def sanitize_utf8(text: str) -> str:
    """Return *text* with every non-UTF-8-encodable character removed.

    Round-trips the string through UTF-8 using the ``ignore`` error
    handler: code points that cannot be encoded (for example lone
    surrogates) are discarded rather than raising ``UnicodeEncodeError``.
    """
    encoded = text.encode("utf-8", "ignore")
    cleaned = encoded.decode("utf-8")
    return cleaned
|
39 |
|
|
|
41 |
with open(path, "rb") as f:
|
42 |
return hashlib.md5(f.read()).hexdigest()
|
43 |
|
44 |
+
def extract_all_pages(file_path: str) -> str:
    """Extract the text of every page of a PDF into a single string.

    Args:
        file_path: Path of the PDF file; it is opened with ``pdfplumber``.

    Returns:
        The whitespace-stripped text of each page, joined with newlines.
        A page for which ``extract_text()`` yields nothing contributes an
        empty string. If any exception occurs, a string beginning with
        ``"PDF processing error: "`` is returned instead of raising.
    """
    try:
        with pdfplumber.open(file_path) as pdf:
            # Join while the PDF is still open: the generator pulls each
            # page lazily, so it must be consumed inside the ``with``.
            return "\n".join(
                (page.extract_text() or "").strip() for page in pdf.pages
            )
    except Exception as e:
        # Best-effort by design: the failure is reported as text so the
        # caller always gets a string back.
        return f"PDF processing error: {str(e)}"
|
54 |
|
|
|
61 |
return f.read()
|
62 |
|
63 |
if file_type == "pdf":
|
64 |
+
text = extract_all_pages(file_path)
|
65 |
result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
|
66 |
else:
|
67 |
result = json.dumps({"error": f"Unsupported file type: {file_type}"})
|
|
|
91 |
text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
|
92 |
text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
|
93 |
text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
|
94 |
+
text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)|none).*?\n", "", text, flags=re.DOTALL)
|
95 |
text = re.sub(r"\n{3,}", "\n\n", text).strip()
|
96 |
if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
|
97 |
return ""
|
|
|
135 |
extracted = "\n".join(results)
|
136 |
file_hash_value = file_hash(files[0].name) if files else ""
|
137 |
|
138 |
+
# Split into small chunks of 2,000 characters
|
139 |
+
chunk_size = 2000
|
140 |
+
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
|
141 |
+
combined_response = ""
|
142 |
|
143 |
+
prompt_template = f"""
|
144 |
+
List doctor oversights in the medical records under these headings with brief details:
|
|
|
|
|
145 |
|
146 |
+
**Missed Diagnoses**: Unaddressed conditions or inconsistencies.
|
147 |
+
**Medication Conflicts**: Risky prescriptions.
|
148 |
+
**Incomplete Assessments**: Missing evaluations.
|
149 |
+
**Urgent Follow-up**: Issues needing attention.
|
150 |
|
151 |
+
Records:
|
152 |
+
{{chunk}}
|
153 |
"""
|
154 |
|
155 |
try:
|
156 |
history.append({"role": "assistant", "content": "🔄 Analyzing..."})
|
157 |
yield history, None
|
158 |
|
159 |
+
for chunk_idx, chunk in enumerate(chunks, 1):
|
160 |
+
prompt = prompt_template.format(chunk=chunk)
|
161 |
+
chunk_response = ""
|
162 |
+
for output in agent.run_gradio_chat(
|
163 |
+
message=prompt,
|
164 |
+
history=[],
|
165 |
+
temperature=0.1,
|
166 |
+
max_new_tokens=256,
|
167 |
+
max_token=4096,
|
168 |
+
call_agent=False,
|
169 |
+
conversation=[],
|
170 |
+
):
|
171 |
+
if output is None:
|
172 |
+
continue
|
173 |
+
if isinstance(output, list):
|
174 |
+
for m in output:
|
175 |
+
if hasattr(m, 'content') and m.content:
|
176 |
+
cleaned = clean_response(m.content)
|
177 |
+
if cleaned:
|
178 |
+
chunk_response += cleaned + "\n"
|
179 |
+
history[-1]["content"] = combined_response + chunk_response.strip()
|
180 |
+
yield history, None
|
181 |
+
elif isinstance(output, str) and output.strip():
|
182 |
+
cleaned = clean_response(output)
|
183 |
+
if cleaned:
|
184 |
+
chunk_response += cleaned + "\n"
|
185 |
+
history[-1]["content"] = combined_response + chunk_response.strip()
|
186 |
+
yield history, None
|
187 |
+
|
188 |
+
if chunk_response:
|
189 |
+
combined_response += chunk_response
|
190 |
+
|
191 |
+
if not combined_response:
|
192 |
history[-1]["content"] = "No oversights identified."
|
193 |
+
else:
|
194 |
+
history[-1]["content"] = combined_response.strip()
|
195 |
|
196 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
197 |
+
if report_path and combined_response:
|
198 |
with open(report_path, "w", encoding="utf-8") as f:
|
199 |
+
f.write(combined_response)
|
200 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
201 |
|
202 |
except Exception as e:
|