Update app.py
app.py CHANGED
@@ -51,9 +51,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
+            # Always include the first three pages
             for i, page in enumerate(pdf.pages[:3]):
                 text = page.extract_text() or ""
                 text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
+            # Then include pages that mention one or more medical keywords
             for i, page in enumerate(pdf.pages[3:max_pages], start=4):
                 page_text = page.extract_text() or ""
                 if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
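The two added comments document the page-selection strategy: the first three pages are taken unconditionally, and later pages are kept only if they match a keyword. As a quick illustration of that whole-word filter, here is a minimal sketch; the real MEDICAL_KEYWORDS list is defined elsewhere in app.py and is not visible in this diff, so the three keywords below are placeholders:

    import re

    # Placeholder values; app.py defines the real MEDICAL_KEYWORDS list elsewhere.
    MEDICAL_KEYWORDS = ["diagnosis", "medication", "allergy"]

    def page_matches(page_text: str) -> bool:
        # Whole-word match, the same test the second loop above applies
        # to pages 4 through max_pages.
        return any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS)

    print(page_matches("Current medication: lisinopril 10 mg"))  # True
    print(page_matches("This page intentionally left blank"))    # False

Note that the pattern interpolates each keyword directly into the regex, so a keyword containing regex metacharacters would change the pattern's meaning; wrapping it as re.escape(kw) would be safer.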
@@ -132,24 +134,15 @@ def init_agent():
     return agent

 def clean_response(response: str) -> str:
-    """
-
-
-
-
-
-
-
-    if analysis_match:
-        cleaned = analysis_match.group(1).strip()
-    else:
-        # Fallback if pattern not found
-        cleaned = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL).strip()
-
-    # Remove any remaining JSON artifacts
+    """
+    Updated cleaner that removes the [TOOL_CALLS] tag and any JSON artifacts
+    while preserving the full analysis so that all identified oversights are displayed.
+    """
+    # Remove everything starting from the first [TOOL_CALLS] occurrence
+    cleaned = response.split("[TOOL_CALLS]")[0].strip()
+    # Remove any remaining JSON artifacts in case they appear
     cleaned = re.sub(r'\{.*?\}', '', cleaned)
     cleaned = re.sub(r'\[.*?\]', '', cleaned)
-
     return cleaned

 def create_ui(agent):
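The rewritten cleaner no longer depends on the old analysis_match regex (whose pattern was deleted along with the rest of the removed block); it simply truncates at the first [TOOL_CALLS] marker and strips brace/bracket fragments. A self-contained sketch of the new behavior, with the function body copied from the hunk above:

    import re

    def clean_response(response: str) -> str:
        cleaned = response.split("[TOOL_CALLS]")[0].strip()
        cleaned = re.sub(r'\{.*?\}', '', cleaned)
        cleaned = re.sub(r'\[.*?\]', '', cleaned)
        return cleaned

    raw = 'Oversight: anticoagulant not restarted post-op.\n[TOOL_CALLS] {"name": "lookup"}'
    print(clean_response(raw))  # -> Oversight: anticoagulant not restarted post-op.

One caveat: r'\[.*?\]' also deletes legitimate bracketed text in the analysis itself, not just JSON-ish artifacts, so any square brackets the model emits in its findings will be removed too.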
@@ -217,13 +210,13 @@ Medical Records:
                 history[-1] = {"role": "assistant", "content": current_cleaned}
                 yield history, None

-            # Final processing
+            # Final processing of the complete response
             final_cleaned = clean_response(full_response)

             if not final_cleaned:
                 final_cleaned = "⚠️ No clear oversights identified or model output was invalid."

-            # Save report if a file was processed
+            # Save report if a file was processed
             report_path = None
             if file_hash_value:
                 report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
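For context on the report step: the filename is derived from a hash of the uploaded file, so repeated analyses of the same document reuse one report path. The hashing scheme and report_dir are defined elsewhere in app.py and are not visible in this diff; this sketch only assumes an MD5-style hex digest:

    import hashlib
    import os

    report_dir = "reports"  # assumption: app.py's actual report_dir may differ
    file_hash_value = hashlib.md5(b"uploaded file bytes").hexdigest()  # hypothetical hash source
    report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
    print(report_path)  # reports/<32-char hex digest>_report.txt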
@@ -252,4 +245,4 @@ if __name__ == "__main__":
         show_error=True,
         allowed_paths=[report_dir],
         share=False
-    )
\ No newline at end of file
+    )
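The launch arguments matter for the report download: files saved outside Gradio's own temp cache generally must be listed in allowed_paths before the UI will serve them, which is why report_dir appears there. A minimal sketch, assuming a Blocks app named demo (the surrounding UI code is not shown in this diff):

    import os
    import gradio as gr

    report_dir = os.path.join(os.getcwd(), "reports")  # assumption: mirrors app.py's report_dir
    os.makedirs(report_dir, exist_ok=True)

    with gr.Blocks() as demo:
        gr.Markdown("placeholder UI")

    if __name__ == "__main__":
        demo.launch(
            show_error=True,              # surface Python errors in the browser
            allowed_paths=[report_dir],   # permit serving files saved under report_dir
            share=False,                  # no public tunnel
        )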