Update app.py
Browse files
app.py
CHANGED
@@ -47,17 +47,15 @@ def file_hash(path: str) -> str:
|
|
47 |
with open(path, "rb") as f:
|
48 |
return hashlib.md5(f.read()).hexdigest()
|
49 |
|
50 |
-
def extract_priority_pages(file_path: str
|
51 |
try:
|
52 |
text_chunks = []
|
53 |
with pdfplumber.open(file_path) as pdf:
|
54 |
-
for i, page in enumerate(pdf.pages
|
55 |
-
text = page.extract_text() or ""
|
56 |
-
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
57 |
-
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
58 |
page_text = page.extract_text() or ""
|
59 |
-
|
60 |
-
|
|
|
61 |
return "\n\n".join(text_chunks)
|
62 |
except Exception as e:
|
63 |
return f"PDF processing error: {str(e)}"
|
@@ -160,7 +158,12 @@ def create_ui(agent):
|
|
160 |
extracted = "\n".join(results)
|
161 |
file_hash_value = file_hash(files[0].name) if files else ""
|
162 |
|
163 |
-
|
|
|
|
|
|
|
|
|
|
|
164 |
Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
|
165 |
|
166 |
1. **Missed Diagnoses**:
|
@@ -178,8 +181,8 @@ Analyze the medical records for clinical oversights. Provide a concise, evidence
|
|
178 |
4. **Urgent Follow-up**:
|
179 |
- Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
|
180 |
|
181 |
-
Medical Records (
|
182 |
-
{
|
183 |
|
184 |
Begin analysis:
|
185 |
"""
|
@@ -188,35 +191,44 @@ Begin analysis:
|
|
188 |
if history and history[-1]["content"].startswith("⏳"):
|
189 |
history.pop()
|
190 |
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
|
|
|
|
|
|
220 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
221 |
|
222 |
except Exception as e:
|
|
|
47 |
with open(path, "rb") as f:
|
48 |
return hashlib.md5(f.read()).hexdigest()
|
49 |
|
50 |
+
def extract_priority_pages(file_path: str) -> str:
    """Return the text of the "priority" pages of a PDF as one string.

    A page is kept when it is one of the first three pages of the document,
    or when its lower-cased text contains any entry of ``MEDICAL_KEYWORDS``
    as a whole word (the entry is wrapped in ``\\b`` word boundaries).

    Each kept page is rendered as ``=== Page N ===`` (N is 1-based) followed
    by the stripped page text; kept pages are separated by a blank line.

    Args:
        file_path: Path of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The joined page text, or a string starting with
        ``"PDF processing error: "`` if anything raised while reading the
        file. Errors are reported in-band; this function does not raise.

    NOTE(review): keywords are interpolated into the regex as-is (no
    ``re.escape``), so any regex metacharacters in ``MEDICAL_KEYWORDS`` are
    interpreted as pattern syntax — confirm that is intended.
    """
    try:
        selected = []
        with pdfplumber.open(file_path) as pdf:
            for page_no, page in enumerate(pdf.pages, start=1):
                # extract_text() may yield None; normalise to "".
                page_text = page.extract_text() or ""

                # Pages after the third are kept only on a keyword hit.
                if page_no > 3:
                    lowered = page_text.lower()
                    keyword_hit = any(
                        re.search(rf'\b{kw}\b', lowered)
                        for kw in MEDICAL_KEYWORDS
                    )
                    if not keyword_hit:
                        continue

                selected.append(f"=== Page {page_no} ===\n{page_text.strip()}")
        return "\n\n".join(selected)
    except Exception as e:
        # Best-effort: callers receive the failure as text, not an exception.
        return f"PDF processing error: {str(e)}"
|
|
|
158 |
extracted = "\n".join(results)
|
159 |
file_hash_value = file_hash(files[0].name) if files else ""
|
160 |
|
161 |
+
# Split extracted text into chunks of ~6,000 characters
|
162 |
+
chunk_size = 6000
|
163 |
+
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
|
164 |
+
combined_response = ""
|
165 |
+
|
166 |
+
prompt_template = f"""
|
167 |
Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
|
168 |
|
169 |
1. **Missed Diagnoses**:
|
|
|
181 |
4. **Urgent Follow-up**:
|
182 |
- Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
|
183 |
|
184 |
+
Medical Records (Chunk {0}):
|
185 |
+
{{chunk}}
|
186 |
|
187 |
Begin analysis:
|
188 |
"""
|
|
|
191 |
if history and history[-1]["content"].startswith("⏳"):
|
192 |
history.pop()
|
193 |
|
194 |
+
# Process each chunk sequentially
|
195 |
+
for chunk_idx, chunk in enumerate(chunks, 1):
|
196 |
+
prompt = prompt_template.format(chunk_idx, chunk=chunk)
|
197 |
+
chunk_response = ""
|
198 |
+
for chunk_output in agent.run_gradio_chat(
|
199 |
+
message=prompt,
|
200 |
+
history=[],
|
201 |
+
temperature=0.2,
|
202 |
+
max_new_tokens=1024,
|
203 |
+
max_token=4096,
|
204 |
+
call_agent=False,
|
205 |
+
conversation=[],
|
206 |
+
):
|
207 |
+
if chunk_output is None:
|
208 |
+
continue
|
209 |
+
if isinstance(chunk_output, list):
|
210 |
+
for m in chunk_output:
|
211 |
+
if hasattr(m, 'content') and m.content:
|
212 |
+
cleaned = clean_response(m.content)
|
213 |
+
if cleaned:
|
214 |
+
chunk_response += cleaned + "\n"
|
215 |
+
elif isinstance(chunk_output, str) and chunk_output.strip():
|
216 |
+
cleaned = clean_response(chunk_output)
|
217 |
+
if cleaned:
|
218 |
+
chunk_response += cleaned + "\n"
|
219 |
+
combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
|
220 |
+
|
221 |
+
# Update history with combined response
|
222 |
+
if combined_response:
|
223 |
+
history.append({"role": "assistant", "content": combined_response.strip()})
|
224 |
+
else:
|
225 |
+
history.append({"role": "assistant", "content": "No oversights identified."})
|
226 |
+
|
227 |
+
# Generate report file
|
228 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
229 |
+
if report_path:
|
230 |
+
with open(report_path, "w", encoding="utf-8") as f:
|
231 |
+
f.write(combined_response)
|
232 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
233 |
|
234 |
except Exception as e:
|