Ali2206 committed (verified)
Commit 26668b6 · 1 Parent(s): 96347cc

Update app.py

Files changed (1):
  1. app.py +50 -38

app.py CHANGED
@@ -47,17 +47,15 @@ def file_hash(path: str) -> str:
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()
 
-def extract_priority_pages(file_path: str, max_pages: int = 10) -> str:
+def extract_priority_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            for i, page in enumerate(pdf.pages[:3]):
-                text = page.extract_text() or ""
-                text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
-            for i, page in enumerate(pdf.pages[3:max_pages], start=4):
+            for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
-                if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
-                    text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
+                # Include first 3 pages or pages with medical keywords
+                if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
         return "\n\n".join(text_chunks)
     except Exception as e:
         return f"PDF processing error: {str(e)}"
@@ -160,7 +158,12 @@ def create_ui(agent):
             extracted = "\n".join(results)
             file_hash_value = file_hash(files[0].name) if files else ""
 
-            prompt = f"""
+            # Split extracted text into chunks of ~6,000 characters
+            chunk_size = 6000
+            chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
+            combined_response = ""
+
+            prompt_template = """
 Analyze the medical records for clinical oversights. Provide a concise, evidence-based summary under these headings:
 
 1. **Missed Diagnoses**:
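This replaces the old single 8,000-character truncation with non-overlapping ~6,000-character slices, so text past the former cap is analyzed instead of discarded. The slicing itself is plain list-comprehension striding, for example:

    extracted = "x" * 15000  # stand-in for the concatenated page text
    chunk_size = 6000
    chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
    print([len(c) for c in chunks])  # [6000, 6000, 3000], nothing is dropped

One trade-off: fixed-width slices can split a sentence or lab value across two chunks; splitting on the "=== Page N ===" markers instead would keep pages intact at the cost of uneven chunk sizes.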
@@ -178,8 +181,8 @@ Analyze the medical records for clinical oversights. Provide a concise, evidence
 4. **Urgent Follow-up**:
    - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
 
-Medical Records (Truncated to 8k chars):
-{extracted[:8000]}
+Medical Records (Chunk {chunk_idx}):
+{chunk}
 
 Begin analysis:
 """
@@ -188,35 +191,44 @@ Begin analysis:
             if history and history[-1]["content"].startswith("⏳"):
                 history.pop()
 
-            for chunk in agent.run_gradio_chat(
-                message=prompt,
-                history=[],
-                temperature=0.2,
-                max_new_tokens=1024,
-                max_token=4096,
-                call_agent=False,
-                conversation=[],
-            ):
-                if chunk is None:
-                    continue
-
-                if isinstance(chunk, list):
-                    for m in chunk:
-                        if hasattr(m, 'content') and m.content:
-                            cleaned = clean_response(m.content)
-                            if cleaned:
-                                history.append({"role": m.role, "content": cleaned})
-                                yield history, None
-                elif isinstance(chunk, str) and chunk.strip():
-                    cleaned = clean_response(chunk)
-                    if cleaned:
-                        if history and history[-1]["role"] == "assistant":
-                            history[-1]["content"] += cleaned
-                        else:
-                            history.append({"role": "assistant", "content": cleaned})
-                        yield history, None
-
+            # Process each chunk sequentially
+            for chunk_idx, chunk in enumerate(chunks, 1):
+                prompt = prompt_template.format(chunk_idx=chunk_idx, chunk=chunk)
+                chunk_response = ""
+                for chunk_output in agent.run_gradio_chat(
+                    message=prompt,
+                    history=[],
+                    temperature=0.2,
+                    max_new_tokens=1024,
+                    max_token=4096,
+                    call_agent=False,
+                    conversation=[],
+                ):
+                    if chunk_output is None:
+                        continue
+                    if isinstance(chunk_output, list):
+                        for m in chunk_output:
+                            if hasattr(m, 'content') and m.content:
+                                cleaned = clean_response(m.content)
+                                if cleaned:
+                                    chunk_response += cleaned + "\n"
+                    elif isinstance(chunk_output, str) and chunk_output.strip():
+                        cleaned = clean_response(chunk_output)
+                        if cleaned:
+                            chunk_response += cleaned + "\n"
+                combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
+
+            # Update history with combined response
+            if combined_response:
+                history.append({"role": "assistant", "content": combined_response.strip()})
+            else:
+                history.append({"role": "assistant", "content": "No oversights identified."})
+
+            # Generate report file
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
+            if report_path:
+                with open(report_path, "w", encoding="utf-8") as f:
+                    f.write(combined_response)
             yield history, report_path if report_path and os.path.exists(report_path) else None
 
         except Exception as e:
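The largest behavioral change is how output reaches the UI: the old loop streamed cleaned fragments into history as they arrived, while the new loop buffers each chunk's stream into chunk_response, stitches the per-chunk sections into combined_response, and appends a single assistant message at the end (also writing it to the report file). A minimal sketch of that buffering pattern, using a stub generator in place of agent.run_gradio_chat (whose real call signature appears in the diff above):

    def fake_stream():
        # Stand-in for agent.run_gradio_chat: may yield None or text fragments.
        yield None
        yield "Possible missed diagnosis: "
        yield "iron-deficiency anemia."

    combined_response = ""
    for chunk_idx in (1, 2):
        chunk_response = ""
        for out in fake_stream():
            if out is None:
                continue                      # skip keep-alive ticks
            if isinstance(out, str) and out.strip():
                chunk_response += out + "\n"  # buffer instead of yielding to the UI
        combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
    print(combined_response)

One side effect worth noting: because nothing is yielded until every chunk finishes, the chat window no longer shows incremental progress during generation.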
 