Ali2206 committed on
Commit
072b189
·
verified ·
1 Parent(s): 8126e99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -19
app.py CHANGED
@@ -51,9 +51,11 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
51
  try:
52
  text_chunks = []
53
  with pdfplumber.open(file_path) as pdf:
 
54
  for i, page in enumerate(pdf.pages[:3]):
55
  text = page.extract_text() or ""
56
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
 
57
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
58
  page_text = page.extract_text() or ""
59
  if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
@@ -132,24 +134,15 @@ def init_agent():
132
  return agent
133
 
134
  def clean_response(response: str) -> str:
135
- """Enhanced response cleaner that handles duplicates and tool calls."""
136
- # First extract the main analysis content
137
- analysis_match = re.search(
138
- r'(Based on the medical records provided.*?)(?=\[TOOL_CALLS\]|Based on|$)',
139
- response,
140
- flags=re.DOTALL
141
- )
142
-
143
- if analysis_match:
144
- cleaned = analysis_match.group(1).strip()
145
- else:
146
- # Fallback if pattern not found
147
- cleaned = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL).strip()
148
-
149
- # Remove any remaining JSON artifacts
150
  cleaned = re.sub(r'\{.*?\}', '', cleaned)
151
  cleaned = re.sub(r'\[.*?\]', '', cleaned)
152
-
153
  return cleaned
154
 
155
  def create_ui(agent):
@@ -217,13 +210,13 @@ Medical Records:
217
  history[-1] = {"role": "assistant", "content": current_cleaned}
218
  yield history, None
219
 
220
- # Final processing
221
  final_cleaned = clean_response(full_response)
222
 
223
  if not final_cleaned:
224
  final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
225
 
226
- # Save report
227
  report_path = None
228
  if file_hash_value:
229
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
@@ -252,4 +245,4 @@ if __name__ == "__main__":
252
  show_error=True,
253
  allowed_paths=[report_dir],
254
  share=False
255
- )
 
51
  try:
52
  text_chunks = []
53
  with pdfplumber.open(file_path) as pdf:
54
+ # Always include the first three pages
55
  for i, page in enumerate(pdf.pages[:3]):
56
  text = page.extract_text() or ""
57
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
58
+ # Then include pages that mention one or more medical keywords
59
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
60
  page_text = page.extract_text() or ""
61
  if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
 
134
  return agent
135
 
136
  def clean_response(response: str) -> str:
137
+ """
138
+ Updated cleaner that removes the [TOOL_CALLS] tag and any JSON artifacts
139
+ while preserving the full analysis so that all identified oversights are displayed.
140
+ """
141
+ # Remove everything starting from the first [TOOL_CALLS] occurrence
142
+ cleaned = response.split("[TOOL_CALLS]")[0].strip()
143
+ # Remove any remaining JSON artifacts in case they appear
 
 
 
 
 
 
 
 
144
  cleaned = re.sub(r'\{.*?\}', '', cleaned)
145
  cleaned = re.sub(r'\[.*?\]', '', cleaned)
 
146
  return cleaned
147
 
148
  def create_ui(agent):
 
210
  history[-1] = {"role": "assistant", "content": current_cleaned}
211
  yield history, None
212
 
213
+ # Final processing of the complete response
214
  final_cleaned = clean_response(full_response)
215
 
216
  if not final_cleaned:
217
  final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
218
 
219
+ # Save report if a file was processed
220
  report_path = None
221
  if file_hash_value:
222
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
 
245
  show_error=True,
246
  allowed_paths=[report_dir],
247
  share=False
248
+ )