Ali2206 committed on
Commit
fe1f17b
·
verified ·
1 Parent(s): 973658c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -51
app.py CHANGED
@@ -56,7 +56,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
56
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
57
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
58
  page_text = page.extract_text() or ""
59
- if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
60
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
61
  return "\n\n".join(text_chunks)
62
  except Exception as e:
@@ -108,6 +108,17 @@ def log_system_usage(tag=""):
108
  except Exception as e:
109
  print(f"[{tag}] GPU/CPU monitor failed: {e}")
110
 
 
 
 
 
 
 
 
 
 
 
 
111
  def init_agent():
112
  print("🔁 Initializing model...")
113
  log_system_usage("Before Load")
@@ -131,33 +142,6 @@ def init_agent():
131
  print("✅ Agent Ready")
132
  return agent
133
 
134
- def clean_response(response: str) -> str:
135
- """Clean the response by removing tool calls and duplicate content."""
136
- # First remove all tool call blocks
137
- response = re.sub(r'\[TOOL_CALLS\].*?(\[TOOL_CALLS\]|$)', '', response, flags=re.DOTALL)
138
-
139
- # Then remove any remaining standalone tool call markers
140
- response = response.replace('[TOOL_CALLS]', '')
141
-
142
- # Remove duplicate sections (looking for repeated identical paragraphs)
143
- paragraphs = [p.strip() for p in response.split('\n\n') if p.strip()]
144
- unique_paragraphs = []
145
- seen_paragraphs = set()
146
-
147
- for para in paragraphs:
148
- if para not in seen_paragraphs:
149
- seen_paragraphs.add(para)
150
- unique_paragraphs.append(para)
151
-
152
- # Reconstruct the response
153
- cleaned = '\n\n'.join(unique_paragraphs)
154
-
155
- # Remove any remaining JSON-like artifacts
156
- cleaned = re.sub(r'\{.*?\}', '', cleaned)
157
- cleaned = re.sub(r'\[.*?\]', '', cleaned)
158
-
159
- return cleaned.strip()
160
-
161
  def create_ui(agent):
162
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
163
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
@@ -194,7 +178,7 @@ Medical Records:
194
  """
195
 
196
  try:
197
- full_response = ""
198
  for chunk in agent.run_gradio_chat(
199
  message=prompt,
200
  history=[],
@@ -207,29 +191,19 @@ Medical Records:
207
  if chunk is None:
208
  continue
209
  if isinstance(chunk, str):
210
- full_response += chunk
211
  elif isinstance(chunk, list):
212
- full_response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
213
-
214
- # Clean the current response for display
215
- current_cleaned = clean_response(full_response)
216
- if current_cleaned:
217
- history[-1] = {"role": "assistant", "content": current_cleaned}
218
- yield history, None
219
-
220
- # Final cleaning and processing
221
- final_cleaned = clean_response(full_response)
222
- if not final_cleaned:
223
- final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
224
-
225
- # Save the full report
226
- report_path = None
227
- if file_hash_value:
228
- report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
229
- with open(report_path, "w", encoding="utf-8") as f:
230
- f.write(final_cleaned)
231
-
232
- history[-1] = {"role": "assistant", "content": final_cleaned}
233
  yield history, report_path if report_path and os.path.exists(report_path) else None
234
 
235
  except Exception as e:
 
56
  text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
57
  for i, page in enumerate(pdf.pages[3:max_pages], start=4):
58
  page_text = page.extract_text() or ""
59
+ if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
60
  text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
61
  return "\n\n".join(text_chunks)
62
  except Exception as e:
 
108
  except Exception as e:
109
  print(f"[{tag}] GPU/CPU monitor failed: {e}")
110
 
111
+ def extract_final_response(response: str) -> str:
112
+ try:
113
+ parts = response.split("[TOOL_CALLS]")
114
+ for i in reversed(range(len(parts))):
115
+ if i + 1 < len(parts) and '"name": "Finish"' in parts[i + 1]:
116
+ return parts[i].strip()
117
+ return response.strip()
118
+ except Exception as e:
119
+ print("❌ Failed to extract clean response:", str(e))
120
+ return response.strip()
121
+
122
  def init_agent():
123
  print("🔁 Initializing model...")
124
  log_system_usage("Before Load")
 
142
  print("✅ Agent Ready")
143
  return agent
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  def create_ui(agent):
146
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
147
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
 
178
  """
179
 
180
  try:
181
+ response = ""
182
  for chunk in agent.run_gradio_chat(
183
  message=prompt,
184
  history=[],
 
191
  if chunk is None:
192
  continue
193
  if isinstance(chunk, str):
194
+ response += chunk
195
  elif isinstance(chunk, list):
196
+ response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
197
+
198
+ clean = extract_final_response(response)
199
+ print("🧼 Raw Response:\n", response)
200
+ print("✅ Cleaned Final Response:\n", clean)
201
+
202
+ if not clean:
203
+ clean = "⚠️ No clear oversights identified or model output was invalid."
204
+
205
+ history[-1] = {"role": "assistant", "content": clean}
206
+ report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
 
 
 
 
 
 
 
 
 
 
207
  yield history, report_path if report_path and os.path.exists(report_path) else None
208
 
209
  except Exception as e: