Update app.py
Browse files
app.py
CHANGED
@@ -56,7 +56,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
|
|
56 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
57 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
58 |
page_text = page.extract_text() or ""
|
59 |
-
if any(re.search(rf'
|
60 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
61 |
return "\n\n".join(text_chunks)
|
62 |
except Exception as e:
|
@@ -108,6 +108,17 @@ def log_system_usage(tag=""):
|
|
108 |
except Exception as e:
|
109 |
print(f"[{tag}] GPU/CPU monitor failed: {e}")
|
110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
def init_agent():
|
112 |
print("🔁 Initializing model...")
|
113 |
log_system_usage("Before Load")
|
@@ -131,33 +142,6 @@ def init_agent():
|
|
131 |
print("✅ Agent Ready")
|
132 |
return agent
|
133 |
|
134 |
-
def clean_response(response: str) -> str:
    """Strip tool-call blocks, duplicated paragraphs, and JSON-ish noise.

    Returns the cleaned text with surrounding whitespace removed.
    """
    # Drop everything between a [TOOL_CALLS] marker and the next one (or EOF).
    response = re.sub(r'\[TOOL_CALLS\].*?(\[TOOL_CALLS\]|$)', '', response, flags=re.DOTALL)
    # Any stray markers that survived the block removal.
    response = response.replace('[TOOL_CALLS]', '')

    # Keep only the first occurrence of each paragraph, preserving order.
    stripped = (p.strip() for p in response.split('\n\n'))
    deduped = dict.fromkeys(p for p in stripped if p)
    cleaned = '\n\n'.join(deduped)

    # Scrub leftover JSON-like artifacts ({...} then [...]).
    for artifact in (r'\{.*?\}', r'\[.*?\]'):
        cleaned = re.sub(artifact, '', cleaned)

    return cleaned.strip()
|
160 |
-
|
161 |
def create_ui(agent):
|
162 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
163 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
@@ -194,7 +178,7 @@ Medical Records:
|
|
194 |
"""
|
195 |
|
196 |
try:
|
197 |
-
|
198 |
for chunk in agent.run_gradio_chat(
|
199 |
message=prompt,
|
200 |
history=[],
|
@@ -207,29 +191,19 @@ Medical Records:
|
|
207 |
if chunk is None:
|
208 |
continue
|
209 |
if isinstance(chunk, str):
|
210 |
-
|
211 |
elif isinstance(chunk, list):
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
if
|
223 |
-
final_cleaned = "⚠️ No clear oversights identified or model output was invalid."
|
224 |
-
|
225 |
-
# Save the full report
|
226 |
-
report_path = None
|
227 |
-
if file_hash_value:
|
228 |
-
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
|
229 |
-
with open(report_path, "w", encoding="utf-8") as f:
|
230 |
-
f.write(final_cleaned)
|
231 |
-
|
232 |
-
history[-1] = {"role": "assistant", "content": final_cleaned}
|
233 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
234 |
|
235 |
except Exception as e:
|
|
|
56 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
57 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
58 |
page_text = page.extract_text() or ""
|
59 |
+
if any(re.search(rf'\\b{kw}\\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
60 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
61 |
return "\n\n".join(text_chunks)
|
62 |
except Exception as e:
|
|
|
108 |
except Exception as e:
|
109 |
print(f"[{tag}] GPU/CPU monitor failed: {e}")
|
110 |
|
111 |
+
def extract_final_response(response: str) -> str:
    """Return the text segment that precedes the last Finish tool call.

    Splits on the [TOOL_CALLS] marker and walks the segments from the end;
    the segment immediately before one containing '"name": "Finish"' is
    treated as the model's final answer. Falls back to the whole response
    (stripped) when no Finish call is found or extraction fails.
    """
    try:
        segments = response.split("[TOOL_CALLS]")
        last = len(segments) - 1
        for idx in range(last, -1, -1):
            # Only segments that have a successor can precede a tool call.
            if idx < last and '"name": "Finish"' in segments[idx + 1]:
                return segments[idx].strip()
        return response.strip()
    except Exception as err:
        print("❌ Failed to extract clean response:", str(err))
        return response.strip()
121 |
+
|
122 |
def init_agent():
|
123 |
print("🔁 Initializing model...")
|
124 |
log_system_usage("Before Load")
|
|
|
142 |
print("✅ Agent Ready")
|
143 |
return agent
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
def create_ui(agent):
|
146 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
147 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
|
|
178 |
"""
|
179 |
|
180 |
try:
|
181 |
+
response = ""
|
182 |
for chunk in agent.run_gradio_chat(
|
183 |
message=prompt,
|
184 |
history=[],
|
|
|
191 |
if chunk is None:
|
192 |
continue
|
193 |
if isinstance(chunk, str):
|
194 |
+
response += chunk
|
195 |
elif isinstance(chunk, list):
|
196 |
+
response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
|
197 |
+
|
198 |
+
clean = extract_final_response(response)
|
199 |
+
print("🧼 Raw Response:\n", response)
|
200 |
+
print("✅ Cleaned Final Response:\n", clean)
|
201 |
+
|
202 |
+
if not clean:
|
203 |
+
clean = "⚠️ No clear oversights identified or model output was invalid."
|
204 |
+
|
205 |
+
history[-1] = {"role": "assistant", "content": clean}
|
206 |
+
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
208 |
|
209 |
except Exception as e:
|