Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,9 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
2 |
from typing import List
|
3 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
4 |
import hashlib
|
@@ -51,7 +56,7 @@ def extract_priority_pages(file_path: str, max_pages: int = 20) -> str:
|
|
51 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
52 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
53 |
page_text = page.extract_text() or ""
|
54 |
-
if any(re.search(rf'
|
55 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
56 |
return "\n\n".join(text_chunks)
|
57 |
except Exception as e:
|
@@ -126,6 +131,30 @@ def init_agent():
|
|
126 |
print("✅ Agent Ready")
|
127 |
return agent
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
def create_ui(agent):
|
130 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
131 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
@@ -163,8 +192,6 @@ Medical Records:
|
|
163 |
|
164 |
try:
|
165 |
response = ""
|
166 |
-
finish_detected = False
|
167 |
-
|
168 |
for chunk in agent.run_gradio_chat(
|
169 |
message=prompt,
|
170 |
history=[],
|
@@ -179,18 +206,22 @@ Medical Records:
|
|
179 |
if isinstance(chunk, str):
|
180 |
response += chunk
|
181 |
elif isinstance(chunk, list):
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
|
|
|
|
|
|
|
|
194 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
195 |
|
196 |
except Exception as e:
|
@@ -212,4 +243,4 @@ if __name__ == "__main__":
|
|
212 |
show_error=True,
|
213 |
allowed_paths=[report_dir],
|
214 |
share=False
|
215 |
-
)
|
|
|
1 |
+
import sys
|
2 |
+
import os
|
3 |
+
import pandas as pd
|
4 |
+
import pdfplumber
|
5 |
+
import json
|
6 |
+
import gradio as gr
|
7 |
from typing import List
|
8 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
9 |
import hashlib
|
|
|
56 |
text_chunks.append(f"=== Page {i+1} ===\n{text.strip()}")
|
57 |
for i, page in enumerate(pdf.pages[3:max_pages], start=4):
|
58 |
page_text = page.extract_text() or ""
|
59 |
+
if any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
|
60 |
text_chunks.append(f"=== Page {i} ===\n{page_text.strip()}")
|
61 |
return "\n\n".join(text_chunks)
|
62 |
except Exception as e:
|
|
|
131 |
print("✅ Agent Ready")
|
132 |
return agent
|
133 |
|
134 |
+
def clean_response(response: str) -> str:
    """Clean raw model output for display and for the saved report.

    Three passes are applied, in order:

    1. Everything from the first ``[TOOL_CALLS]`` marker to the end of the
       text is dropped.
    2. Exact duplicate sentences are removed, keeping the first occurrence
       and the original order.
    3. Remaining ``{...}`` and ``[...]`` spans (JSON-like artifacts) are
       stripped.

    Args:
        response: The accumulated text produced by the model.

    Returns:
        The cleaned text with surrounding whitespace removed. May be an
        empty string when nothing meaningful is left.
    """
    # Remove all tool call blocks (DOTALL lets the match run across newlines
    # up to the end of the text).
    response = re.sub(r'\[TOOL_CALLS\].*?$', '', response, flags=re.DOTALL)

    # Split only where sentence-ending punctuation is followed by whitespace.
    # Splitting on every '.' would break decimals and abbreviations apart
    # (e.g. "2.5 mg" -> "2. 5 mg"), which is unacceptable for dosages.
    unique_sentences = []
    seen_sentences = set()
    for fragment in re.split(r'(?<=[.!?])\s+', response):
        sentence = fragment.strip()
        if not sentence:
            continue
        # Only the trailing fragment can lack a terminator; give it a period
        # so it compares equal to an earlier, terminated copy of itself.
        if sentence[-1] not in '.!?':
            sentence += '.'
        if sentence not in seen_sentences:
            seen_sentences.add(sentence)
            unique_sentences.append(sentence)

    # Reconstruct the response; each sentence already carries its own
    # terminator, so a single space is the only separator needed.
    cleaned = ' '.join(unique_sentences) if unique_sentences else response

    # Remove any remaining JSON-like artifacts
    cleaned = re.sub(r'\{.*?\}', '', cleaned)
    cleaned = re.sub(r'\[.*?\]', '', cleaned)

    return cleaned.strip()
|
157 |
+
|
158 |
def create_ui(agent):
|
159 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
160 |
gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
|
|
|
192 |
|
193 |
try:
|
194 |
response = ""
|
|
|
|
|
195 |
for chunk in agent.run_gradio_chat(
|
196 |
message=prompt,
|
197 |
history=[],
|
|
|
206 |
if isinstance(chunk, str):
|
207 |
response += chunk
|
208 |
elif isinstance(chunk, list):
|
209 |
+
response += "".join([c.content for c in chunk if hasattr(c, "content") and c.content])
|
210 |
+
|
211 |
+
# Clean the response before displaying
|
212 |
+
clean_response_text = clean_response(response)
|
213 |
+
|
214 |
+
if not clean_response_text:
|
215 |
+
clean_response_text = "⚠️ No clear oversights identified or model output was invalid."
|
216 |
+
|
217 |
+
# Save the full report
|
218 |
+
report_path = None
|
219 |
+
if file_hash_value:
|
220 |
+
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
|
221 |
+
with open(report_path, "w", encoding="utf-8") as f:
|
222 |
+
f.write(clean_response_text)
|
223 |
+
|
224 |
+
history[-1] = {"role": "assistant", "content": clean_response_text}
|
225 |
yield history, report_path if report_path and os.path.exists(report_path) else None
|
226 |
|
227 |
except Exception as e:
|
|
|
243 |
show_error=True,
|
244 |
allowed_paths=[report_dir],
|
245 |
share=False
|
246 |
+
)
|