Update app.py
app.py CHANGED
@@ -4,15 +4,16 @@ import pandas as pd
 import pdfplumber
 import json
 import gradio as gr
-from typing import List
+from typing import List, Tuple, Optional
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import hashlib
 import shutil
 import re
 import psutil
 import subprocess
+from datetime import datetime

-# Persistent directory
+# Persistent directory setup
 persistent_dir = "/data/hf_cache"
 os.makedirs(persistent_dir, exist_ok=True)

@@ -37,46 +38,78 @@ sys.path.insert(0, src_path)

 from txagent.txagent import TxAgent

-
-
+# Constants
+MEDICAL_KEYWORDS = {
+    'diagnosis', 'assessment', 'plan', 'results', 'medications',
+    'allergies', 'summary', 'impression', 'findings', 'recommendations',
+    'conclusion', 'history', 'examination', 'progress', 'discharge'
+}
+CHUNK_SIZE = 10000  # Increased chunk size for better context
+MAX_TOKENS = 12000  # Maximum tokens for analysis

 def sanitize_utf8(text: str) -> str:
+    """Ensure text is UTF-8 clean."""
     return text.encode("utf-8", "ignore").decode("utf-8")

 def file_hash(path: str) -> str:
+    """Generate MD5 hash of file content."""
     with open(path, "rb") as f:
         return hashlib.md5(f.read()).hexdigest()

-def …
+def extract_all_pages(file_path: str) -> Tuple[str, int]:
+    """
+    Extract all pages from PDF with smart prioritization of medical sections.
+    Returns (extracted_text, total_pages)
+    """
     try:
         text_chunks = []
+        total_pages = 0
         with pdfplumber.open(file_path) as pdf:
+            total_pages = len(pdf.pages)
+
             for i, page in enumerate(pdf.pages):
                 page_text = page.extract_text() or ""
-
+                lower_text = page_text.lower()
+
+                # Include all pages but mark sections with medical keywords
+                if any(re.search(rf'\b{kw}\b', lower_text) for kw in MEDICAL_KEYWORDS):
+                    text_chunks.append(f"=== MEDICAL SECTION (Page {i+1}) ===\n{page_text.strip()}")
+                else:
                     text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
-
+
+        return "\n\n".join(text_chunks), total_pages
     except Exception as e:
-        return f"PDF processing error: {str(e)}"
+        return f"PDF processing error: {str(e)}", 0

 def convert_file_to_json(file_path: str, file_type: str) -> str:
+    """Convert file to JSON format with caching, processing all content."""
     try:
         h = file_hash(file_path)
         cache_path = os.path.join(file_cache_dir, f"{h}.json")
+
         if os.path.exists(cache_path):
             with open(cache_path, "r", encoding="utf-8") as f:
                 return f.read()

         if file_type == "pdf":
-            text = …
-            result = json.dumps({ …
+            text, total_pages = extract_all_pages(file_path)
+            result = json.dumps({
+                "filename": os.path.basename(file_path),
+                "content": text,
+                "total_pages": total_pages,
+                "status": "complete"
+            })
         elif file_type == "csv":
-
-
-
+            # Read CSV in chunks to handle large files
+            chunks = []
+            for chunk in pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
+                                     skip_blank_lines=False, on_bad_lines="skip", chunksize=1000):
+                chunks.append(chunk.fillna("").astype(str).values.tolist())
+            content = [item for sublist in chunks for item in sublist]
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         elif file_type in ["xls", "xlsx"]:
             try:
+                # Read Excel in chunks if possible
                 df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
             except Exception:
                 df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
@@ -84,6 +117,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
             result = json.dumps({"filename": os.path.basename(file_path), "rows": content})
         else:
             result = json.dumps({"error": f"Unsupported file type: {file_type}"})
+
         with open(cache_path, "w", encoding="utf-8") as f:
             f.write(result)
         return result
@@ -91,6 +125,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
         return json.dumps({"error": f"Error processing {os.path.basename(file_path)}: {str(e)}"})

 def log_system_usage(tag=""):
+    """Log system resource usage."""
     try:
         cpu = psutil.cpu_percent(interval=1)
         mem = psutil.virtual_memory()
@@ -106,21 +141,74 @@ def log_system_usage(tag=""):
         print(f"[{tag}] GPU/CPU monitor failed: {e}")

 def clean_response(text: str) -> str:
+    """Clean and format the model response."""
     text = sanitize_utf8(text)
-    # Remove tool calls
+    # Remove tool calls and JSON artifacts
     text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
-    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
-    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
+    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
+    text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
+    # Remove repetitive phrases
     text = re.sub(r"To analyze the medical records for clinical oversights.*?begin by reviewing.*?\n", "", text, flags=re.DOTALL)
+    # Collapse excessive newlines
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
-    # Only keep text under analysis headings or relevant content
-    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text):
-        return ""
     return text

+def format_final_report(analysis_results: List[str], filename: str) -> str:
+    """Combine all analysis chunks into a well-formatted final report."""
+    report = []
+    report.append(f"COMPREHENSIVE CLINICAL OVERSIGHT ANALYSIS")
+    report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    report.append(f"File: {filename}")
+    report.append("=" * 80)
+
+    # Extract sections from all chunks
+    sections = {
+        "CRITICAL FINDINGS": [],
+        "MISSED DIAGNOSES": [],
+        "MEDICATION ISSUES": [],
+        "ASSESSMENT GAPS": [],
+        "FOLLOW-UP RECOMMENDATIONS": []
+    }
+
+    for result in analysis_results:
+        for section in sections:
+            # Find section content using regex
+            section_match = re.search(
+                rf"{re.escape(section)}:?\s*\n([^*]+?)(?=\n\*|\n\n|$)",
+                result,
+                re.IGNORECASE | re.DOTALL
+            )
+            if section_match:
+                content = section_match.group(1).strip()
+                if content and content not in sections[section]:
+                    sections[section].append(content)
+
+    # Build the final report - prioritize critical findings
+    if sections["CRITICAL FINDINGS"]:
+        report.append("\n🚨 **CRITICAL FINDINGS** 🚨")
+        for content in sections["CRITICAL FINDINGS"]:
+            report.append(f"\n{content}")
+
+    # Add other sections
+    for section, contents in sections.items():
+        if section != "CRITICAL FINDINGS" and contents:
+            report.append(f"\n**{section.upper()}**")
+            for content in contents:
+                report.append(f"\n{content}")
+
+    if not any(sections.values()):
+        report.append("\nNo significant clinical oversights identified.")
+
+    report.append("\n" + "=" * 80)
+    report.append("END OF REPORT")
+
+    return "\n".join(report)
+
 def init_agent():
+    """Initialize the TxAgent with proper configuration."""
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
+
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
     if not os.path.exists(target_tool_path):
@@ -141,135 +229,191 @@ def init_agent():
     print("✅ Agent Ready")
     return agent

-def …
- [… 38 removed lines …]
-    3. **Incomplete Assessments**:
-        - Note missing or superficial cognitive, psychiatric, social, or family assessments.
-        - Highlight gaps in medical history, substance use, or lab/imaging documentation.
-
-    4. **Urgent Follow-up**:
-        - Flag abnormal lab results, imaging, behaviors, or legal history needing immediate reassessment or referral.
-
-    Medical Records (Chunk {0} of {1}):
-    {{chunk}}
-
-    Begin analysis:
+def analyze_large_document(content: str, filename: str, agent: TxAgent) -> str:
+    """Analyze large documents by processing in logical sections."""
+    # Split content into logical sections
+    sections = re.split(r"(=== MEDICAL SECTION|=== Page \d+ ===)", content)
+    sections = [s.strip() for s in sections if s.strip()]
+
+    analysis_results = []
+    current_chunk = ""
+
+    for section in sections:
+        # If adding this section would exceed chunk size, analyze current chunk
+        if len(current_chunk) + len(section) > CHUNK_SIZE and current_chunk:
+            analysis_results.append(process_chunk(current_chunk, filename, agent))
+            current_chunk = section
+        else:
+            current_chunk += "\n\n" + section
+
+    # Process the last chunk
+    if current_chunk:
+        analysis_results.append(process_chunk(current_chunk, filename, agent))
+
+    return format_final_report(analysis_results, filename)
+
+def process_chunk(chunk: str, filename: str, agent: TxAgent) -> str:
+    """Process a single chunk of the document."""
+    prompt = f"""
+Analyze this section of medical records for clinical oversights. Focus on:
+1. Critical findings needing immediate attention
+2. Potential missed diagnoses
+3. Medication conflicts
+4. Assessment gaps
+5. Follow-up recommendations
+
+File: {filename}
+Content:
+{chunk[:CHUNK_SIZE]}
+
+Provide concise findings in bullet points under relevant headings.
+Focus on factual evidence from the content.
 """
+
+    full_response = ""
+    for output in agent.run_gradio_chat(
+        message=prompt,
+        history=[],
+        temperature=0.1,  # Lower temperature for more factual responses
+        max_new_tokens=1024,
+        max_token=MAX_TOKENS,
+        call_agent=False,
+        conversation=[],
+    ):
+        if output is None:
+            continue
+
+        if isinstance(output, list):
+            for m in output:
+                if hasattr(m, 'content') and m.content:
+                    cleaned = clean_response(m.content)
+                    if cleaned:
+                        full_response += cleaned + "\n"
+        elif isinstance(output, str) and output.strip():
+            cleaned = clean_response(output)
+            if cleaned:
+                full_response += cleaned + "\n"
+
+    return full_response

+def create_ui(agent):
+    """Create the Gradio interface."""
+    with gr.Blocks(theme=gr.themes.Soft(), title="Clinical Oversight Assistant") as demo:
+        gr.Markdown("""
+        <h1 style='text-align: center;'>🩺 Comprehensive Clinical Oversight Assistant</h1>
+        <p style='text-align: center;'>Analyze complete medical records for potential oversights</p>
+        """)
+
+        with gr.Row():
+            with gr.Column(scale=3):
+                file_upload = gr.File(
+                    file_types=[".pdf", ".csv", ".xls", ".xlsx"],
+                    file_count="multiple",
+                    label="Upload Medical Records"
+                )
+                msg_input = gr.Textbox(
+                    placeholder="Optional: Add specific focus areas or questions...",
+                    label="Analysis Focus"
+                )
+                with gr.Row():
+                    send_btn = gr.Button("Analyze Full Document", variant="primary")
+                    clear_btn = gr.Button("Clear")
+                status = gr.Textbox(label="Status", interactive=False)
+
+            with gr.Column(scale=7):
+                report_output = gr.Textbox(
+                    label="Clinical Oversight Report",
+                    lines=20,
+                    max_lines=50,
+                    interactive=False
+                )
+                download_output = gr.File(
+                    label="Download Full Report",
+                    visible=False
+                )
+
+        def analyze(files: List, message: str):
+            """Process files and generate analysis."""
+            if not files:
+                yield "", None, "⚠️ Please upload at least one file to analyze."
+                return
+
+            yield "", None, "⏳ Processing documents..."
+
+            # Process all files completely
+            file_contents = []
+            filenames = []
+
+            with ThreadPoolExecutor(max_workers=4) as executor:
+                futures = []
+                for f in files:
+                    futures.append(executor.submit(
+                        convert_file_to_json,
+                        f.name,
+                        f.name.split(".")[-1].lower()
+                    ))
+                    filenames.append(os.path.basename(f.name))
+
+                results = []
+                for future in as_completed(futures):
+                    results.append(sanitize_utf8(future.result()))
+
+                file_contents = results
+
+            combined_filename = " + ".join(filenames)
+            combined_content = "\n".join([
+                json.loads(fc).get("content", "") if "content" in json.loads(fc)
+                else str(json.loads(fc).get("rows", ""))
+                for fc in file_contents
+            ])
+
+            yield "", None, "🔍 Analyzing content..."
+
             try:
- [… 15 removed lines …]
-                max_new_tokens=1024,
-                max_token=4096,
-                call_agent=False,
-                conversation=[],
-            ):
-                if chunk_output is None:
-                    continue
-                if isinstance(chunk_output, list):
-                    for m in chunk_output:
-                        if hasattr(m, 'content') and m.content:
-                            cleaned = clean_response(m.content)
-                            if cleaned:
-                                chunk_response += cleaned + "\n"
-                                # Stream partial response to UI
-                                if history[-1]["content"].startswith("🔄"):
-                                    history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                                else:
-                                    history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                                yield history, None
-                elif isinstance(chunk_output, str) and chunk_output.strip():
-                    cleaned = clean_response(chunk_output)
-                    if cleaned:
-                        chunk_response += cleaned + "\n"
-                        # Stream partial response to UI
-                        if history[-1]["content"].startswith("🔄"):
-                            history[-1] = {"role": "assistant", "content": f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"}
-                        else:
-                            history[-1]["content"] = f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response.strip()}"
-                        yield history, None
-
-            # Append completed chunk response to combined response
-            if chunk_response:
-                combined_response += f"--- Analysis for Chunk {chunk_idx} ---\n{chunk_response}\n"
-
-        # Finalize UI with complete response
-        if combined_response:
-            history[-1]["content"] = combined_response.strip()
-        else:
-            history.append({"role": "assistant", "content": "No oversights identified."})
-
-        # Generate report file with cleaned response
-        report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-        if report_path:
-            with open(report_path, "w", encoding="utf-8") as f:
-                f.write(combined_response)
-        yield history, report_path if report_path and os.path.exists(report_path) else None
-
+                # Process the complete document
+                full_report = analyze_large_document(
+                    combined_content,
+                    combined_filename,
+                    agent
+                )
+
+                # Save report to file
+                file_hash_value = hashlib.md5(combined_content.encode()).hexdigest()
+                report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt")
+                with open(report_path, "w", encoding="utf-8") as f:
+                    f.write(full_report)
+
+                yield full_report, report_path if os.path.exists(report_path) else None, "✅ Analysis complete!"
+
             except Exception as e:
-
-
-        yield …
-
-
-
+                error_msg = f"❌ Error during analysis: {str(e)}"
+                print(error_msg)
+                yield "", None, error_msg
+
+        # UI event handlers
+        send_btn.click(
+            fn=analyze,
+            inputs=[file_upload, msg_input],
+            outputs=[report_output, download_output, status],
+            api_name="analyze"
+        )
+
+        clear_btn.click(
+            fn=lambda: ("", None, ""),
+            inputs=None,
+            outputs=[report_output, download_output, status]
+        )
+
     return demo

 if __name__ == "__main__":
     print("🚀 Launching app...")
     agent = init_agent()
     demo = create_ui(agent)
-    demo.queue(
+    demo.queue(
+        api_open=False,
+        max_size=20
+    ).launch(
         server_name="0.0.0.0",
         server_port=7860,
         show_error=True,
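A quick way to sanity-check the new section-based chunking outside the app: the sketch below mirrors the CHUNK_SIZE threshold loop in analyze_large_document. It is standalone and illustrative only; the helper name split_into_chunks is not part of the commit.

# Minimal sketch of the CHUNK_SIZE-based splitting used by analyze_large_document (illustrative only).
CHUNK_SIZE = 10000

def split_into_chunks(sections):
    chunks, current = [], ""
    for section in sections:
        # Start a new chunk once adding the next section would exceed CHUNK_SIZE.
        if len(current) + len(section) > CHUNK_SIZE and current:
            chunks.append(current)
            current = section
        else:
            current += "\n\n" + section
    if current:
        chunks.append(current)
    return chunks

print(len(split_into_chunks(["a" * 6000, "b" * 6000, "c" * 100])))  # prints 2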