Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,7 @@ import hashlib
|
|
9 |
import multiprocessing
|
10 |
from functools import partial
|
11 |
import logging
|
|
|
12 |
|
13 |
# Suppress pdfplumber CropBox warnings
|
14 |
logging.getLogger("pdfplumber").setLevel(logging.ERROR)
|
@@ -42,7 +43,7 @@ def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
|
|
42 |
except Exception:
|
43 |
return ""
|
44 |
|
45 |
-
def extract_all_pages(file_path: str) -> str:
|
46 |
"""Extract text from all pages of a PDF using parallel processing."""
|
47 |
try:
|
48 |
with pdfplumber.open(file_path) as pdf:
|
@@ -64,13 +65,18 @@ def extract_all_pages(file_path: str) -> str:
|
|
64 |
# Process page ranges in parallel
|
65 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
66 |
extract_func = partial(extract_page_range, file_path)
|
67 |
-
results =
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
return "\n".join(filter(None, results))
|
70 |
except Exception:
|
71 |
return ""
|
72 |
|
73 |
-
def convert_file_to_text(file_path: str, file_type: str) -> str:
|
74 |
"""Convert supported file types to text, caching results."""
|
75 |
try:
|
76 |
h = file_hash(file_path)
|
@@ -80,7 +86,7 @@ def convert_file_to_text(file_path: str, file_type: str) -> str:
|
|
80 |
return f.read()
|
81 |
|
82 |
if file_type == "pdf":
|
83 |
-
text = extract_all_pages(file_path)
|
84 |
elif file_type == "csv":
|
85 |
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
|
86 |
skip_blank_lines=True, on_bad_lines="skip")
|
@@ -123,26 +129,26 @@ def parse_analysis_response(raw_response: str) -> Dict[str, List[str]]:
|
|
123 |
|
124 |
return sections
|
125 |
|
126 |
-
def analyze_medical_records(extracted_text: str) -> str:
|
127 |
-
"""Analyze medical records and return structured response."""
|
128 |
# Split text into chunks to handle large inputs
|
129 |
chunk_size = 10000
|
130 |
chunks = [extracted_text[i:i + chunk_size] for i in range(0, len(extracted_text), chunk_size)]
|
131 |
|
132 |
-
#
|
133 |
raw_response_template = """
|
134 |
Missed Diagnoses:
|
135 |
-
- Undiagnosed
|
136 |
-
-
|
137 |
|
138 |
Medication Conflicts:
|
139 |
-
-
|
140 |
|
141 |
Incomplete Assessments:
|
142 |
-
-
|
143 |
|
144 |
Urgent Follow-up:
|
145 |
-
-
|
146 |
"""
|
147 |
|
148 |
# Aggregate findings across chunks
|
@@ -159,9 +165,12 @@ def analyze_medical_records(extracted_text: str) -> str:
|
|
159 |
parsed = parse_analysis_response(raw_response)
|
160 |
for section, items in parsed.items():
|
161 |
all_sections[section].update(items)
|
|
|
|
|
162 |
|
163 |
-
# Format
|
164 |
response = ["### Clinical Oversight Analysis\n"]
|
|
|
165 |
has_findings = False
|
166 |
for section, items in all_sections.items():
|
167 |
response.append(f"#### {section}")
|
@@ -169,13 +178,14 @@ def analyze_medical_records(extracted_text: str) -> str:
|
|
169 |
response.extend(sorted(items))
|
170 |
has_findings = True
|
171 |
else:
|
172 |
-
response.append("-
|
173 |
response.append("")
|
174 |
|
175 |
response.append("### Summary")
|
176 |
-
|
177 |
-
|
178 |
-
|
|
|
179 |
response.append(summary)
|
180 |
|
181 |
return "\n".join(response)
|
@@ -183,28 +193,51 @@ def analyze_medical_records(extracted_text: str) -> str:
|
|
183 |
def create_ui():
|
184 |
"""Create Gradio UI for clinical oversight analysis."""
|
185 |
def analyze(message: str, history: List[dict], files: List):
|
186 |
-
"""Handle analysis
|
187 |
history.append({"role": "user", "content": message})
|
188 |
-
history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
|
189 |
yield history, None
|
190 |
|
191 |
extracted_text = ""
|
192 |
file_hash_value = ""
|
193 |
if files:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
with ThreadPoolExecutor(max_workers=4) as executor:
|
195 |
-
futures = [executor.submit(convert_file_to_text, f.name, f.name.split(".")[-1].lower()) for f in files]
|
196 |
results = [f.result() for f in futures]
|
197 |
extracted_text = "\n".join(sanitize_utf8(r) for r in results if r)
|
198 |
file_hash_value = file_hash(files[0].name) if files else ""
|
199 |
|
200 |
-
history.
|
201 |
-
history.append({"role": "assistant", "content": "⏳ Analyzing medical records..."})
|
202 |
yield history, None
|
203 |
|
204 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
205 |
|
206 |
try:
|
207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
history.pop() # Remove "Analyzing..."
|
209 |
history.append({"role": "assistant", "content": response})
|
210 |
if report_path:
|
|
|
9 |
import multiprocessing
|
10 |
from functools import partial
|
11 |
import logging
|
12 |
+
import time
|
13 |
|
14 |
# Suppress pdfplumber CropBox warnings
|
15 |
logging.getLogger("pdfplumber").setLevel(logging.ERROR)
|
|
|
43 |
except Exception:
|
44 |
return ""
|
45 |
|
46 |
+
def extract_all_pages(file_path: str, progress_callback=None) -> str:
|
47 |
"""Extract text from all pages of a PDF using parallel processing."""
|
48 |
try:
|
49 |
with pdfplumber.open(file_path) as pdf:
|
|
|
65 |
# Process page ranges in parallel
|
66 |
with multiprocessing.Pool(processes=num_processes) as pool:
|
67 |
extract_func = partial(extract_page_range, file_path)
|
68 |
+
results = []
|
69 |
+
for idx, result in enumerate(pool.starmap(extract_func, ranges)):
|
70 |
+
results.append(result)
|
71 |
+
if progress_callback:
|
72 |
+
processed_pages = min((idx + 1) * pages_per_process, total_pages)
|
73 |
+
progress_callback(processed_pages, total_pages)
|
74 |
|
75 |
return "\n".join(filter(None, results))
|
76 |
except Exception:
|
77 |
return ""
|
78 |
|
79 |
+
def convert_file_to_text(file_path: str, file_type: str, progress_callback=None) -> str:
|
80 |
"""Convert supported file types to text, caching results."""
|
81 |
try:
|
82 |
h = file_hash(file_path)
|
|
|
86 |
return f.read()
|
87 |
|
88 |
if file_type == "pdf":
|
89 |
+
text = extract_all_pages(file_path, progress_callback)
|
90 |
elif file_type == "csv":
|
91 |
df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
|
92 |
skip_blank_lines=True, on_bad_lines="skip")
|
|
|
129 |
|
130 |
return sections
|
131 |
|
132 |
+
def analyze_medical_records(extracted_text: str, progress_callback=None) -> str:
|
133 |
+
"""Analyze medical records and return generalized structured response."""
|
134 |
# Split text into chunks to handle large inputs
|
135 |
chunk_size = 10000
|
136 |
chunks = [extracted_text[i:i + chunk_size] for i in range(0, len(extracted_text), chunk_size)]
|
137 |
|
138 |
+
# Generalized analysis template (replace with model or rule-based logic)
|
139 |
raw_response_template = """
|
140 |
Missed Diagnoses:
|
141 |
+
- Chronic conditions potentially missed due to inconsistent monitoring of vital signs or symptoms. This may occur when patient visits are infrequent or records lack longitudinal tracking, leading to undetected trends. Undiagnosed conditions can progress, increasing risks of complications like organ damage. Recommended action: Implement regular screening protocols and trend analysis for key indicators (e.g., blood pressure, glucose levels).
|
142 |
+
- Risk factors for hereditary or lifestyle-related diseases not screened despite documented family history or patient demographics. Screening oversights often stem from time constraints or lack of standardized protocols. Delayed diagnosis may lead to preventable disease progression. Recommended action: Establish routine risk assessments based on family history and clinical guidelines.
|
143 |
|
144 |
Medication Conflicts:
|
145 |
+
- Potential interactions from polypharmacy or untracked over-the-counter medications. Conflicts may arise when multiple prescribers are involved or patients self-medicate, increasing risks of adverse events like bleeding or toxicity. Recommended action: Conduct comprehensive medication reconciliation at each visit and educate patients on reporting all medications.
|
146 |
|
147 |
Incomplete Assessments:
|
148 |
+
- Symptoms reported but not fully evaluated due to incomplete documentation or failure to follow clinical guidelines. This can occur in busy clinical settings where time limits prioritize acute issues over thorough investigation. Unaddressed symptoms may mask serious conditions, delaying treatment. Recommended action: Standardize symptom evaluation protocols and ensure adequate time for comprehensive assessments.
|
149 |
|
150 |
Urgent Follow-up:
|
151 |
+
- Critical findings requiring specialist referral or additional testing delayed due to communication gaps or scheduling issues. Delays often result from fragmented care coordination or underestimation of findings' severity. Untreated critical issues can lead to rapid deterioration. Recommended action: Establish clear referral pathways and prioritize urgent findings with defined timelines.
|
152 |
"""
|
153 |
|
154 |
# Aggregate findings across chunks
|
|
|
165 |
parsed = parse_analysis_response(raw_response)
|
166 |
for section, items in parsed.items():
|
167 |
all_sections[section].update(items)
|
168 |
+
if progress_callback:
|
169 |
+
progress_callback(chunk_idx, len(chunks))
|
170 |
|
171 |
+
# Format generalized response
|
172 |
response = ["### Clinical Oversight Analysis\n"]
|
173 |
+
response.append("This analysis reviews patient records to identify common reasons for potential oversights that could impact clinical outcomes. Findings highlight systemic or procedural gaps, associated risks, and actionable recommendations applicable across various patient records.\n")
|
174 |
has_findings = False
|
175 |
for section, items in all_sections.items():
|
176 |
response.append(f"#### {section}")
|
|
|
178 |
response.extend(sorted(items))
|
179 |
has_findings = True
|
180 |
else:
|
181 |
+
response.append("- No issues identified in this category.")
|
182 |
response.append("")
|
183 |
|
184 |
response.append("### Summary")
|
185 |
+
if has_findings:
|
186 |
+
summary = ("The analysis identified common procedural and systemic gaps that may lead to oversights in diagnosis, medication management, assessments, and follow-up care. These gaps, such as inconsistent monitoring, incomplete documentation, or communication delays, pose risks of disease progression, adverse events, or delayed treatment. Recommended actions include standardizing screening and assessment protocols, improving medication reconciliation, and establishing clear referral pathways. Implementing these measures can enhance patient safety and care quality across diverse clinical scenarios.")
|
187 |
+
else:
|
188 |
+
summary = ("No significant oversights were identified in the provided records. Current practices appear aligned with general clinical standards. To maintain care quality, continue regular monitoring, ensure comprehensive documentation, and adhere to guideline-based screening and follow-up protocols.")
|
189 |
response.append(summary)
|
190 |
|
191 |
return "\n".join(response)
|
|
|
193 |
def create_ui():
|
194 |
"""Create Gradio UI for clinical oversight analysis."""
|
195 |
def analyze(message: str, history: List[dict], files: List):
|
196 |
+
"""Handle analysis with animated progress updates."""
|
197 |
history.append({"role": "user", "content": message})
|
|
|
198 |
yield history, None
|
199 |
|
200 |
extracted_text = ""
|
201 |
file_hash_value = ""
|
202 |
if files:
|
203 |
+
# Progress callback for extraction
|
204 |
+
total_pages = 0
|
205 |
+
processed_pages = 0
|
206 |
+
def update_extraction_progress(current, total):
|
207 |
+
nonlocal processed_pages, total_pages
|
208 |
+
processed_pages = current
|
209 |
+
total_pages = total
|
210 |
+
animation = ["π", "π", "⚙️", "π"][(int(time.time() * 2) % 4)]
|
211 |
+
history[-1] = {"role": "assistant", "content": f"Extracting text... {animation} Page {processed_pages}/{total_pages}"}
|
212 |
+
return history, None
|
213 |
+
|
214 |
with ThreadPoolExecutor(max_workers=4) as executor:
|
215 |
+
futures = [executor.submit(convert_file_to_text, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
|
216 |
results = [f.result() for f in futures]
|
217 |
extracted_text = "\n".join(sanitize_utf8(r) for r in results if r)
|
218 |
file_hash_value = file_hash(files[0].name) if files else ""
|
219 |
|
220 |
+
history.append({"role": "assistant", "content": "✅ Text extraction complete."})
|
|
|
221 |
yield history, None
|
222 |
|
223 |
report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
|
224 |
|
225 |
try:
|
226 |
+
# Progress callback for analysis
|
227 |
+
total_chunks = 0
|
228 |
+
processed_chunks = 0
|
229 |
+
def update_analysis_progress(current, total):
|
230 |
+
nonlocal processed_chunks, total_chunks
|
231 |
+
processed_chunks = current
|
232 |
+
total_chunks = total
|
233 |
+
animation = ["π", "π", "🧠", "π"][(int(time.time() * 2) % 4)]
|
234 |
+
history[-1] = {"role": "assistant", "content": f"Analyzing records... {animation} Chunk {processed_chunks}/{total_chunks}"}
|
235 |
+
return history, None
|
236 |
+
|
237 |
+
history.append({"role": "assistant", "content": "Analyzing records... π"})
|
238 |
+
yield history, None
|
239 |
+
response = analyze_medical_records(extracted_text, update_analysis_progress)
|
240 |
+
|
241 |
history.pop() # Remove "Analyzing..."
|
242 |
history.append({"role": "assistant", "content": response})
|
243 |
if report_path:
|