Ali2206 committed on
Commit
c278ebf
·
verified ·
1 Parent(s): 90e24e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -23
app.py CHANGED
@@ -9,6 +9,7 @@ import hashlib
9
  import multiprocessing
10
  from functools import partial
11
  import logging
 
12
 
13
  # Suppress pdfplumber CropBox warnings
14
  logging.getLogger("pdfplumber").setLevel(logging.ERROR)
@@ -42,7 +43,7 @@ def extract_page_range(file_path: str, start_page: int, end_page: int) -> str:
42
  except Exception:
43
  return ""
44
 
45
- def extract_all_pages(file_path: str) -> str:
46
  """Extract text from all pages of a PDF using parallel processing."""
47
  try:
48
  with pdfplumber.open(file_path) as pdf:
@@ -64,13 +65,18 @@ def extract_all_pages(file_path: str) -> str:
64
  # Process page ranges in parallel
65
  with multiprocessing.Pool(processes=num_processes) as pool:
66
  extract_func = partial(extract_page_range, file_path)
67
- results = pool.starmap(extract_func, ranges)
 
 
 
 
 
68
 
69
  return "\n".join(filter(None, results))
70
  except Exception:
71
  return ""
72
 
73
- def convert_file_to_text(file_path: str, file_type: str) -> str:
74
  """Convert supported file types to text, caching results."""
75
  try:
76
  h = file_hash(file_path)
@@ -80,7 +86,7 @@ def convert_file_to_text(file_path: str, file_type: str) -> str:
80
  return f.read()
81
 
82
  if file_type == "pdf":
83
- text = extract_all_pages(file_path)
84
  elif file_type == "csv":
85
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
86
  skip_blank_lines=True, on_bad_lines="skip")
@@ -123,26 +129,26 @@ def parse_analysis_response(raw_response: str) -> Dict[str, List[str]]:
123
 
124
  return sections
125
 
126
- def analyze_medical_records(extracted_text: str) -> str:
127
- """Analyze medical records and return structured response."""
128
  # Split text into chunks to handle large inputs
129
  chunk_size = 10000
130
  chunks = [extracted_text[i:i + chunk_size] for i in range(0, len(extracted_text), chunk_size)]
131
 
132
- # Placeholder for analysis (replace with model or rule-based logic)
133
  raw_response_template = """
134
  Missed Diagnoses:
135
- - Undiagnosed hypertension despite elevated BP readings.
136
- - Family history of diabetes not evaluated for prediabetes risk.
137
 
138
  Medication Conflicts:
139
- - SSRIs and NSAIDs detected, increasing GI bleeding risk.
140
 
141
  Incomplete Assessments:
142
- - No cardiac stress test despite chest pain.
143
 
144
  Urgent Follow-up:
145
- - Abnormal ECG requires cardiology referral.
146
  """
147
 
148
  # Aggregate findings across chunks
@@ -159,9 +165,12 @@ def analyze_medical_records(extracted_text: str) -> str:
159
  parsed = parse_analysis_response(raw_response)
160
  for section, items in parsed.items():
161
  all_sections[section].update(items)
 
 
162
 
163
- # Format final response
164
  response = ["### Clinical Oversight Analysis\n"]
 
165
  has_findings = False
166
  for section, items in all_sections.items():
167
  response.append(f"#### {section}")
@@ -169,13 +178,14 @@ def analyze_medical_records(extracted_text: str) -> str:
169
  response.extend(sorted(items))
170
  has_findings = True
171
  else:
172
- response.append("- None identified.")
173
  response.append("")
174
 
175
  response.append("### Summary")
176
- summary = ("The analysis identified potential oversights in diagnosis, medication management, "
177
- "assessments, and follow-up needs. Immediate action is recommended.") if has_findings else \
178
- "No significant oversights identified. Continue monitoring."
 
179
  response.append(summary)
180
 
181
  return "\n".join(response)
@@ -183,28 +193,51 @@ def analyze_medical_records(extracted_text: str) -> str:
183
  def create_ui():
184
  """Create Gradio UI for clinical oversight analysis."""
185
  def analyze(message: str, history: List[dict], files: List):
186
- """Handle analysis and return results."""
187
  history.append({"role": "user", "content": message})
188
- history.append({"role": "assistant", "content": "⏳ Extracting text from files..."})
189
  yield history, None
190
 
191
  extracted_text = ""
192
  file_hash_value = ""
193
  if files:
 
 
 
 
 
 
 
 
 
 
 
194
  with ThreadPoolExecutor(max_workers=4) as executor:
195
- futures = [executor.submit(convert_file_to_text, f.name, f.name.split(".")[-1].lower()) for f in files]
196
  results = [f.result() for f in futures]
197
  extracted_text = "\n".join(sanitize_utf8(r) for r in results if r)
198
  file_hash_value = file_hash(files[0].name) if files else ""
199
 
200
- history.pop() # Remove "Extracting..."
201
- history.append({"role": "assistant", "content": "⏳ Analyzing medical records..."})
202
  yield history, None
203
 
204
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
205
 
206
  try:
207
- response = analyze_medical_records(extracted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  history.pop() # Remove "Analyzing..."
209
  history.append({"role": "assistant", "content": response})
210
  if report_path:
 
9
  import multiprocessing
10
  from functools import partial
11
  import logging
12
+ import time
13
 
14
  # Suppress pdfplumber CropBox warnings
15
  logging.getLogger("pdfplumber").setLevel(logging.ERROR)
 
43
  except Exception:
44
  return ""
45
 
46
+ def extract_all_pages(file_path: str, progress_callback=None) -> str:
47
  """Extract text from all pages of a PDF using parallel processing."""
48
  try:
49
  with pdfplumber.open(file_path) as pdf:
 
65
  # Process page ranges in parallel
66
  with multiprocessing.Pool(processes=num_processes) as pool:
67
  extract_func = partial(extract_page_range, file_path)
68
+ results = []
69
+ for idx, result in enumerate(pool.starmap(extract_func, ranges)):
70
+ results.append(result)
71
+ if progress_callback:
72
+ processed_pages = min((idx + 1) * pages_per_process, total_pages)
73
+ progress_callback(processed_pages, total_pages)
74
 
75
  return "\n".join(filter(None, results))
76
  except Exception:
77
  return ""
78
 
79
+ def convert_file_to_text(file_path: str, file_type: str, progress_callback=None) -> str:
80
  """Convert supported file types to text, caching results."""
81
  try:
82
  h = file_hash(file_path)
 
86
  return f.read()
87
 
88
  if file_type == "pdf":
89
+ text = extract_all_pages(file_path, progress_callback)
90
  elif file_type == "csv":
91
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
92
  skip_blank_lines=True, on_bad_lines="skip")
 
129
 
130
  return sections
131
 
132
+ def analyze_medical_records(extracted_text: str, progress_callback=None) -> str:
133
+ """Analyze medical records and return generalized structured response."""
134
  # Split text into chunks to handle large inputs
135
  chunk_size = 10000
136
  chunks = [extracted_text[i:i + chunk_size] for i in range(0, len(extracted_text), chunk_size)]
137
 
138
+ # Generalized analysis template (replace with model or rule-based logic)
139
  raw_response_template = """
140
  Missed Diagnoses:
141
+ - Chronic conditions potentially missed due to inconsistent monitoring of vital signs or symptoms. This may occur when patient visits are infrequent or records lack longitudinal tracking, leading to undetected trends. Undiagnosed conditions can progress, increasing risks of complications like organ damage. Recommended action: Implement regular screening protocols and trend analysis for key indicators (e.g., blood pressure, glucose levels).
142
+ - Risk factors for hereditary or lifestyle-related diseases not screened despite documented family history or patient demographics. Screening oversights often stem from time constraints or lack of standardized protocols. Delayed diagnosis may lead to preventable disease progression. Recommended action: Establish routine risk assessments based on family history and clinical guidelines.
143
 
144
  Medication Conflicts:
145
+ - Potential interactions from polypharmacy or untracked over-the-counter medications. Conflicts may arise when multiple prescribers are involved or patients self-medicate, increasing risks of adverse events like bleeding or toxicity. Recommended action: Conduct comprehensive medication reconciliation at each visit and educate patients on reporting all medications.
146
 
147
  Incomplete Assessments:
148
+ - Symptoms reported but not fully evaluated due to incomplete documentation or failure to follow clinical guidelines. This can occur in busy clinical settings where time limits prioritize acute issues over thorough investigation. Unaddressed symptoms may mask serious conditions, delaying treatment. Recommended action: Standardize symptom evaluation protocols and ensure adequate time for comprehensive assessments.
149
 
150
  Urgent Follow-up:
151
+ - Critical findings requiring specialist referral or additional testing delayed due to communication gaps or scheduling issues. Delays often result from fragmented care coordination or underestimation of findings' severity. Untreated critical issues can lead to rapid deterioration. Recommended action: Establish clear referral pathways and prioritize urgent findings with defined timelines.
152
  """
153
 
154
  # Aggregate findings across chunks
 
165
  parsed = parse_analysis_response(raw_response)
166
  for section, items in parsed.items():
167
  all_sections[section].update(items)
168
+ if progress_callback:
169
+ progress_callback(chunk_idx, len(chunks))
170
 
171
+ # Format generalized response
172
  response = ["### Clinical Oversight Analysis\n"]
173
+ response.append("This analysis reviews patient records to identify common reasons for potential oversights that could impact clinical outcomes. Findings highlight systemic or procedural gaps, associated risks, and actionable recommendations applicable across various patient records.\n")
174
  has_findings = False
175
  for section, items in all_sections.items():
176
  response.append(f"#### {section}")
 
178
  response.extend(sorted(items))
179
  has_findings = True
180
  else:
181
+ response.append("- No issues identified in this category.")
182
  response.append("")
183
 
184
  response.append("### Summary")
185
+ if has_findings:
186
+ summary = ("The analysis identified common procedural and systemic gaps that may lead to oversights in diagnosis, medication management, assessments, and follow-up care. These gaps, such as inconsistent monitoring, incomplete documentation, or communication delays, pose risks of disease progression, adverse events, or delayed treatment. Recommended actions include standardizing screening and assessment protocols, improving medication reconciliation, and establishing clear referral pathways. Implementing these measures can enhance patient safety and care quality across diverse clinical scenarios.")
187
+ else:
188
+ summary = ("No significant oversights were identified in the provided records. Current practices appear aligned with general clinical standards. To maintain care quality, continue regular monitoring, ensure comprehensive documentation, and adhere to guideline-based screening and follow-up protocols.")
189
  response.append(summary)
190
 
191
  return "\n".join(response)
 
193
  def create_ui():
194
  """Create Gradio UI for clinical oversight analysis."""
195
  def analyze(message: str, history: List[dict], files: List):
196
+ """Handle analysis with animated progress updates."""
197
  history.append({"role": "user", "content": message})
 
198
  yield history, None
199
 
200
  extracted_text = ""
201
  file_hash_value = ""
202
  if files:
203
+ # Progress callback for extraction
204
+ total_pages = 0
205
+ processed_pages = 0
206
+ def update_extraction_progress(current, total):
207
+ nonlocal processed_pages, total_pages
208
+ processed_pages = current
209
+ total_pages = total
210
+ animation = ["πŸŒ€", "πŸ”„", "βš™οΈ", "πŸ”ƒ"][(int(time.time() * 2) % 4)]
211
+ history[-1] = {"role": "assistant", "content": f"Extracting text... {animation} Page {processed_pages}/{total_pages}"}
212
+ return history, None
213
+
214
  with ThreadPoolExecutor(max_workers=4) as executor:
215
+ futures = [executor.submit(convert_file_to_text, f.name, f.name.split(".")[-1].lower(), update_extraction_progress) for f in files]
216
  results = [f.result() for f in futures]
217
  extracted_text = "\n".join(sanitize_utf8(r) for r in results if r)
218
  file_hash_value = file_hash(files[0].name) if files else ""
219
 
220
+ history.append({"role": "assistant", "content": "βœ… Text extraction complete."})
 
221
  yield history, None
222
 
223
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
224
 
225
  try:
226
+ # Progress callback for analysis
227
+ total_chunks = 0
228
+ processed_chunks = 0
229
+ def update_analysis_progress(current, total):
230
+ nonlocal processed_chunks, total_chunks
231
+ processed_chunks = current
232
+ total_chunks = total
233
+ animation = ["πŸ”", "πŸ“Š", "🧠", "πŸ”Ž"][(int(time.time() * 2) % 4)]
234
+ history[-1] = {"role": "assistant", "content": f"Analyzing records... {animation} Chunk {processed_chunks}/{total_chunks}"}
235
+ return history, None
236
+
237
+ history.append({"role": "assistant", "content": "Analyzing records... πŸ”"})
238
+ yield history, None
239
+ response = analyze_medical_records(extracted_text, update_analysis_progress)
240
+
241
  history.pop() # Remove "Analyzing..."
242
  history.append({"role": "assistant", "content": response})
243
  if report_path: