Ali2206 committed on
Commit
3683afe
·
verified ·
1 Parent(s): 176dbe1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -95
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
6
  from typing import List, Dict
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
  import hashlib
 
9
 
10
  # Persistent directories
11
  persistent_dir = "/data/hf_cache"
@@ -15,12 +16,6 @@ report_dir = os.path.join(persistent_dir, "reports")
15
  for directory in [file_cache_dir, report_dir]:
16
  os.makedirs(directory, exist_ok=True)
17
 
18
- # Medical keywords for PDF extraction
19
- MEDICAL_KEYWORDS = {
20
- 'diagnosis', 'assessment', 'plan', 'results', 'medications',
21
- 'allergies', 'summary', 'impression', 'findings', 'recommendations'
22
- }
23
-
24
  def sanitize_utf8(text: str) -> str:
25
  """Sanitize text to handle UTF-8 encoding issues."""
26
  return text.encode("utf-8", "ignore").decode("utf-8")
@@ -30,20 +25,19 @@ def file_hash(path: str) -> str:
30
  with open(path, "rb") as f:
31
  return hashlib.md5(f.read()).hexdigest()
32
 
33
- def extract_priority_pages(file_path: str) -> str:
34
- """Extract text from PDF pages, prioritizing those with medical keywords."""
35
  try:
36
  text_chunks = []
37
  with pdfplumber.open(file_path) as pdf:
38
- for i, page in enumerate(pdf.pages):
39
  page_text = page.extract_text() or ""
40
- if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
41
- text_chunks.append(f"=== Page {i+1} ===\n{page_text.strip()}")
42
- return "\n\n".join(text_chunks)
43
- except Exception as e:
44
- return f"PDF processing error: {str(e)}"
45
 
46
- def convert_file_to_text(file_path: str, file_type: str) -> str:
47
  """Convert supported file types to text, caching results."""
48
  try:
49
  h = file_hash(file_path)
@@ -53,28 +47,26 @@ def convert_file_to_text(file_path: str, file_type: str) -> str:
53
  return f.read()
54
 
55
  if file_type == "pdf":
56
- text = extract_priority_pages(file_path)
57
  elif file_type == "csv":
58
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
59
- skip_blank_lines=False, on_bad_lines="skip")
60
- text = "\n".join(df.fillna("").astype(str).agg(" ".join, axis=1))
61
  elif file_type in ["xls", "xlsx"]:
62
- try:
63
- df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
64
- except Exception:
65
- df = pd.read_excel(file_path, engine="xlrd", header=None, dtype=str)
66
- text = "\n".join(df.fillna("").astype(str).agg(" ".join, axis=1))
67
  else:
68
- text = f"Unsupported file type: {file_type}"
69
 
70
- with open(cache_path, "w", encoding="utf-8") as f:
71
- f.write(text)
 
72
  return text
73
- except Exception as e:
74
- return f"Error processing {os.path.basename(file_path)}: {str(e)}"
75
 
76
  def parse_analysis_response(raw_response: str) -> Dict[str, List[str]]:
77
- """Parse raw analysis response into structured sections."""
78
  sections = {
79
  "Missed Diagnoses": [],
80
  "Medication Conflicts": [],
@@ -82,110 +74,134 @@ def parse_analysis_response(raw_response: str) -> Dict[str, List[str]]:
82
  "Urgent Follow-up": []
83
  }
84
  current_section = None
85
- lines = raw_response.split("\n")
 
86
 
87
- for line in lines:
88
  line = line.strip()
89
  if not line:
90
  continue
91
- if line.startswith("Missed Diagnoses"):
92
- current_section = "Missed Diagnoses"
93
- elif line.startswith("Medication Conflicts"):
94
- current_section = "Medication Conflicts"
95
- elif line.startswith("Incomplete Assessments"):
96
- current_section = "Incomplete Assessments"
97
- elif line.startswith("Urgent Follow-up"):
98
- current_section = "Urgent Follow-up"
99
- elif current_section and line.startswith("-"):
100
  sections[current_section].append(line)
101
 
102
  return sections
103
 
104
- def analyze_medical_records(extracted_text: str) -> str:
105
- """Analyze medical records for clinical oversights and return structured response."""
106
- # Placeholder for dynamic analysis (replace with actual model or rule-based logic)
107
- # Example response to demonstrate flexibility with varying content
108
- raw_response = """
 
 
 
 
109
  Missed Diagnoses:
110
  - Undiagnosed hypertension despite elevated BP readings.
111
  - Family history of diabetes not evaluated for prediabetes risk.
112
 
113
  Medication Conflicts:
114
- - Concurrent use of SSRIs and NSAIDs detected, increasing risk of gastrointestinal bleeding.
115
- - Beta-blocker prescribed without assessing asthma history, risking bronchospasm.
116
 
117
  Incomplete Assessments:
118
- - No cardiac stress test despite reported chest pain.
119
- - Social history lacks documentation of substance use or living conditions.
120
 
121
  Urgent Follow-up:
122
- - Abnormal ECG results require immediate cardiology referral.
123
- - Elevated liver enzymes not addressed, needing hepatology consultation.
124
  """
125
 
126
- # Parse the raw response into sections
127
- parsed = parse_analysis_response(raw_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- # Format the response
130
  response = ["### Clinical Oversight Analysis\n"]
131
  has_findings = False
132
- for section, items in parsed.items():
133
  response.append(f"#### {section}")
134
  if items:
135
- response.extend(items)
136
  has_findings = True
137
  else:
138
  response.append("- None identified.")
139
- response.append("") # Add newline for readability
140
 
141
  response.append("### Summary")
142
- if has_findings:
143
- summary = "The analysis identified potential oversights in diagnosis, medication management, assessments, and follow-up needs. Immediate action is recommended to address critical findings and ensure comprehensive patient care."
144
- else:
145
- summary = "No significant clinical oversights were identified in the provided records. Continue monitoring and ensure complete documentation."
146
  response.append(summary)
147
 
148
- return "\n".join(response)
149
 
150
- def create_ui():
151
  """Create Gradio UI for clinical oversight analysis."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
153
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
154
  chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
155
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
156
  msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
157
  send_btn = gr.Button("Analyze", variant="primary")
158
- download_output = gr.File(label="Download Full Report")
159
-
160
- def analyze(message: str, history: List[dict], files: List):
161
- """Handle analysis of medical records and update UI."""
162
- history.append({"role": "user", "content": message})
163
- history.append({"role": "assistant", "content": "⏳ Analyzing records for potential oversights..."})
164
- yield history, None
165
-
166
- extracted_text = ""
167
- file_hash_value = ""
168
- if files:
169
- with ThreadPoolExecutor(max_workers=6) as executor:
170
- futures = [executor.submit(convert_file_to_text, f.name, f.name.split(".")[-1].lower()) for f in files]
171
- extracted_text = "\n".join(sanitize_utf8(f.result()) for f in as_completed(futures))
172
- file_hash_value = file_hash(files[0].name) if files else ""
173
-
174
- # Analyze extracted text
175
- history.pop() # Remove "Analyzing..." message
176
- try:
177
- response = analyze_medical_records(extracted_text)
178
- history.append({"role": "assistant", "content": response})
179
-
180
- # Generate report file
181
- report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
182
- if report_path:
183
- with open(report_path, "w", encoding="utf-8") as f:
184
- f.write(response)
185
- yield history, report_path if report_path and os.path.exists(report_path) else None
186
- except Exception as e:
187
- history.append({"role": "assistant", "content": f"❌ Error occurred: {str(e)}"})
188
- yield history, None
189
 
190
  send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
191
  msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
@@ -194,7 +210,7 @@ def create_ui():
194
  if __name__ == "__main__":
195
  print("🚀 Launching app...")
196
  try:
197
- demo = create_ui()
198
  demo.queue(api_open=False).launch(
199
  server_name="0.0.0.0",
200
  server_port=7860,
 
6
  from typing import List, Dict
7
  from concurrent.futures import ThreadPoolExecutor, as_completed
8
  import hashlib
9
+ import asyncio
10
 
11
  # Persistent directories
12
  persistent_dir = "/data/hf_cache"
 
16
  for directory in [file_cache_dir, report_dir]:
17
  os.makedirs(directory, exist_ok=True)
18
 
 
 
 
 
 
 
19
def sanitize_utf8(text: str) -> str:
    """Drop any characters that cannot survive a UTF-8 round trip.

    Encoding with ``errors="ignore"`` silently discards unencodable code
    points (e.g. lone surrogates), and decoding the result back yields a
    guaranteed-valid UTF-8 string.
    """
    cleaned = text.encode("utf-8", errors="ignore")
    return cleaned.decode("utf-8")
 
25
  with open(path, "rb") as f:
26
  return hashlib.md5(f.read()).hexdigest()
27
 
28
def extract_all_pages(file_path: str) -> str:
    """Return the stripped text of every page of a PDF, newline-joined.

    Any failure (unreadable file, parser error) yields an empty string, so
    callers can treat a broken document as simply having no extractable text.
    """
    try:
        with pdfplumber.open(file_path) as pdf:
            pages = [(page.extract_text() or "").strip() for page in pdf.pages]
        return "\n".join(pages)
    except Exception:
        # Best-effort extraction: swallow parser errors and report no text.
        return ""
 
39
 
40
+ async def convert_file_to_text(file_path: str, file_type: str) -> str:
41
  """Convert supported file types to text, caching results."""
42
  try:
43
  h = file_hash(file_path)
 
47
  return f.read()
48
 
49
  if file_type == "pdf":
50
+ text = extract_all_pages(file_path)
51
  elif file_type == "csv":
52
  df = pd.read_csv(file_path, encoding_errors="replace", header=None, dtype=str,
53
+ skip_blank_lines=True, on_bad_lines="skip")
54
+ text = " ".join(df.fillna("").astype(str).agg(" ".join, axis=1))
55
  elif file_type in ["xls", "xlsx"]:
56
+ df = pd.read_excel(file_path, engine="openpyxl", header=None, dtype=str)
57
+ text = " ".join(df.fillna("").astype(str).agg(" ".join, axis=1))
 
 
 
58
  else:
59
+ text = ""
60
 
61
+ if text:
62
+ with open(cache_path, "w", encoding="utf-8") as f:
63
+ f.write(text)
64
  return text
65
+ except Exception:
66
+ return ""
67
 
68
def parse_analysis_response(raw_response: str) -> Dict[str, List[str]]:
    """Parse raw analysis response into structured sections using regex.

    A section header is a line of the exact form ``<Section Name>:``; every
    subsequent ``- ...`` bullet line is filed under the most recent header.
    Lines that are neither headers nor bullets are ignored.
    """
    sections: Dict[str, List[str]] = {
        "Missed Diagnoses": [],
        "Medication Conflicts": [],
        "Incomplete Assessments": [],
        "Urgent Follow-up": [],
    }
    header_re = re.compile(
        r"^(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up):$",
        re.MULTILINE,
    )
    bullet_re = re.compile(r"^- .+$", re.MULTILINE)

    active = None
    for raw_line in raw_response.splitlines():
        stripped = raw_line.strip()
        if not stripped:
            continue
        if header_re.match(stripped):
            # Strip the trailing ':' to recover the section key.
            active = stripped[:-1]
        elif active and bullet_re.match(stripped):
            sections[active].append(stripped)

    return sections
90
 
91
async def analyze_medical_records(extracted_text: str) -> str:
    """Analyze medical records and stream a structured response.

    Yields one partial markdown report per input chunk, then a final
    consolidated report with a summary line.  Findings are deduplicated
    across chunks via sets.
    """
    # Break large inputs into fixed-size windows so each can be examined
    # independently; an empty input produces no chunks and only the final
    # "None identified" report.
    window = 10000
    chunks = [extracted_text[start:start + window]
              for start in range(0, len(extracted_text), window)]

    # Placeholder for analysis (replace with model or rule-based logic);
    # the same canned response stands in for every chunk's result.
    raw_response_template = """
    Missed Diagnoses:
    - Undiagnosed hypertension despite elevated BP readings.
    - Family history of diabetes not evaluated for prediabetes risk.

    Medication Conflicts:
    - SSRIs and NSAIDs detected, increasing GI bleeding risk.

    Incomplete Assessments:
    - No cardiac stress test despite chest pain.

    Urgent Follow-up:
    - Abnormal ECG requires cardiology referral.
    """

    # Aggregate findings across chunks; sets collapse duplicates.
    all_sections = {
        "Missed Diagnoses": set(),
        "Medication Conflicts": set(),
        "Incomplete Assessments": set(),
        "Urgent Follow-up": set(),
    }

    for chunk_idx, _chunk in enumerate(chunks, 1):
        # Simulated per-chunk analysis; real logic would inspect _chunk.
        parsed = parse_analysis_response(raw_response_template)
        for section, items in parsed.items():
            all_sections[section].update(items)

        # Stream a partial snapshot of everything found so far.
        partial = [f"### Clinical Oversight Analysis (Chunk {chunk_idx}/{len(chunks)})\n"]
        for section, items in all_sections.items():
            partial.append(f"#### {section}")
            if items:
                partial.extend(sorted(items))
            else:
                partial.append("- None identified.")
            partial.append("")
        yield "\n".join(partial)

    # Final consolidated report.
    final = ["### Clinical Oversight Analysis\n"]
    has_findings = False
    for section, items in all_sections.items():
        final.append(f"#### {section}")
        if items:
            final.extend(sorted(items))
            has_findings = True
        else:
            final.append("- None identified.")
        final.append("")

    final.append("### Summary")
    if has_findings:
        summary = ("The analysis identified potential oversights in diagnosis, medication management, "
                   "assessments, and follow-up needs. Immediate action is recommended.")
    else:
        summary = "No significant oversights identified. Continue monitoring."
    final.append(summary)

    yield "\n".join(final)
163
 
164
+ async def create_ui():
165
  """Create Gradio UI for clinical oversight analysis."""
166
+ async def analyze(message: str, history: List[dict], files: List):
167
+ """Handle analysis and stream results to UI."""
168
+ history.append({"role": "user", "content": message})
169
+ history.append({"role": "assistant", "content": "⏳ Analyzing..."})
170
+ yield history, None
171
+
172
+ extracted_text = ""
173
+ file_hash_value = ""
174
+ if files:
175
+ tasks = [convert_file_to_text(f.name, f.name.split(".")[-1].lower()) for f in files]
176
+ results = await asyncio.gather(*tasks, return_exceptions=True)
177
+ extracted_text = "\n".join(sanitize_utf8(r) for r in results if isinstance(r, str))
178
+ file_hash_value = file_hash(files[0].name) if files else ""
179
+
180
+ history.pop() # Remove "Analyzing..."
181
+ report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
182
+ full_response = []
183
+
184
+ try:
185
+ async for partial_response in analyze_medical_records(extracted_text):
186
+ full_response = partial_response.splitlines()
187
+ history.append({"role": "assistant", "content": partial_response})
188
+ yield history, None
189
+
190
+ if report_path:
191
+ with open(report_path, "w", encoding="utf-8") as f:
192
+ f.write("\n".join(full_response))
193
+ yield history, report_path if report_path and os.path.exists(report_path) else None
194
+ except Exception as e:
195
+ history.append({"role": "assistant", "content": f"❌ Error: {str(e)}"})
196
+ yield history, None
197
+
198
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
199
  gr.Markdown("<h1 style='text-align: center;'>🩺 Clinical Oversight Assistant</h1>")
200
  chatbot = gr.Chatbot(label="Analysis", height=600, type="messages")
201
  file_upload = gr.File(file_types=[".pdf", ".csv", ".xls", ".xlsx"], file_count="multiple")
202
  msg_input = gr.Textbox(placeholder="Ask about potential oversights...", show_label=False)
203
  send_btn = gr.Button("Analyze", variant="primary")
204
+ download_output = gr.File(label="Download Report")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  send_btn.click(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
207
  msg_input.submit(analyze, inputs=[msg_input, gr.State([]), file_upload], outputs=[chatbot, download_output])
 
210
  if __name__ == "__main__":
211
  print("🚀 Launching app...")
212
  try:
213
+ demo = asyncio.run(create_ui())
214
  demo.queue(api_open=False).launch(
215
  server_name="0.0.0.0",
216
  server_port=7860,