Ali2206 committed · verified · Commit 7a596d9 · 1 Parent(s): 2e43581

Update app.py

Files changed (1):
  1. app.py +61 -31
app.py CHANGED
@@ -9,6 +9,7 @@ import hashlib
 import re
 import psutil
 import subprocess
+from collections import defaultdict
 
 # Persistent directory
 persistent_dir = "/data/hf_cache"
@@ -45,7 +46,7 @@ def extract_all_pages(file_path: str) -> str:
     try:
         text_chunks = []
         with pdfplumber.open(file_path) as pdf:
-            for i, page in enumerate(pdf.pages):
+            for page in pdf.pages:
                 page_text = page.extract_text() or ""
                 text_chunks.append(page_text.strip())
         return "\n".join(text_chunks)
@@ -88,15 +89,49 @@ def log_system_usage(tag=""):
 
 def clean_response(text: str) -> str:
     text = sanitize_utf8(text)
-    text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
-    text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
+    # Remove all tool-related and reasoning text
+    text = re.sub(r"\[TOOL_CALLS\].*|(?:get_|tool\s|retrieve\s).*?\n", "", text, flags=re.DOTALL | re.IGNORECASE)
     text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
-    text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)|none).*?\n", "", text, flags=re.DOTALL)
+    text = re.sub(r"(?i)(to address|analyze the|will (start|look|use|focus)|since the|no (drug|clinical|information)|none|previous|attempt|involve|check for|explore|manually).*?\n", "", text, flags=re.DOTALL)
     text = re.sub(r"\n{3,}", "\n\n", text).strip()
-    if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
+    # Only keep text under the expected headings (tolerate "**Heading**" markup)
+    if not re.search(r"^\**\s*(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.MULTILINE | re.IGNORECASE):
         return ""
     return text
 
+def consolidate_findings(responses: List[str]) -> str:
+    # Aggregate findings under each heading, removing duplicates
+    findings = defaultdict(set)
+    headings = ["Missed Diagnoses", "Medication Conflicts", "Incomplete Assessments", "Urgent Follow-up"]
+
+    for response in responses:
+        if not response:
+            continue
+        # Split response into sections by heading
+        current_heading = None
+        current_points = []
+        for line in response.split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+            if any(line.lstrip("*# ").lower().startswith(h.lower()) for h in headings):
+                if current_heading and current_points:
+                    findings[current_heading].update(current_points)
+                current_heading = next(h for h in headings if line.lstrip("*# ").lower().startswith(h.lower()))
+                current_points = []
+            elif current_heading and line.startswith("-"):
+                current_points.append(line)
+        if current_heading and current_points:
+            findings[current_heading].update(current_points)
+
+    # Format consolidated output
+    output = []
+    for heading in headings:
+        if findings[heading]:
+            output.append(f"**{heading}**:")
+            output.extend(sorted(findings[heading]))
+    return "\n".join(output).strip() if output else "No oversights identified."
+
 def init_agent():
     print("🔁 Initializing model...")
     log_system_usage("Before Load")
@@ -124,6 +159,7 @@ def create_ui(agent):
 
     def analyze(message: str, history: List[dict], files: List):
         history.append({"role": "user", "content": message})
+        history.append({"role": "assistant", "content": "🔄 Analyzing..."})
         yield history, None
 
         extracted = ""
@@ -135,28 +171,26 @@ def create_ui(agent):
             extracted = "\n".join(results)
         file_hash_value = file_hash(files[0].name) if files else ""
 
-        # Split into small chunks of 2,000 characters
-        chunk_size = 2000
+        # Split into small chunks of 1,500 characters
+        chunk_size = 1500
         chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
-        combined_response = ""
+        chunk_responses = []
 
-        prompt_template = f"""
-List doctor oversights in the medical records under these headings with brief details:
+        prompt_template = """
+List doctor oversights under these headings only, with one brief point each. No tools or reasoning steps.
 
-**Missed Diagnoses**: Unaddressed conditions or inconsistencies.
-**Medication Conflicts**: Risky prescriptions.
-**Incomplete Assessments**: Missing evaluations.
-**Urgent Follow-up**: Issues needing attention.
+**Missed Diagnoses**:
+**Medication Conflicts**:
+**Incomplete Assessments**:
+**Urgent Follow-up**:
 
 Records:
-{{chunk}}
+{chunk}
 """
 
         try:
-            history.append({"role": "assistant", "content": "🔄 Analyzing..."})
-            yield history, None
-
-            for chunk_idx, chunk in enumerate(chunks, 1):
+            # Process all chunks, collecting responses
+            for chunk in chunks:
                 prompt = prompt_template.format(chunk=chunk)
                 chunk_response = ""
                 for output in agent.run_gradio_chat(
@@ -176,27 +210,23 @@ Records:
                             cleaned = clean_response(m.content)
                             if cleaned:
                                 chunk_response += cleaned + "\n"
-                                history[-1]["content"] = combined_response + chunk_response.strip()
-                                yield history, None
                     elif isinstance(output, str) and output.strip():
                         cleaned = clean_response(output)
                         if cleaned:
                             chunk_response += cleaned + "\n"
-                            history[-1]["content"] = combined_response + chunk_response.strip()
-                            yield history, None
-
                 if chunk_response:
-                    combined_response += chunk_response
+                    chunk_responses.append(chunk_response)
 
-            if not combined_response:
-                history[-1]["content"] = "No oversights identified."
-            else:
-                history[-1]["content"] = combined_response.strip()
+            # Consolidate all responses into one final output
+            final_response = consolidate_findings(chunk_responses)
+            history[-1]["content"] = final_response
+            yield history, None
 
+            # Generate report file
             report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
-            if report_path and combined_response:
+            if report_path and final_response != "No oversights identified.":
                 with open(report_path, "w", encoding="utf-8") as f:
-                    f.write(combined_response)
+                    f.write(final_response)
             yield history, report_path if report_path and os.path.exists(report_path) else None
 
         except Exception as e:
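Note on the chunking change: chunk_size drops from 2,000 to 1,500 characters, and the slicing is plain fixed-width indexing, so a chunk boundary can still split a sentence (and a finding) across two prompts. A minimal standalone sketch of the arithmetic, with made-up input:

# Fixed-width slicing as in analyze(): a 4,000-character record
# yields chunks of 1500, 1500, and 1000 characters.
extracted = "x" * 4000
chunk_size = 1500
chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
print([len(c) for c in chunks])  # [1500, 1500, 1000]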
 
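Note on the moved placeholder: appending the "🔄 Analyzing..." assistant message before the first yield shows a stub immediately, and the single final yield replaces it with the consolidated report instead of streaming partial text per chunk. The pattern in isolation, as a hypothetical minimal Gradio demo (layout and names invented, not the app's actual UI):

import gradio as gr

def analyze(message, history):
    # The stub is yielded first, then overwritten by the final answer.
    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": "🔄 Analyzing..."},
    ]
    yield history
    history[-1]["content"] = "Final consolidated findings."
    yield history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    box = gr.Textbox()
    box.submit(analyze, [box, chatbot], chatbot)

demo.launch()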