Ali2206 committed on
Commit
2e43581
·
verified ·
1 Parent(s): 1ba0100

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -57
app.py CHANGED
@@ -34,9 +34,6 @@ sys.path.insert(0, src_path)
34
 
35
  from txagent.txagent import TxAgent
36
 
37
- MEDICAL_KEYWORDS = {'diagnosis', 'assessment', 'plan', 'results', 'medications',
38
- 'allergies', 'summary', 'impression', 'findings', 'recommendations'}
39
-
40
  def sanitize_utf8(text: str) -> str:
41
  return text.encode("utf-8", "ignore").decode("utf-8")
42
 
@@ -44,23 +41,14 @@ def file_hash(path: str) -> str:
44
  with open(path, "rb") as f:
45
  return hashlib.md5(f.read()).hexdigest()
46
 
47
- def extract_priority_pages(file_path: str, max_chars: int = 6000) -> str:
48
  try:
49
  text_chunks = []
50
- total_chars = 0
51
  with pdfplumber.open(file_path) as pdf:
52
  for i, page in enumerate(pdf.pages):
53
  page_text = page.extract_text() or ""
54
- if i < 3 or any(re.search(rf'\b{kw}\b', page_text.lower()) for kw in MEDICAL_KEYWORDS):
55
- page_chunk = f"=== Page {i+1} ===\n{page_text.strip()}\n"
56
- if total_chars + len(page_chunk) <= max_chars:
57
- text_chunks.append(page_chunk)
58
- total_chars += len(page_chunk)
59
- else:
60
- remaining = max_chars - total_chars
61
- text_chunks.append(page_chunk[:remaining])
62
- break
63
- return "".join(text_chunks).strip()
64
  except Exception as e:
65
  return f"PDF processing error: {str(e)}"
66
 
@@ -73,7 +61,7 @@ def convert_file_to_json(file_path: str, file_type: str) -> str:
73
  return f.read()
74
 
75
  if file_type == "pdf":
76
- text = extract_priority_pages(file_path)
77
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
78
  else:
79
  result = json.dumps({"error": f"Unsupported file type: {file_type}"})
@@ -103,7 +91,7 @@ def clean_response(text: str) -> str:
103
  text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
104
  text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
105
  text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
106
- text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)).*?\n", "", text, flags=re.DOTALL)
107
  text = re.sub(r"\n{3,}", "\n\n", text).strip()
108
  if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
109
  return ""
@@ -147,59 +135,68 @@ def create_ui(agent):
147
  extracted = "\n".join(results)
148
  file_hash_value = file_hash(files[0].name) if files else ""
149
 
150
- prompt = f"""
151
- Analyze the medical records and list potential doctor oversights under these headings only, with brief details:
 
 
152
 
153
- **Missed Diagnoses**: Inconsistencies or unaddressed conditions.
154
- **Medication Conflicts**: Contraindications or risky prescriptions.
155
- **Incomplete Assessments**: Missing or shallow evaluations.
156
- **Urgent Follow-up**: Issues needing immediate attention.
157
 
158
- Records:
159
- {extracted[:6000]}
 
 
160
 
161
- Respond concisely.
 
162
  """
163
 
164
  try:
165
  history.append({"role": "assistant", "content": "🔄 Analyzing..."})
166
  yield history, None
167
 
168
- response = ""
169
- for output in agent.run_gradio_chat(
170
- message=prompt,
171
- history=[],
172
- temperature=0.1,
173
- max_new_tokens=512,
174
- max_token=4096,
175
- call_agent=False,
176
- conversation=[],
177
- ):
178
- if output is None:
179
- continue
180
- if isinstance(output, list):
181
- for m in output:
182
- if hasattr(m, 'content') and m.content:
183
- cleaned = clean_response(m.content)
184
- if cleaned:
185
- response += cleaned + "\n"
186
- history[-1]["content"] = response.strip()
187
- yield history, None
188
- elif isinstance(output, str) and output.strip():
189
- cleaned = clean_response(output)
190
- if cleaned:
191
- response += cleaned + "\n"
192
- history[-1]["content"] = response.strip()
193
- yield history, None
194
-
195
- if not response:
 
 
 
 
 
196
  history[-1]["content"] = "No oversights identified."
197
- yield history, None
 
198
 
199
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
200
- if report_path and response:
201
  with open(report_path, "w", encoding="utf-8") as f:
202
- f.write(response.strip())
203
  yield history, report_path if report_path and os.path.exists(report_path) else None
204
 
205
  except Exception as e:
 
34
 
35
  from txagent.txagent import TxAgent
36
 
 
 
 
37
  def sanitize_utf8(text: str) -> str:
38
  return text.encode("utf-8", "ignore").decode("utf-8")
39
 
 
41
  with open(path, "rb") as f:
42
  return hashlib.md5(f.read()).hexdigest()
43
 
44
+ def extract_all_pages(file_path: str) -> str:
45
  try:
46
  text_chunks = []
 
47
  with pdfplumber.open(file_path) as pdf:
48
  for i, page in enumerate(pdf.pages):
49
  page_text = page.extract_text() or ""
50
+ text_chunks.append(page_text.strip())
51
+ return "\n".join(text_chunks)
 
 
 
 
 
 
 
 
52
  except Exception as e:
53
  return f"PDF processing error: {str(e)}"
54
 
 
61
  return f.read()
62
 
63
  if file_type == "pdf":
64
+ text = extract_all_pages(file_path)
65
  result = json.dumps({"filename": os.path.basename(file_path), "content": text, "status": "initial"})
66
  else:
67
  result = json.dumps({"error": f"Unsupported file type: {file_type}"})
 
91
  text = re.sub(r"\[TOOL_CALLS\].*", "", text, flags=re.DOTALL)
92
  text = re.sub(r"\['get_[^\]]+\']\n?", "", text)
93
  text = re.sub(r"\{'meta':\s*\{.*?\}\s*,\s*'results':\s*\[.*?\]\}\n?", "", text, flags=re.DOTALL)
94
+ text = re.sub(r"(?i)(to analyze|based on|will start|no (drug|clinical|information)|none).*?\n", "", text, flags=re.DOTALL)
95
  text = re.sub(r"\n{3,}", "\n\n", text).strip()
96
  if not re.search(r"(Missed Diagnoses|Medication Conflicts|Incomplete Assessments|Urgent Follow-up)", text, re.IGNORECASE):
97
  return ""
 
135
  extracted = "\n".join(results)
136
  file_hash_value = file_hash(files[0].name) if files else ""
137
 
138
+ # Split into small chunks of 2,000 characters
139
+ chunk_size = 2000
140
+ chunks = [extracted[i:i + chunk_size] for i in range(0, len(extracted), chunk_size)]
141
+ combined_response = ""
142
 
143
+ prompt_template = f"""
144
+ List doctor oversights in the medical records under these headings with brief details:
 
 
145
 
146
+ **Missed Diagnoses**: Unaddressed conditions or inconsistencies.
147
+ **Medication Conflicts**: Risky prescriptions.
148
+ **Incomplete Assessments**: Missing evaluations.
149
+ **Urgent Follow-up**: Issues needing attention.
150
 
151
+ Records:
152
+ {{chunk}}
153
  """
154
 
155
  try:
156
  history.append({"role": "assistant", "content": "🔄 Analyzing..."})
157
  yield history, None
158
 
159
+ for chunk_idx, chunk in enumerate(chunks, 1):
160
+ prompt = prompt_template.format(chunk=chunk)
161
+ chunk_response = ""
162
+ for output in agent.run_gradio_chat(
163
+ message=prompt,
164
+ history=[],
165
+ temperature=0.1,
166
+ max_new_tokens=256,
167
+ max_token=4096,
168
+ call_agent=False,
169
+ conversation=[],
170
+ ):
171
+ if output is None:
172
+ continue
173
+ if isinstance(output, list):
174
+ for m in output:
175
+ if hasattr(m, 'content') and m.content:
176
+ cleaned = clean_response(m.content)
177
+ if cleaned:
178
+ chunk_response += cleaned + "\n"
179
+ history[-1]["content"] = combined_response + chunk_response.strip()
180
+ yield history, None
181
+ elif isinstance(output, str) and output.strip():
182
+ cleaned = clean_response(output)
183
+ if cleaned:
184
+ chunk_response += cleaned + "\n"
185
+ history[-1]["content"] = combined_response + chunk_response.strip()
186
+ yield history, None
187
+
188
+ if chunk_response:
189
+ combined_response += chunk_response
190
+
191
+ if not combined_response:
192
  history[-1]["content"] = "No oversights identified."
193
+ else:
194
+ history[-1]["content"] = combined_response.strip()
195
 
196
  report_path = os.path.join(report_dir, f"{file_hash_value}_report.txt") if file_hash_value else None
197
+ if report_path and combined_response:
198
  with open(report_path, "w", encoding="utf-8") as f:
199
+ f.write(combined_response)
200
  yield history, report_path if report_path and os.path.exists(report_path) else None
201
 
202
  except Exception as e: