Ali2206 committed on
Commit
13ad0d3
·
verified ·
1 Parent(s): 769cea3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -194
app.py CHANGED
@@ -33,17 +33,16 @@ sys.path.insert(0, src_path)
33
  from txagent.txagent import TxAgent
34
 
35
  # Constants
36
- MAX_TOKENS = 32768 # TxAgent's maximum token limit
37
- CHUNK_SIZE = 3000 # Target chunk size to stay under token limit
38
- MAX_NEW_TOKENS = 1024
 
39
 
40
  def file_hash(path: str) -> str:
41
- """Generate MD5 hash of file contents"""
42
  with open(path, "rb") as f:
43
  return hashlib.md5(f.read()).hexdigest()
44
 
45
  def clean_response(text: str) -> str:
46
- """Clean and normalize text output"""
47
  try:
48
  text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
49
  except UnicodeError:
@@ -55,21 +54,19 @@ def clean_response(text: str) -> str:
55
  return text.strip()
56
 
57
  def estimate_tokens(text: str) -> int:
58
- """Approximate token count (1 token ~ 4 characters)"""
59
- return len(text) // 4
60
 
61
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
62
- """Process raw patient data into structured format"""
63
  data = {
64
  'bookings': defaultdict(list),
65
  'medications': defaultdict(list),
66
  'diagnoses': defaultdict(list),
67
  'tests': defaultdict(list),
 
68
  'doctors': set(),
69
  'timeline': []
70
  }
71
 
72
- # Sort by date and group by booking
73
  df = df.sort_values('Interview Date')
74
  for booking, group in df.groupby('Booking Number'):
75
  for _, row in group.iterrows():
@@ -87,121 +84,78 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
87
  data['timeline'].append(entry)
88
  data['doctors'].add(entry['doctor'])
89
 
90
- # Categorize entries
91
  form_lower = entry['form'].lower()
92
  if 'medication' in form_lower or 'drug' in form_lower:
93
  data['medications'][entry['item']].append(entry)
94
- elif 'diagnosis' in form_lower:
95
  data['diagnoses'][entry['item']].append(entry)
96
- elif 'test' in form_lower or 'lab' in form_lower:
97
  data['tests'][entry['item']].append(entry)
 
 
98
 
99
  return data
100
 
101
- def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
102
- """Generate focused analysis prompt for a booking"""
103
- booking_entries = patient_data['bookings'][booking]
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Build timeline string
106
- timeline = "\n".join(
107
- f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
108
- for entry in booking_entries
109
- )
110
-
111
- # Get current medications
112
- current_meds = []
113
- for med, entries in patient_data['medications'].items():
114
- if any(e['booking'] == booking for e in entries):
115
- latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
116
- current_meds.append(f"- {med}: {latest['response']} (as of {latest['date']})")
117
 
118
- # Get current diagnoses
119
- current_diags = []
120
- for diag, entries in patient_data['diagnoses'].items():
121
- if any(e['booking'] == booking for e in entries):
122
- latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
123
- current_diags.append(f"- {diag}: {latest['response']} (as of {latest['date']})")
 
 
 
 
 
 
 
 
 
124
 
125
- prompt = """
126
- **Comprehensive Patient Analysis - Booking {booking}**
127
-
128
- **Patient Timeline:**
129
- {timeline}
130
-
131
- **Current Medications:**
132
- {meds}
133
-
134
- **Current Diagnoses:**
135
- {diags}
136
-
137
- **Analysis Instructions:**
138
- 1. Review the patient's complete history across all visits
139
- 2. Identify any potential missed diagnoses based on symptoms and test results
140
- 3. Check for medication conflicts or inappropriate prescriptions
141
- 4. Note any incomplete assessments or missing tests
142
- 5. Flag any urgent follow-up needs
143
- 6. Compare findings across different doctors for consistency
144
-
145
- **Required Output Format:**
146
- ### Missed Diagnoses
147
- [Potential diagnoses that were not identified]
148
-
149
- ### Medication Issues
150
- [Conflicts, side effects, inappropriate prescriptions]
151
 
152
- ### Assessment Gaps
153
- [Missing tests or incomplete evaluations]
154
-
155
- ### Follow-up Recommendations
156
- [Urgent and non-urgent follow-up needs]
157
-
158
- ### Doctor Consistency
159
- [Discrepancies between different providers]
160
- """.format(
161
- booking=booking,
162
- timeline=timeline,
163
- meds='\n'.join(current_meds) if current_meds else "None recorded",
164
- diags='\n'.join(current_diags) if current_diags else "None recorded"
165
- )
166
- return prompt
167
-
168
- def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
169
- """Split patient data into manageable chunks"""
170
- chunks = []
171
- current_chunk = defaultdict(list)
172
- current_size = 0
173
 
174
- for booking, entries in patient_data['bookings'].items():
175
- booking_size = sum(estimate_tokens(str(e)) for e in entries)
176
-
177
- if current_size + booking_size > CHUNK_SIZE and current_chunk:
178
- chunks.append(dict(current_chunk))
179
- current_chunk = defaultdict(list)
180
- current_size = 0
181
-
182
- current_chunk['bookings'][booking] = entries
183
- current_size += booking_size
184
-
185
- # Add related data
186
- for med, med_entries in patient_data['medications'].items():
187
- if any(e['booking'] == booking for e in med_entries):
188
- current_chunk['medications'][med].extend(
189
- e for e in med_entries if e['booking'] == booking
190
- )
191
-
192
- for diag, diag_entries in patient_data['diagnoses'].items():
193
- if any(e['booking'] == booking for e in diag_entries):
194
- current_chunk['diagnoses'][diag].extend(
195
- e for e in diag_entries if e['booking'] == booking
196
- )
197
 
198
- if current_chunk:
199
- chunks.append(dict(current_chunk))
 
 
 
 
 
 
200
 
201
  return chunks
202
 
203
  def init_agent():
204
- """Initialize TxAgent with proper configuration"""
205
  default_tool_path = os.path.abspath("data/new_tool.json")
206
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
207
 
@@ -216,13 +170,12 @@ def init_agent():
216
  enable_checker=True,
217
  step_rag_num=4,
218
  seed=100,
219
- additional_default_tools=[],
220
  )
221
  agent.init_model()
222
  return agent
223
 
224
  def analyze_with_agent(agent, prompt: str) -> str:
225
- """Run analysis with proper error handling"""
226
  try:
227
  response = ""
228
  for result in agent.run_gradio_chat(
@@ -249,124 +202,76 @@ def analyze_with_agent(agent, prompt: str) -> str:
249
 
250
  def create_ui(agent):
251
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
252
- gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
253
 
254
  with gr.Tabs():
255
  with gr.TabItem("Analysis"):
256
  with gr.Row():
257
  with gr.Column(scale=1):
258
  file_upload = gr.File(
259
- label="Upload Patient Excel File",
260
  file_types=[".xlsx"],
261
  file_count="single"
262
  )
263
- analysis_btn = gr.Button("Analyze Patient History", variant="primary")
264
- status = gr.Markdown("Ready for analysis")
265
 
266
  with gr.Column(scale=2):
267
- output_display = gr.Markdown(
268
- label="Analysis Results",
269
- elem_id="results"
270
- )
271
- report_download = gr.File(
272
- label="Download Full Report",
273
- interactive=False
274
- )
275
 
276
  with gr.TabItem("Instructions"):
277
  gr.Markdown("""
278
- ## How to Use This Tool
279
-
280
- 1. **Upload Excel File**: Patient history Excel file
281
- 2. **Click Analyze**: System will process all bookings
282
- 3. **Review Results**: Comprehensive analysis appears
283
- 4. **Download Report**: Full report with all findings
284
 
285
- ### Excel Requirements
286
- Must contain these columns:
287
  - Booking Number
288
  - Interview Date
289
- - Interviewer (Doctor)
290
  - Form Name
291
  - Form Item
292
  - Item Response
293
  - Description
294
-
295
- ### Analysis Includes:
296
- - Missed diagnoses across visits
297
- - Medication conflicts over time
298
- - Incomplete assessments
299
- - Doctor consistency checks
300
- - Follow-up recommendations
301
  """)
302
 
303
- def analyze_patient(file) -> Tuple[str, str]:
304
  if not file:
305
- raise gr.Error("Please upload an Excel file first")
306
 
307
  try:
308
- # Process Excel file
309
  df = pd.read_excel(file.name)
310
  patient_data = process_patient_data(df)
311
-
312
- # Generate and process prompts
313
  full_report = []
314
- bookings_processed = 0
315
 
316
- for booking in patient_data['bookings']:
317
- prompt = generate_analysis_prompt(patient_data, booking)
318
  response = analyze_with_agent(agent, prompt)
319
-
320
- if "Error in analysis" not in response:
321
- bookings_processed += 1
322
- full_report.append(f"## Booking {booking}\n{response}\n")
323
-
324
  yield "\n".join(full_report), None
325
- time.sleep(0.1) # Prevent UI freezing
326
 
327
- # Generate overall summary
328
- if bookings_processed > 1:
329
- summary_prompt = """
330
- **Comprehensive Patient Summary**
331
-
332
- Analyze all bookings ({0} total) to identify:
333
- 1. Patterns across the entire treatment history
334
- 2. Chronic issues that may have been missed
335
- 3. Medication changes over time
336
- 4. Doctor consistency across visits
337
- 5. Long-term recommendations
338
-
339
- **Required Format:**
340
- ### Chronic Health Patterns
341
- [Recurring issues over time]
342
-
343
- ### Treatment Evolution
344
- [How treatment has changed]
345
-
346
- ### Long-term Concerns
347
- [Issues needing ongoing attention]
348
-
349
- ### Comprehensive Recommendations
350
- [Overall care plan]
351
- """.format(bookings_processed)
352
  summary = analyze_with_agent(agent, summary_prompt)
353
- full_report.append(f"## Overall Patient Summary\n{summary}\n")
354
 
355
- # Save report
356
- report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
357
- with open(report_path, 'w', encoding='utf-8') as f:
358
  f.write("\n".join(full_report))
359
 
360
  yield "\n".join(full_report), report_path
361
-
362
  except Exception as e:
363
- raise gr.Error(f"Analysis failed: {str(e)}")
364
 
365
- analysis_btn.click(
366
- analyze_patient,
367
  inputs=file_upload,
368
- outputs=[output_display, report_download],
369
- api_name="analyze"
370
  )
371
 
372
  return demo
@@ -375,17 +280,11 @@ if __name__ == "__main__":
375
  try:
376
  agent = init_agent()
377
  demo = create_ui(agent)
378
-
379
- demo.queue(
380
- api_open=False,
381
- max_size=20
382
- ).launch(
383
  server_name="0.0.0.0",
384
  server_port=7860,
385
- show_error=True,
386
- allowed_paths=[report_dir],
387
- share=False
388
  )
389
  except Exception as e:
390
- print(f"Failed to launch application: {str(e)}")
391
  sys.exit(1)
 
33
  from txagent.txagent import TxAgent
34
 
35
  # Constants
36
+ MAX_TOKENS = 32768
37
+ CHUNK_SIZE = 10000
38
+ MAX_NEW_TOKENS = 2048
39
+ MAX_BOOKINGS_PER_CHUNK = 5
40
 
41
  def file_hash(path: str) -> str:
 
42
  with open(path, "rb") as f:
43
  return hashlib.md5(f.read()).hexdigest()
44
 
45
  def clean_response(text: str) -> str:
 
46
  try:
47
  text = text.encode('utf-8', 'surrogatepass').decode('utf-8')
48
  except UnicodeError:
 
54
  return text.strip()
55
 
56
  def estimate_tokens(text: str) -> int:
57
+ return len(text) // 3.5
 
58
 
59
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
 
60
  data = {
61
  'bookings': defaultdict(list),
62
  'medications': defaultdict(list),
63
  'diagnoses': defaultdict(list),
64
  'tests': defaultdict(list),
65
+ 'procedures': defaultdict(list),
66
  'doctors': set(),
67
  'timeline': []
68
  }
69
 
 
70
  df = df.sort_values('Interview Date')
71
  for booking, group in df.groupby('Booking Number'):
72
  for _, row in group.iterrows():
 
84
  data['timeline'].append(entry)
85
  data['doctors'].add(entry['doctor'])
86
 
 
87
  form_lower = entry['form'].lower()
88
  if 'medication' in form_lower or 'drug' in form_lower:
89
  data['medications'][entry['item']].append(entry)
90
+ elif 'diagnosis' in form_lower or 'condition' in form_lower:
91
  data['diagnoses'][entry['item']].append(entry)
92
+ elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
93
  data['tests'][entry['item']].append(entry)
94
+ elif 'procedure' in form_lower or 'surgery' in form_lower:
95
+ data['procedures'][entry['item']].append(entry)
96
 
97
  return data
98
 
99
def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
    """Build the analysis prompt text for a group of bookings.

    The prompt consists of a fixed header, one timeline line for every entry
    in ``patient_data['timeline']`` whose ``'booking'`` is in ``bookings``,
    a medication history restricted to those bookings, and a fixed list of
    required output headings.

    Args:
        patient_data: Mapping with a ``'timeline'`` list of entry dicts and a
            ``'medications'`` mapping of medication name to entry dicts. Each
            entry is read through the keys ``'booking'``, ``'date'``,
            ``'form'``, ``'item'``, ``'response'`` and ``'doctor'``.
        bookings: Booking identifiers to include in this prompt.

    Returns:
        The prompt as a single newline-joined string.
    """
    # A set gives constant-time membership tests for every timeline entry.
    selected = set(bookings)

    prompt_lines = [
        "**Comprehensive Patient Analysis**",
        f"Analyzing {len(bookings)} bookings",
        "",
        "**Key Analysis Points:**",
        "- Chronological progression of symptoms",
        "- Medication changes and interactions",
        "- Diagnostic consistency across providers",
        "- Missed diagnostic opportunities",
        "- Gaps in follow-up",
        "",
        "**Patient Timeline:**"
    ]

    prompt_lines.extend(
        f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
        for entry in patient_data['timeline']
        if entry['booking'] in selected
    )

    prompt_lines.extend(["", "**Medication History:**"])
    for med, entries in patient_data['medications'].items():
        history = " ".join(
            f"{e['date']}: {e['response']}"
            for e in entries if e['booking'] in selected
        )
        # Medications recorded only under other bookings would otherwise
        # appear as a bare "- name: " line with no history.
        if history:
            prompt_lines.append(f"- {med}: {history}")

    prompt_lines.extend([
        "",
        "**Required Analysis Format:**",
        "### Diagnostic Patterns",
        "### Medication Analysis",
        "### Provider Consistency",
        "### Missed Opportunities",
        "### Recommendations"
    ])

    return "\n".join(prompt_lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
def chunk_bookings(patient_data: Dict[str, Any], num_chunks: int = 3) -> List[List[str]]:
    """Distribute booking identifiers over size-balanced chunks.

    Each booking's size is the sum of ``estimate_tokens(str(entry))`` over its
    entries. Bookings are taken largest first and each is placed in the chunk
    whose running total is currently the smallest. Chunks that receive no
    booking are omitted, so fewer than ``num_chunks`` lists may be returned.

    Args:
        patient_data: Mapping whose ``'bookings'`` value maps each booking
            identifier to its list of entries.
        num_chunks: Maximum number of chunks to produce (default 3).

    Returns:
        A list of non-empty lists of booking identifiers; empty when there
        are no bookings.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        raise ValueError("num_chunks must be at least 1")

    booking_sizes = [
        (booking, sum(estimate_tokens(str(e)) for e in entries))
        for booking, entries in patient_data['bookings'].items()
    ]
    # Placing the largest bookings first keeps the greedy totals closer together.
    booking_sizes.sort(key=lambda pair: pair[1], reverse=True)

    chunks = [[] for _ in range(num_chunks)]
    chunk_sizes = [0] * num_chunks
    for booking, size in booking_sizes:
        lightest = chunk_sizes.index(min(chunk_sizes))
        chunks[lightest].append(booking)
        chunk_sizes[lightest] += size

    # Drop unused slots so callers never receive a chunk with no bookings.
    return [chunk for chunk in chunks if chunk]
157
 
158
  def init_agent():
 
159
  default_tool_path = os.path.abspath("data/new_tool.json")
160
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
161
 
 
170
  enable_checker=True,
171
  step_rag_num=4,
172
  seed=100,
173
+ additional_default_tools=[]
174
  )
175
  agent.init_model()
176
  return agent
177
 
178
  def analyze_with_agent(agent, prompt: str) -> str:
 
179
  try:
180
  response = ""
181
  for result in agent.run_gradio_chat(
 
202
 
203
  def create_ui(agent):
204
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
205
+ gr.Markdown("# 🏥 Patient History Analyzer")
206
 
207
  with gr.Tabs():
208
  with gr.TabItem("Analysis"):
209
  with gr.Row():
210
  with gr.Column(scale=1):
211
  file_upload = gr.File(
212
+ label="Upload Excel File",
213
  file_types=[".xlsx"],
214
  file_count="single"
215
  )
216
+ analyze_btn = gr.Button("Analyze", variant="primary")
217
+ status = gr.Markdown("Ready")
218
 
219
  with gr.Column(scale=2):
220
+ output = gr.Markdown()
221
+ report = gr.File(label="Download Report")
 
 
 
 
 
 
222
 
223
  with gr.TabItem("Instructions"):
224
  gr.Markdown("""
225
+ ## How to Use
226
+ 1. Upload patient history Excel
227
+ 2. Click Analyze
228
+ 3. View/download report
 
 
229
 
230
+ **Required Columns:**
 
231
  - Booking Number
232
  - Interview Date
233
+ - Interviewer
234
  - Form Name
235
  - Form Item
236
  - Item Response
237
  - Description
 
 
 
 
 
 
 
238
  """)
239
 
240
+ def analyze(file):
241
  if not file:
242
+ raise gr.Error("Please upload a file")
243
 
244
  try:
 
245
  df = pd.read_excel(file.name)
246
  patient_data = process_patient_data(df)
247
+ chunks = chunk_bookings(patient_data)
 
248
  full_report = []
 
249
 
250
+ for i, bookings in enumerate(chunks, 1):
251
+ prompt = generate_analysis_prompt(patient_data, bookings)
252
  response = analyze_with_agent(agent, prompt)
253
+ full_report.append(f"## Chunk {i}\n{response}\n")
 
 
 
 
254
  yield "\n".join(full_report), None
 
255
 
256
+ # Final summary
257
+ if len(chunks) > 1:
258
+ summary_prompt = "Create final summary combining all chunks"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  summary = analyze_with_agent(agent, summary_prompt)
260
+ full_report.append(f"## Final Summary\n{summary}\n")
261
 
262
+ report_path = os.path.join(report_dir, f"report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
263
+ with open(report_path, 'w') as f:
 
264
  f.write("\n".join(full_report))
265
 
266
  yield "\n".join(full_report), report_path
267
+
268
  except Exception as e:
269
+ raise gr.Error(f"Error: {str(e)}")
270
 
271
+ analyze_btn.click(
272
+ analyze,
273
  inputs=file_upload,
274
+ outputs=[output, report]
 
275
  )
276
 
277
  return demo
 
280
  try:
281
  agent = init_agent()
282
  demo = create_ui(agent)
283
+ demo.launch(
 
 
 
 
284
  server_name="0.0.0.0",
285
  server_port=7860,
286
+ show_error=True
 
 
287
  )
288
  except Exception as e:
289
+ print(f"Error: {str(e)}")
290
  sys.exit(1)