Ali2206 committed (verified)
Commit 1de8c2b · Parent(s): 4ba3497

Update app.py

Files changed (1)
  app.py +154 -160
app.py CHANGED
@@ -32,11 +32,10 @@ sys.path.insert(0, src_path)
32
 
33
  from txagent.txagent import TxAgent
34
 
35
- # Constants - Updated for 32,768 token limit
36
- MAX_TOKENS = 32768
37
- CHUNK_SIZE = 10000 # Target chunk size (allowing 3 chunks within limit)
38
- MAX_NEW_TOKENS = 2048 # Increased output length
39
- MAX_BOOKINGS_PER_CHUNK = 5 # Process 5 bookings per chunk
40
 
41
  def file_hash(path: str) -> str:
42
  """Generate MD5 hash of file contents"""
@@ -56,17 +55,16 @@ def clean_response(text: str) -> str:
56
  return text.strip()
57
 
58
  def estimate_tokens(text: str) -> int:
59
- """More accurate token estimation (1 token ~ 3-4 characters)"""
60
- return len(text) // 3.5 # More conservative estimate
61
 
62
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
63
- """Enhanced patient data processing with chronology"""
64
  data = {
65
  'bookings': defaultdict(list),
66
  'medications': defaultdict(list),
67
  'diagnoses': defaultdict(list),
68
  'tests': defaultdict(list),
69
- 'procedures': defaultdict(list),
70
  'doctors': set(),
71
  'timeline': []
72
  }
@@ -89,107 +87,116 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
89
  data['timeline'].append(entry)
90
  data['doctors'].add(entry['doctor'])
91
 
92
- # Enhanced categorization
93
  form_lower = entry['form'].lower()
94
  if 'medication' in form_lower or 'drug' in form_lower:
95
  data['medications'][entry['item']].append(entry)
96
- elif 'diagnosis' in form_lower or 'condition' in form_lower:
97
  data['diagnoses'][entry['item']].append(entry)
98
- elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
99
  data['tests'][entry['item']].append(entry)
100
- elif 'procedure' in form_lower or 'surgery' in form_lower:
101
- data['procedures'][entry['item']].append(entry)
102
 
103
  return data
104
 
105
- def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
106
- """Generate comprehensive prompt for multiple bookings"""
107
- prompt_lines = [
108
- "**Comprehensive Patient Analysis**",
109
- f"Analyzing {len(bookings)} bookings spanning {patient_data['timeline'][0]['date']} to {patient_data['timeline'][-1]['date']}",
110
- "Focus on identifying patterns, inconsistencies, and missed opportunities across the entire treatment history.",
111
- "",
112
- "**Key Analysis Points:**",
113
- "- Chronological progression of symptoms and diagnoses",
114
- "- Medication changes and potential interactions over time",
115
- "- Diagnostic consistency across different providers",
116
- "- Missed diagnostic opportunities based on symptoms and test results",
117
- "- Gaps in follow-up or incomplete assessments",
118
- "- Emerging patterns that may indicate chronic conditions",
119
- "",
120
- "**Patient Timeline (Condensed):**"
121
- ]
122
 
123
- # Add condensed timeline
124
- for entry in patient_data['timeline']:
125
- if entry['booking'] in bookings:
126
- prompt_lines.append(
127
- f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
128
- )
129
-
130
- # Add current medications
131
- prompt_lines.extend([
132
- "",
133
- "**Medication History:**",
134
- *[f"- {med}: " + " → ".join(
135
- f"{e['date']}: {e['response']}"
136
- for e in entries if e['booking'] in bookings
137
- ) for med, entries in patient_data['medications'].items()],
138
- "",
139
- "**Diagnostic History:**",
140
- *[f"- {diag}: " + " → ".join(
141
- f"{e['date']}: {e['response']}"
142
- for e in entries if e['booking'] in bookings
143
- ) for diag, entries in patient_data['diagnoses'].items()],
144
- "",
145
- "**Required Analysis Format:**",
146
- "### Diagnostic Patterns",
147
- "[Identify patterns in symptoms and diagnoses over time]",
148
- "",
149
- "### Medication Analysis",
150
- "[Review all medication changes and potential issues]",
151
- "",
152
- "### Provider Consistency",
153
- "[Note any discrepancies between different doctors]",
154
- "",
155
- "### Missed Opportunities",
156
- "[Potential diagnoses or interventions that were missed]",
157
- "",
158
- "### Comprehensive Recommendations",
159
- "[Actionable recommendations for current care]"
160
- ])
161
 
162
- return "\n".join(prompt_lines)
163
-
164
- def chunk_bookings(patient_data: Dict[str, Any]) -> List[List[str]]:
165
- """Split bookings into 3 balanced chunks based on token count"""
166
- all_bookings = list(patient_data['bookings'].keys())
 
167
 
168
- # Estimate token count for each booking
169
- booking_sizes = []
170
- for booking in all_bookings:
171
- entries = patient_data['bookings'][booking]
172
- size = sum(estimate_tokens(str(e)) for e in entries)
173
- booking_sizes.append((booking, size))
174
 
175
- # Sort by size (descending) for better chunk balancing
176
- booking_sizes.sort(key=lambda x: x[1], reverse=True)
177
 
178
- # Initialize 3 chunks
179
- chunks = [[] for _ in range(3)]
180
- chunk_sizes = [0, 0, 0]
181
 
182
- # Distribute bookings to chunks
183
- for booking, size in booking_sizes:
184
- # Find the chunk with smallest current size
185
- min_chunk = chunk_sizes.index(min(chunk_sizes))
186
- chunks[min_chunk].append(booking)
187
- chunk_sizes[min_chunk] += size
188
 
189
  return chunks
190
 
191
  def init_agent():
192
- """Initialize TxAgent with enhanced configuration"""
193
  default_tool_path = os.path.abspath("data/new_tool.json")
194
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
195
 
@@ -205,13 +212,12 @@ def init_agent():
205
  step_rag_num=4,
206
  seed=100,
207
  additional_default_tools=[],
208
- device_map="auto"
209
  )
210
  agent.init_model()
211
  return agent
212
 
213
  def analyze_with_agent(agent, prompt: str) -> str:
214
- """Enhanced analysis with progress tracking"""
215
  try:
216
  response = ""
217
  for result in agent.run_gradio_chat(
@@ -238,7 +244,7 @@ def analyze_with_agent(agent, prompt: str) -> str:
238
 
239
  def create_ui(agent):
240
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
241
- gr.Markdown("# 🏥 Comprehensive Patient History Analyzer")
242
 
243
  with gr.Tabs():
244
  with gr.TabItem("Analysis"):
@@ -249,15 +255,8 @@ def create_ui(agent):
249
  file_types=[".xlsx"],
250
  file_count="single"
251
  )
252
- analysis_btn = gr.Button("Analyze Full History", variant="primary")
253
  status = gr.Markdown("Ready for analysis")
254
- progress = gr.Slider(
255
- minimum=0,
256
- maximum=100,
257
- value=0,
258
- label="Analysis Progress",
259
- interactive=False
260
- )
261
 
262
  with gr.Column(scale=2):
263
  output_display = gr.Markdown(
@@ -271,94 +270,89 @@ def create_ui(agent):
271
 
272
  with gr.TabItem("Instructions"):
273
  gr.Markdown("""
274
- ## Enhanced Patient History Analysis
275
 
276
- This tool processes complete medical histories across multiple visits, identifying:
277
- - Patterns in symptoms and diagnoses over time
278
- - Medication safety issues across providers
279
- - Missed diagnostic opportunities
280
- - Gaps in follow-up care
281
 
282
- **How to Use:**
283
- 1. Upload Excel file with patient history
284
- 2. Click "Analyze Full History"
285
- 3. View progressive results
286
- 4. Download comprehensive report
287
 
288
- **File Requirements:**
289
- - Must contain complete visit history
290
- - Required columns: Booking Number, Interview Date, Interviewer,
291
  Form Name, Form Item, Item Response, Description
292
  """)
293
 
294
- def analyze_patient(file) -> Tuple[str, str, int]:
295
  if not file:
296
  raise gr.Error("Please upload an Excel file first")
297
 
298
- full_report = []
299
- report_path = ""
300
-
301
  try:
302
  # Process Excel file
303
  df = pd.read_excel(file.name)
304
  patient_data = process_patient_data(df)
305
 
306
- # Split into 3 balanced chunks
307
- booking_chunks = chunk_bookings(patient_data)
308
- total_chunks = len(booking_chunks)
309
 
310
- for chunk_idx, bookings in enumerate(booking_chunks, 1):
311
- # Update progress
312
- progress_value = int((chunk_idx/total_chunks)*100)
313
- yield "", "", progress_value
314
-
315
- # Generate and process prompt
316
- prompt = generate_analysis_prompt(patient_data, bookings)
317
  response = analyze_with_agent(agent, prompt)
318
 
319
  if "Error in analysis" not in response:
320
- full_report.append(
321
- f"## Analysis Segment {chunk_idx} (Bookings: {', '.join(bookings)})\n{response}\n"
322
- )
323
- yield "\n".join(full_report), "", progress_value
324
 
 
325
  time.sleep(0.1) # Prevent UI freezing
326
 
327
- # Generate final summary
328
- if len(booking_chunks) > 1:
329
- summary_prompt = f"""
330
- **Final Comprehensive Summary**
331
 
332
- Analyze all {len(patient_data['bookings'])} bookings to identify:
333
- 1. Overall health trajectory
334
- 2. Chronic condition patterns
335
- 3. Medication safety across entire treatment
336
- 4. Most critical missed opportunities
337
- 5. Priority recommendations
338
 
339
  **Required Format:**
340
- ### Health Trajectory
341
- [Overall progression of health status]
342
 
343
- ### Chronic Condition Analysis
344
- [Patterns indicating chronic issues]
345
 
346
- ### Critical Concerns
347
- [Most urgent issues needing attention]
348
 
349
- ### Priority Recommendations
350
- [Action items ranked by importance]
351
- """
352
  summary = analyze_with_agent(agent, summary_prompt)
353
- full_report.append(f"## Final Comprehensive Summary\n{summary}\n")
354
 
355
  # Save report
356
- report_filename = f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
357
- report_path = os.path.join(report_dir, report_filename)
358
  with open(report_path, 'w', encoding='utf-8') as f:
359
  f.write("\n".join(full_report))
360
 
361
- yield "\n".join(full_report), report_path, 100
362
 
363
  except Exception as e:
364
  raise gr.Error(f"Analysis failed: {str(e)}")
@@ -366,7 +360,7 @@ Analyze all {len(patient_data['bookings'])} bookings to identify:
366
  analysis_btn.click(
367
  analyze_patient,
368
  inputs=file_upload,
369
- outputs=[output_display, report_download, progress],
370
  api_name="analyze"
371
  )
372
 
 
32
 
33
  from txagent.txagent import TxAgent
34
 
35
+ # Constants
36
+ MAX_TOKENS = 32768 # TxAgent's maximum token limit
37
+ CHUNK_SIZE = 3000 # Target chunk size to stay under token limit
38
+ MAX_NEW_TOKENS = 1024
 
39
 
40
  def file_hash(path: str) -> str:
41
  """Generate MD5 hash of file contents"""
 
55
  return text.strip()
56
 
57
  def estimate_tokens(text: str) -> int:
58
+ """Approximate token count (1 token ~ 4 characters)"""
59
+ return len(text) // 4
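For orientation, the headroom implied by the constants above can be sanity-checked directly; a minimal sketch using only names defined in this file:

# Rough budget check: a chunk near CHUNK_SIZE tokens is ~12,000 characters under the
# 4-chars-per-token heuristic, leaving room inside MAX_TOKENS even after reserving
# MAX_NEW_TOKENS for the model's reply.
sample = "x" * 12_000
assert estimate_tokens(sample) <= CHUNK_SIZE
assert CHUNK_SIZE + MAX_NEW_TOKENS < MAX_TOKENS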
60
 
61
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
62
+ """Process raw patient data into structured format"""
63
  data = {
64
  'bookings': defaultdict(list),
65
  'medications': defaultdict(list),
66
  'diagnoses': defaultdict(list),
67
  'tests': defaultdict(list),
 
68
  'doctors': set(),
69
  'timeline': []
70
  }
 
87
  data['timeline'].append(entry)
88
  data['doctors'].add(entry['doctor'])
89
 
90
+ # Categorize entries
91
  form_lower = entry['form'].lower()
92
  if 'medication' in form_lower or 'drug' in form_lower:
93
  data['medications'][entry['item']].append(entry)
94
+ elif 'diagnosis' in form_lower:
95
  data['diagnoses'][entry['item']].append(entry)
96
+ elif 'test' in form_lower or 'lab' in form_lower:
97
  data['tests'][entry['item']].append(entry)
 
 
98
 
99
  return data
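The categorization above keys on substrings of the lowered form name; a small illustration, assuming a timeline entry shaped like the dicts this function builds (keys taken from the code, sample values invented):

entry = {
    'booking': 'B001', 'date': '2024-01-15', 'doctor': 'Dr. A',
    'form': 'Medication Review', 'item': 'Metformin', 'response': '500 mg daily',
}
# 'medication' is a substring of the lowered form name, so this entry is filed under
# data['medications']['Metformin'] in addition to data['timeline'].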
100
 
101
+ def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
102
+ """Generate focused analysis prompt for a booking"""
103
+ booking_entries = patient_data['bookings'][booking]
104
 
105
+ # Build timeline string
106
+ timeline = "\n".join(
107
+ f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
108
+ for entry in booking_entries
109
+ )
110
 
111
+ # Get current medications
112
+ current_meds = []
113
+ for med, entries in patient_data['medications'].items():
114
+ if any(e['booking'] == booking for e in entries):
115
+ latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
116
+ current_meds.append(f"- {med}: {latest['response']} (as of {latest['date']})")
117
 
118
+ # Get current diagnoses
119
+ current_diags = []
120
+ for diag, entries in patient_data['diagnoses'].items():
121
+ if any(e['booking'] == booking for e in entries):
122
+ latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
123
+ current_diags.append(f"- {diag}: {latest['response']} (as of {latest['date']})")
124
 
125
+ meds_text = "\n".join(current_meds) if current_meds else "None recorded"  # joined outside the f-string (backslashes are not allowed in f-string expressions before Python 3.12)
+ diags_text = "\n".join(current_diags) if current_diags else "None recorded"
+
+ prompt = f"""
126
+ **Comprehensive Patient Analysis - Booking {booking}**
127
+
128
+ **Patient Timeline:**
129
+ {timeline}
130
+
131
+ **Current Medications:**
132
+ {meds_text}
133
+
134
+ **Current Diagnoses:**
135
+ {diags_text}
136
+
137
+ **Analysis Instructions:**
138
+ 1. Review the patient's complete history across all visits
139
+ 2. Identify any potential missed diagnoses based on symptoms and test results
140
+ 3. Check for medication conflicts or inappropriate prescriptions
141
+ 4. Note any incomplete assessments or missing tests
142
+ 5. Flag any urgent follow-up needs
143
+ 6. Compare findings across different doctors for consistency
144
+
145
+ **Required Output Format:**
146
+ ### Missed Diagnoses
147
+ [Potential diagnoses that were not identified]
148
+
149
+ ### Medication Issues
150
+ [Conflicts, side effects, inappropriate prescriptions]
151
+
152
+ ### Assessment Gaps
153
+ [Missing tests or incomplete evaluations]
154
+
155
+ ### Follow-up Recommendations
156
+ [Urgent and non-urgent follow-up needs]
157
+
158
+ ### Doctor Consistency
159
+ [Discrepancies between different providers]
160
+ """
161
+ return prompt
162
+
163
+ def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
164
+ """Split patient data into manageable chunks"""
165
+ chunks = []
166
+ current_chunk = defaultdict(lambda: defaultdict(list))  # nested mapping so current_chunk['bookings'][booking] and ['medications'][med] work
167
+ current_size = 0
168
 
169
+ for booking, entries in patient_data['bookings'].items():
170
+ booking_size = sum(estimate_tokens(str(e)) for e in entries)
171
+
172
+ if current_size + booking_size > CHUNK_SIZE and current_chunk:
173
+ chunks.append(dict(current_chunk))
174
+ current_chunk = defaultdict(lambda: defaultdict(list))
175
+ current_size = 0
176
+
177
+ current_chunk['bookings'][booking] = entries
178
+ current_size += booking_size
179
+
180
+ # Add related data
181
+ for med, med_entries in patient_data['medications'].items():
182
+ if any(e['booking'] == booking for e in med_entries):
183
+ current_chunk['medications'][med].extend(
184
+ e for e in med_entries if e['booking'] == booking
185
+ )
186
+
187
+ for diag, diag_entries in patient_data['diagnoses'].items():
188
+ if any(e['booking'] == booking for e in diag_entries):
189
+ current_chunk['diagnoses'][diag].extend(
190
+ e for e in diag_entries if e['booking'] == booking
191
+ )
192
 
193
+ if current_chunk:
194
+ chunks.append(dict(current_chunk))
195
 
196
  return chunks
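Note that chunk_patient_data does not appear to be invoked in the changed lines shown below (analyze_patient iterates bookings one at a time); a minimal sketch of how the chunker could be driven, assuming the patient_data structure produced by process_patient_data:

df = pd.read_excel(file.name)            # same load used in analyze_patient
patient_data = process_patient_data(df)
for chunk in chunk_patient_data(patient_data):
    for booking in chunk['bookings']:
        prompt = generate_analysis_prompt(patient_data, booking)
        # hand each prompt to analyze_with_agent(agent, prompt), as analyze_patient does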
197
 
198
  def init_agent():
199
+ """Initialize TxAgent with proper configuration"""
200
  default_tool_path = os.path.abspath("data/new_tool.json")
201
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
202
 
 
212
  step_rag_num=4,
213
  seed=100,
214
  additional_default_tools=[],
 
215
  )
216
  agent.init_model()
217
  return agent
218
 
219
  def analyze_with_agent(agent, prompt: str) -> str:
220
+ """Run analysis with proper error handling"""
221
  try:
222
  response = ""
223
  for result in agent.run_gradio_chat(
 
244
 
245
  def create_ui(agent):
246
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
247
+ gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
248
 
249
  with gr.Tabs():
250
  with gr.TabItem("Analysis"):
 
255
  file_types=[".xlsx"],
256
  file_count="single"
257
  )
258
+ analysis_btn = gr.Button("Analyze Patient History", variant="primary")
259
  status = gr.Markdown("Ready for analysis")
260
 
261
  with gr.Column(scale=2):
262
  output_display = gr.Markdown(
 
270
 
271
  with gr.TabItem("Instructions"):
272
  gr.Markdown("""
273
+ ## How to Use This Tool
274
 
275
+ 1. **Upload Excel File**: Patient history Excel file
276
+ 2. **Click Analyze**: System will process all bookings
277
+ 3. **Review Results**: Comprehensive analysis appears
278
+ 4. **Download Report**: Full report with all findings
 
279
 
280
+ ### Excel Requirements
281
+ Must contain these columns:
282
+ - Booking Number
283
+ - Interview Date
284
+ - Interviewer (Doctor)
285
+ - Form Name
286
+ - Form Item
287
+ - Item Response
288
+ - Description
289
 
290
+ ### Analysis Includes:
291
+ - Missed diagnoses across visits
292
+ - Medication conflicts over time
293
+ - Incomplete assessments
294
+ - Doctor consistency checks
295
+ - Follow-up recommendations
296
  """)
297
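Since the analysis depends on the columns listed in the Instructions tab, a fail-fast check may be useful before processing; a sketch assuming the header strings match that list exactly (the real parsing code is in unchanged lines not shown in this diff):

REQUIRED_COLUMNS = ["Booking Number", "Interview Date", "Interviewer",
                    "Form Name", "Form Item", "Item Response", "Description"]

def validate_columns(df: pd.DataFrame) -> None:
    # Raise a Gradio error listing any required columns missing from the upload
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise gr.Error(f"Missing required columns: {', '.join(missing)}")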
 
298
+ def analyze_patient(file) -> Tuple[str, str]:
299
  if not file:
300
  raise gr.Error("Please upload an Excel file first")
301
 
 
 
 
302
  try:
303
  # Process Excel file
304
  df = pd.read_excel(file.name)
305
  patient_data = process_patient_data(df)
306
 
307
+ # Generate and process prompts
308
+ full_report = []
309
+ bookings_processed = 0
310
 
311
+ for booking in patient_data['bookings']:
312
+ prompt = generate_analysis_prompt(patient_data, booking)
313
  response = analyze_with_agent(agent, prompt)
314
 
315
  if "Error in analysis" not in response:
316
+ bookings_processed += 1
317
+ full_report.append(f"## Booking {booking}\n{response}\n")
 
 
318
 
319
+ yield "\n".join(full_report), None
320
  time.sleep(0.1) # Prevent UI freezing
321
 
322
+ # Generate overall summary
323
+ if bookings_processed > 1:
324
+ summary_prompt = """
325
+ **Comprehensive Patient Summary**
326
 
327
+ Analyze all bookings ({bookings_processed} total) to identify:
328
+ 1. Patterns across the entire treatment history
329
+ 2. Chronic issues that may have been missed
330
+ 3. Medication changes over time
331
+ 4. Doctor consistency across visits
332
+ 5. Long-term recommendations
333
 
334
  **Required Format:**
335
+ ### Chronic Health Patterns
336
+ [Recurring issues over time]
337
 
338
+ ### Treatment Evolution
339
+ [How treatment has changed]
340
 
341
+ ### Long-term Concerns
342
+ [Issues needing ongoing attention]
343
 
344
+ ### Comprehensive Recommendations
345
+ [Overall care plan]
346
+ """.format(bookings_processed=bookings_processed)
347
  summary = analyze_with_agent(agent, summary_prompt)
348
+ full_report.append(f"## Overall Patient Summary\n{summary}\n")
349
 
350
  # Save report
351
+ report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
 
352
  with open(report_path, 'w', encoding='utf-8') as f:
353
  f.write("\n".join(full_report))
354
 
355
+ yield "\n".join(full_report), report_path
356
 
357
  except Exception as e:
358
  raise gr.Error(f"Analysis failed: {str(e)}")
 
360
  analysis_btn.click(
361
  analyze_patient,
362
  inputs=file_upload,
363
+ outputs=[output_display, report_download],
364
  api_name="analyze"
365
  )
366
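Because analyze_patient is a generator and two outputs are registered, Gradio expects every yield to supply one value per output, i.e. a (markdown_text, file_path_or_None) pair; a toy handler illustrating that contract (names and values are placeholders):

def toy_stream(file):
    # Intermediate updates: report text grows, no download file yet
    yield "## Booking B001\nPartial analysis...", None
    # Final update: full report plus the path that populates the download component
    yield "## Booking B001\nPartial analysis...\n## Overall Patient Summary\n...", "patient_report_example.md"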