Ali2206 committed on
Commit
4ba3497
·
verified ·
1 Parent(s): d16299c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -155
app.py CHANGED
@@ -32,10 +32,11 @@ sys.path.insert(0, src_path)
32
 
33
  from txagent.txagent import TxAgent
34
 
35
- # Constants
36
- MAX_TOKENS = 32768 # TxAgent's maximum token limit
37
- CHUNK_SIZE = 3000 # Target chunk size to stay under token limit
38
- MAX_NEW_TOKENS = 1024
 
39
 
40
  def file_hash(path: str) -> str:
41
  """Generate MD5 hash of file contents"""
@@ -55,16 +56,17 @@ def clean_response(text: str) -> str:
55
  return text.strip()
56
 
57
  def estimate_tokens(text: str) -> int:
58
- """Approximate token count (1 token ~ 4 characters)"""
59
- return len(text) // 4
60
 
61
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
62
- """Process raw patient data into structured format"""
63
  data = {
64
  'bookings': defaultdict(list),
65
  'medications': defaultdict(list),
66
  'diagnoses': defaultdict(list),
67
  'tests': defaultdict(list),
 
68
  'doctors': set(),
69
  'timeline': []
70
  }
@@ -87,116 +89,107 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
87
  data['timeline'].append(entry)
88
  data['doctors'].add(entry['doctor'])
89
 
90
- # Categorize entries
91
  form_lower = entry['form'].lower()
92
  if 'medication' in form_lower or 'drug' in form_lower:
93
  data['medications'][entry['item']].append(entry)
94
- elif 'diagnosis' in form_lower:
95
  data['diagnoses'][entry['item']].append(entry)
96
- elif 'test' in form_lower or 'lab' in form_lower:
97
  data['tests'][entry['item']].append(entry)
 
 
98
 
99
  return data
100
 
101
- def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
102
- """Generate focused analysis prompt for a booking"""
103
- booking_entries = patient_data['bookings'][booking]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # Build timeline string
106
- timeline = "\n".join(
107
- f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
108
- for entry in booking_entries
109
- )
110
-
111
- # Get current medications
112
- current_meds = []
113
- for med, entries in patient_data['medications'].items():
114
- if any(e['booking'] == booking for e in entries):
115
- latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
116
- current_meds.append(f"- {med}: {latest['response']} (as of {latest['date']})")
117
 
118
- # Get current diagnoses
119
- current_diags = []
120
- for diag, entries in patient_data['diagnoses'].items():
121
- if any(e['booking'] == booking for e in entries):
122
- latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
123
- current_diags.append(f"- {diag}: {latest['response']} (as of {latest['date']})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- prompt = f"""
126
- **Comprehensive Patient Analysis - Booking {booking}**
127
-
128
- **Patient Timeline:**
129
- {timeline}
130
-
131
- **Current Medications:**
132
- {'\n'.join(current_meds) if current_meds else "None recorded"}
133
-
134
- **Current Diagnoses:**
135
- {'\n'.join(current_diags) if current_diags else "None recorded"}
136
-
137
- **Analysis Instructions:**
138
- 1. Review the patient's complete history across all visits
139
- 2. Identify any potential missed diagnoses based on symptoms and test results
140
- 3. Check for medication conflicts or inappropriate prescriptions
141
- 4. Note any incomplete assessments or missing tests
142
- 5. Flag any urgent follow-up needs
143
- 6. Compare findings across different doctors for consistency
144
-
145
- **Required Output Format:**
146
- ### Missed Diagnoses
147
- [Potential diagnoses that were not identified]
148
-
149
- ### Medication Issues
150
- [Conflicts, side effects, inappropriate prescriptions]
151
-
152
- ### Assessment Gaps
153
- [Missing tests or incomplete evaluations]
154
-
155
- ### Follow-up Recommendations
156
- [Urgent and non-urgent follow-up needs]
157
-
158
- ### Doctor Consistency
159
- [Discrepancies between different providers]
160
- """
161
- return prompt
162
 
163
- def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
164
- """Split patient data into manageable chunks"""
165
- chunks = []
166
- current_chunk = defaultdict(list)
167
- current_size = 0
168
 
169
- for booking, entries in patient_data['bookings'].items():
170
- booking_size = sum(estimate_tokens(str(e)) for e in entries)
171
-
172
- if current_size + booking_size > CHUNK_SIZE and current_chunk:
173
- chunks.append(dict(current_chunk))
174
- current_chunk = defaultdict(list)
175
- current_size = 0
176
-
177
- current_chunk['bookings'][booking] = entries
178
- current_size += booking_size
179
-
180
- # Add related data
181
- for med, med_entries in patient_data['medications'].items():
182
- if any(e['booking'] == booking for e in med_entries):
183
- current_chunk['medications'][med].extend(
184
- e for e in med_entries if e['booking'] == booking
185
- )
186
-
187
- for diag, diag_entries in patient_data['diagnoses'].items():
188
- if any(e['booking'] == booking for e in diag_entries):
189
- current_chunk['diagnoses'][diag].extend(
190
- e for e in diag_entries if e['booking'] == booking
191
- )
192
 
193
- if current_chunk:
194
- chunks.append(dict(current_chunk))
 
 
 
 
195
 
196
  return chunks
197
 
198
  def init_agent():
199
- """Initialize TxAgent with proper configuration"""
200
  default_tool_path = os.path.abspath("data/new_tool.json")
201
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
202
 
@@ -212,12 +205,13 @@ def init_agent():
212
  step_rag_num=4,
213
  seed=100,
214
  additional_default_tools=[],
 
215
  )
216
  agent.init_model()
217
  return agent
218
 
219
  def analyze_with_agent(agent, prompt: str) -> str:
220
- """Run analysis with proper error handling"""
221
  try:
222
  response = ""
223
  for result in agent.run_gradio_chat(
@@ -244,9 +238,9 @@ def analyze_with_agent(agent, prompt: str) -> str:
244
 
245
  def create_ui(agent):
246
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
247
- gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
248
 
249
- -With gr.Tabs():
250
  with gr.TabItem("Analysis"):
251
  with gr.Row():
252
  with gr.Column(scale=1):
@@ -255,8 +249,15 @@ def create_ui(agent):
255
  file_types=[".xlsx"],
256
  file_count="single"
257
  )
258
- analysis_btn = gr.Button("Analyze Patient History", variant="primary")
259
  status = gr.Markdown("Ready for analysis")
 
 
 
 
 
 
 
260
 
261
  with gr.Column(scale=2):
262
  output_display = gr.Markdown(
@@ -270,91 +271,94 @@ def create_ui(agent):
270
 
271
  with gr.TabItem("Instructions"):
272
  gr.Markdown("""
273
- ## How to Use This Tool
274
 
275
- 1. **Upload Excel File**: Patient history Excel file
276
- 2. **Click Analyze**: System will process all bookings
277
- 3. **Review Results**: Comprehensive analysis appears
278
- 4. **Download Report**: Full report with all findings
 
279
 
280
- ### Excel Requirements
281
- Must contain these columns:
282
- - Booking Number
283
- - Interview Date
284
- - Interviewer (Doctor)
285
- - Form Name
286
- - Form Item
287
- - Item Response
288
- - Description
289
 
290
- ### Analysis Includes:
291
- - Missed diagnoses across visits
292
- - Medication conflicts over time
293
- - Incomplete assessments
294
- - Doctor consistency checks
295
- - Follow-up recommendations
296
  """)
297
 
298
- def analyze_patient(file) -> Tuple[str, str]:
299
  if not file:
300
  raise gr.Error("Please upload an Excel file first")
301
 
 
 
 
302
  try:
303
  # Process Excel file
304
  df = pd.read_excel(file.name)
305
  patient_data = process_patient_data(df)
306
 
307
- # Generate and process prompts
308
- full_report = []
309
- bookings_processed = 0
310
 
311
- for booking in patient_data['bookings']:
312
- prompt = generate_analysis_prompt(patient_data, booking)
 
 
 
 
 
313
  response = analyze_with_agent(agent, prompt)
314
 
315
  if "Error in analysis" not in response:
316
- bookings_processed += 1
317
- full_report.append(f"## Booking {booking}\n{response}\n")
 
 
318
 
319
- yield "\n".join(full_report), None
320
  time.sleep(0.1) # Prevent UI freezing
321
 
322
- # Generate overall summary
323
- if bookings_processed > 1:
324
  summary_prompt = f"""
325
- **Comprehensive Patient Summary**
326
 
327
- Analyze all bookings ({bookings_processed} total) to identify:
328
- 1. Patterns across the entire treatment history
329
- 2. Chronic issues that may have been missed
330
- 3. Medication changes over time
331
- 4. Doctor consistency across visits
332
- 5. Long-term recommendations
333
 
334
  **Required Format:**
335
- ### Chronic Health Patterns
336
- [Recurring issues over time]
337
-
338
- ascopy
339
 
340
- ### Treatment Evolution
341
- [How treatment has changed]
342
 
343
- ### Long-term Concerns
344
- [Issues needing ongoing attention]
345
 
346
- ### Comprehensive Recommendations
347
- [Overall care plan]
348
  """
349
  summary = analyze_with_agent(agent, summary_prompt)
350
- full_report.append(f"## Overall Patient Summary\n{summary}\n")
351
 
352
  # Save report
353
- report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
 
354
  with open(report_path, 'w', encoding='utf-8') as f:
355
  f.write("\n".join(full_report))
356
 
357
- yield "\n".join(full_report), report_path
358
 
359
  except Exception as e:
360
  raise gr.Error(f"Analysis failed: {str(e)}")
@@ -362,7 +366,7 @@ ascopy
362
  analysis_btn.click(
363
  analyze_patient,
364
  inputs=file_upload,
365
- outputs=[output_display, report_download],
366
  api_name="analyze"
367
  )
368
 
 
32
 
33
  from txagent.txagent import TxAgent
34
 
35
+ # Constants - Updated for 32,768 token limit
36
+ MAX_TOKENS = 32768
37
+ CHUNK_SIZE = 10000 # Target chunk size (allowing 3 chunks within limit)
38
+ MAX_NEW_TOKENS = 2048 # Increased output length
39
+ MAX_BOOKINGS_PER_CHUNK = 5 # Process 5 bookings per chunk
40
 
41
  def file_hash(path: str) -> str:
42
  """Generate MD5 hash of file contents"""
 
56
  return text.strip()
57
 
def estimate_tokens(text: str) -> int:
    """Estimate how many tokens ``text`` occupies.

    Uses the heuristic of roughly 3.5 characters per token, which yields a
    slightly higher (more conservative) count than a 4-characters-per-token
    rule.

    Args:
        text: The string to measure.

    Returns:
        The estimated token count, rounded down, as an ``int``.
    """
    # ``len(text) // 3.5`` floor-divides by a float and therefore returns a
    # float (e.g. 2.0), contradicting the ``-> int`` annotation. Dividing by
    # 3.5 is the same as multiplying by 2 and dividing by 7, which keeps the
    # computation in exact integer arithmetic.
    return (len(text) * 2) // 7
61
 
62
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
63
+ """Enhanced patient data processing with chronology"""
64
  data = {
65
  'bookings': defaultdict(list),
66
  'medications': defaultdict(list),
67
  'diagnoses': defaultdict(list),
68
  'tests': defaultdict(list),
69
+ 'procedures': defaultdict(list),
70
  'doctors': set(),
71
  'timeline': []
72
  }
 
89
  data['timeline'].append(entry)
90
  data['doctors'].add(entry['doctor'])
91
 
92
+ # Enhanced categorization
93
  form_lower = entry['form'].lower()
94
  if 'medication' in form_lower or 'drug' in form_lower:
95
  data['medications'][entry['item']].append(entry)
96
+ elif 'diagnosis' in form_lower or 'condition' in form_lower:
97
  data['diagnoses'][entry['item']].append(entry)
98
+ elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
99
  data['tests'][entry['item']].append(entry)
100
+ elif 'procedure' in form_lower or 'surgery' in form_lower:
101
+ data['procedures'][entry['item']].append(entry)
102
 
103
  return data
104
 
def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
    """Build one analysis prompt covering several bookings.

    Args:
        patient_data: Mapping with at least the keys ``'timeline'`` (a list of
            entry dicts) and ``'medications'`` / ``'diagnoses'`` (mappings of
            item name to a list of entry dicts). Each entry is read through
            its ``'booking'``, ``'date'``, ``'form'``, ``'item'``,
            ``'response'`` and ``'doctor'`` keys.
        bookings: Booking identifiers to include in this prompt.

    Returns:
        The prompt as a single newline-joined string.

    Raises:
        KeyError: If ``patient_data`` or one of its entries lacks a key
            listed above.
    """
    # Membership is tested once per timeline/medication/diagnosis entry, so
    # use a set instead of scanning the ``bookings`` list every time.
    selected = set(bookings)
    timeline = patient_data['timeline']

    if timeline:
        # NOTE(review): the first/last entries are taken as the date range,
        # which assumes the timeline is already in chronological order.
        span_line = (
            f"Analyzing {len(bookings)} bookings spanning "
            f"{timeline[0]['date']} to {timeline[-1]['date']}"
        )
    else:
        # An empty timeline used to raise IndexError on ``timeline[0]``.
        span_line = f"Analyzing {len(bookings)} bookings (no dated entries available)"

    prompt_lines = [
        "**Comprehensive Patient Analysis**",
        span_line,
        "Focus on identifying patterns, inconsistencies, and missed opportunities across the entire treatment history.",
        "",
        "**Key Analysis Points:**",
        "- Chronological progression of symptoms and diagnoses",
        "- Medication changes and potential interactions over time",
        "- Diagnostic consistency across different providers",
        "- Missed diagnostic opportunities based on symptoms and test results",
        "- Gaps in follow-up or incomplete assessments",
        "- Emerging patterns that may indicate chronic conditions",
        "",
        "**Patient Timeline (Condensed):**",
    ]

    # Condensed timeline restricted to the requested bookings.
    prompt_lines.extend(
        f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
        for entry in timeline
        if entry['booking'] in selected
    )

    prompt_lines.extend([
        "",
        "**Medication History:**",
        *_format_history(patient_data['medications'], selected),
        "",
        "**Diagnostic History:**",
        *_format_history(patient_data['diagnoses'], selected),
        "",
        "**Required Analysis Format:**",
        "### Diagnostic Patterns",
        "[Identify patterns in symptoms and diagnoses over time]",
        "",
        "### Medication Analysis",
        "[Review all medication changes and potential issues]",
        "",
        "### Provider Consistency",
        "[Note any discrepancies between different doctors]",
        "",
        "### Missed Opportunities",
        "[Potential diagnoses or interventions that were missed]",
        "",
        "### Comprehensive Recommendations",
        "[Actionable recommendations for current care]",
    ])

    return "\n".join(prompt_lines)


def _format_history(records, selected):
    """Render one ``- name: date: response → ...`` line per item seen in ``selected`` bookings."""
    lines = []
    for name, entries in records.items():
        steps = [
            f"{e['date']}: {e['response']}"
            for e in entries
            if e['booking'] in selected
        ]
        # Items with no entries in the selected bookings are skipped; they
        # previously produced a dangling "- name: " line with nothing after it.
        if steps:
            lines.append(f"- {name}: " + " → ".join(steps))
    # Keep the section from being silently empty.
    return lines or ["None recorded"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
def chunk_bookings(patient_data: Dict[str, Any], *, num_chunks: int = 3, size_fn=None) -> List[List[str]]:
    """Split bookings into up to ``num_chunks`` groups of similar token size.

    Bookings are sized by summing the estimated tokens of ``str(entry)`` over
    their entries, then assigned largest-first to whichever group is
    currently smallest. Because of that ordering, the bookings inside a group
    are sorted by size, not by the order they appear in ``patient_data``.

    Args:
        patient_data: Mapping whose ``'bookings'`` value maps each booking
            identifier to a list of entries.
        num_chunks: Maximum number of groups to produce. Defaults to 3.
        size_fn: Optional callable taking a string and returning its size.
            When omitted, the module-level ``estimate_tokens`` is used.

    Returns:
        A list of non-empty booking-identifier lists. It holds fewer than
        ``num_chunks`` groups when there are not enough bookings to fill
        them, and is empty when there are no bookings at all.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        raise ValueError("num_chunks must be at least 1")
    if size_fn is None:
        size_fn = estimate_tokens

    # Estimate the size of every booking.
    booking_sizes = [
        (booking, sum(size_fn(str(e)) for e in entries))
        for booking, entries in patient_data['bookings'].items()
    ]

    # Largest first gives the greedy assignment below a better balance.
    booking_sizes.sort(key=lambda pair: pair[1], reverse=True)

    chunks = [[] for _ in range(num_chunks)]
    chunk_sizes = [0] * num_chunks

    for booking, size in booking_sizes:
        # Pick the lightest group; on equal size prefer the one holding
        # fewer bookings so zero-sized bookings do not all pile into one
        # group while others stay empty.
        target = min(
            range(num_chunks),
            key=lambda i: (chunk_sizes[i], len(chunks[i])),
        )
        chunks[target].append(booking)
        chunk_sizes[target] += size

    # Drop groups that received nothing so callers never build a prompt for
    # an empty booking list.
    return [chunk for chunk in chunks if chunk]
190
 
191
  def init_agent():
192
+ """Initialize TxAgent with enhanced configuration"""
193
  default_tool_path = os.path.abspath("data/new_tool.json")
194
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
195
 
 
205
  step_rag_num=4,
206
  seed=100,
207
  additional_default_tools=[],
208
+ device_map="auto"
209
  )
210
  agent.init_model()
211
  return agent
212
 
213
  def analyze_with_agent(agent, prompt: str) -> str:
214
+ """Enhanced analysis with progress tracking"""
215
  try:
216
  response = ""
217
  for result in agent.run_gradio_chat(
 
238
 
239
  def create_ui(agent):
240
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
241
+ gr.Markdown("# 🏥 Comprehensive Patient History Analyzer")
242
 
243
+ with gr.Tabs():
244
  with gr.TabItem("Analysis"):
245
  with gr.Row():
246
  with gr.Column(scale=1):
 
249
  file_types=[".xlsx"],
250
  file_count="single"
251
  )
252
+ analysis_btn = gr.Button("Analyze Full History", variant="primary")
253
  status = gr.Markdown("Ready for analysis")
254
+ progress = gr.Slider(
255
+ minimum=0,
256
+ maximum=100,
257
+ value=0,
258
+ label="Analysis Progress",
259
+ interactive=False
260
+ )
261
 
262
  with gr.Column(scale=2):
263
  output_display = gr.Markdown(
 
271
 
272
  with gr.TabItem("Instructions"):
273
  gr.Markdown("""
274
+ ## Enhanced Patient History Analysis
275
 
276
+ This tool processes complete medical histories across multiple visits, identifying:
277
+ - Patterns in symptoms and diagnoses over time
278
+ - Medication safety issues across providers
279
+ - Missed diagnostic opportunities
280
+ - Gaps in follow-up care
281
 
282
+ **How to Use:**
283
+ 1. Upload Excel file with patient history
284
+ 2. Click "Analyze Full History"
285
+ 3. View progressive results
286
+ 4. Download comprehensive report
 
 
 
 
287
 
288
+ **File Requirements:**
289
+ - Must contain complete visit history
290
+ - Required columns: Booking Number, Interview Date, Interviewer,
291
+ Form Name, Form Item, Item Response, Description
 
 
292
  """)
293
 
294
+ def analyze_patient(file) -> Tuple[str, str, int]:
295
  if not file:
296
  raise gr.Error("Please upload an Excel file first")
297
 
298
+ full_report = []
299
+ report_path = ""
300
+
301
  try:
302
  # Process Excel file
303
  df = pd.read_excel(file.name)
304
  patient_data = process_patient_data(df)
305
 
306
+ # Split into 3 balanced chunks
307
+ booking_chunks = chunk_bookings(patient_data)
308
+ total_chunks = len(booking_chunks)
309
 
310
+ for chunk_idx, bookings in enumerate(booking_chunks, 1):
311
+ # Update progress
312
+ progress_value = int((chunk_idx/total_chunks)*100)
313
+ yield "", "", progress_value
314
+
315
+ # Generate and process prompt
316
+ prompt = generate_analysis_prompt(patient_data, bookings)
317
  response = analyze_with_agent(agent, prompt)
318
 
319
  if "Error in analysis" not in response:
320
+ full_report.append(
321
+ f"## Analysis Segment {chunk_idx} (Bookings: {', '.join(bookings)})\n{response}\n"
322
+ )
323
+ yield "\n".join(full_report), "", progress_value
324
 
 
325
  time.sleep(0.1) # Prevent UI freezing
326
 
327
+ # Generate final summary
328
+ if len(booking_chunks) > 1:
329
  summary_prompt = f"""
330
+ **Final Comprehensive Summary**
331
 
332
+ Analyze all {len(patient_data['bookings'])} bookings to identify:
333
+ 1. Overall health trajectory
334
+ 2. Chronic condition patterns
335
+ 3. Medication safety across entire treatment
336
+ 4. Most critical missed opportunities
337
+ 5. Priority recommendations
338
 
339
  **Required Format:**
340
+ ### Health Trajectory
341
+ [Overall progression of health status]
 
 
342
 
343
+ ### Chronic Condition Analysis
344
+ [Patterns indicating chronic issues]
345
 
346
+ ### Critical Concerns
347
+ [Most urgent issues needing attention]
348
 
349
+ ### Priority Recommendations
350
+ [Action items ranked by importance]
351
  """
352
  summary = analyze_with_agent(agent, summary_prompt)
353
+ full_report.append(f"## Final Comprehensive Summary\n{summary}\n")
354
 
355
  # Save report
356
+ report_filename = f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
357
+ report_path = os.path.join(report_dir, report_filename)
358
  with open(report_path, 'w', encoding='utf-8') as f:
359
  f.write("\n".join(full_report))
360
 
361
+ yield "\n".join(full_report), report_path, 100
362
 
363
  except Exception as e:
364
  raise gr.Error(f"Analysis failed: {str(e)}")
 
366
  analysis_btn.click(
367
  analyze_patient,
368
  inputs=file_upload,
369
+ outputs=[output_display, report_download, progress],
370
  api_name="analyze"
371
  )
372