CPS-Test-Mobile

Paused

App Files Files Community

Ali2206 committed on Apr 22

Commit

4ba3497

verified ·

1 Parent(s): d16299c

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -155

app.py CHANGED Viewed

@@ -32,10 +32,11 @@ sys.path.insert(0, src_path)
 from txagent.txagent import TxAgent
-# Constants
-MAX_TOKENS = 32768  # TxAgent's maximum token limit
-CHUNK_SIZE = 3000  # Target chunk size to stay under token limit
-MAX_NEW_TOKENS = 1024
 def file_hash(path: str) -> str:
     """Generate MD5 hash of file contents"""
@@ -55,16 +56,17 @@ def clean_response(text: str) -> str:
     return text.strip()
 def estimate_tokens(text: str) -> int:
-    """Approximate token count (1 token ~ 4 characters)"""
-    return len(text) // 4
 def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
-    """Process raw patient data into structured format"""
     data = {
         'bookings': defaultdict(list),
         'medications': defaultdict(list),
         'diagnoses': defaultdict(list),
         'tests': defaultdict(list),
         'doctors': set(),
         'timeline': []
     }
@@ -87,116 +89,107 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
             data['timeline'].append(entry)
             data['doctors'].add(entry['doctor'])
-            # Categorize entries
             form_lower = entry['form'].lower()
             if 'medication' in form_lower or 'drug' in form_lower:
                 data['medications'][entry['item']].append(entry)
-            elif 'diagnosis' in form_lower:
                 data['diagnoses'][entry['item']].append(entry)
-            elif 'test' in form_lower or 'lab' in form_lower:
                 data['tests'][entry['item']].append(entry)
     return data
-def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
-    """Generate focused analysis prompt for a booking"""
-    booking_entries = patient_data['bookings'][booking]
-    # Build timeline string
-    timeline = "\n".join(
-        f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
-        for entry in booking_entries
-    )
-    # Get current medications
-    current_meds = []
-    for med, entries in patient_data['medications'].items():
-        if any(e['booking'] == booking for e in entries):
-            latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
-            current_meds.append(f"- {med}: {latest['response']} (as of {latest['date']})")
-    # Get current diagnoses
-    current_diags = []
-    for diag, entries in patient_data['diagnoses'].items():
-        if any(e['booking'] == booking for e in entries):
-            latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
-            current_diags.append(f"- {diag}: {latest['response']} (as of {latest['date']})")
-    prompt = f"""
-**Comprehensive Patient Analysis - Booking {booking}**
-**Patient Timeline:**
-{timeline}
-**Current Medications:**
-{'\n'.join(current_meds) if current_meds else "None recorded"}
-**Current Diagnoses:**
-{'\n'.join(current_diags) if current_diags else "None recorded"}
-**Analysis Instructions:**
-1. Review the patient's complete history across all visits
-2. Identify any potential missed diagnoses based on symptoms and test results
-3. Check for medication conflicts or inappropriate prescriptions
-4. Note any incomplete assessments or missing tests
-5. Flag any urgent follow-up needs
-6. Compare findings across different doctors for consistency
-**Required Output Format:**
-### Missed Diagnoses
-[Potential diagnoses that were not identified]
-### Medication Issues
-[Conflicts, side effects, inappropriate prescriptions]
-### Assessment Gaps
-[Missing tests or incomplete evaluations]
-### Follow-up Recommendations
-[Urgent and non-urgent follow-up needs]
-### Doctor Consistency
-[Discrepancies between different providers]
-"""
-    return prompt
-def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
-    """Split patient data into manageable chunks"""
-    chunks = []
-    current_chunk = defaultdict(list)
-    current_size = 0
-    for booking, entries in patient_data['bookings'].items():
-        booking_size = sum(estimate_tokens(str(e)) for e in entries)
-        if current_size + booking_size > CHUNK_SIZE and current_chunk:
-            chunks.append(dict(current_chunk))
-            current_chunk = defaultdict(list)
-            current_size = 0
-        current_chunk['bookings'][booking] = entries
-        current_size += booking_size
-        # Add related data
-        for med, med_entries in patient_data['medications'].items():
-            if any(e['booking'] == booking for e in med_entries):
-                current_chunk['medications'][med].extend(
-                    e for e in med_entries if e['booking'] == booking
-                )
-        for diag, diag_entries in patient_data['diagnoses'].items():
-            if any(e['booking'] == booking for e in diag_entries):
-                current_chunk['diagnoses'][diag].extend(
-                    e for e in diag_entries if e['booking'] == booking
-                )
-    if current_chunk:
-        chunks.append(dict(current_chunk))
     return chunks
 def init_agent():
-    """Initialize TxAgent with proper configuration"""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
@@ -212,12 +205,13 @@ def init_agent():
         step_rag_num=4,
         seed=100,
         additional_default_tools=[],
     )
     agent.init_model()
     return agent
 def analyze_with_agent(agent, prompt: str) -> str:
-    """Run analysis with proper error handling"""
     try:
         response = ""
         for result in agent.run_gradio_chat(
@@ -244,9 +238,9 @@ def analyze_with_agent(agent, prompt: str) -> str:
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
-        gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
-       -With gr.Tabs():
             with gr.TabItem("Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):
@@ -255,8 +249,15 @@ def create_ui(agent):
                             file_types=[".xlsx"],
                             file_count="single"
                         )
-                        analysis_btn = gr.Button("Analyze Patient History", variant="primary")
                         status = gr.Markdown("Ready for analysis")
                     with gr.Column(scale=2):
                         output_display = gr.Markdown(
@@ -270,91 +271,94 @@ def create_ui(agent):
             with gr.TabItem("Instructions"):
                 gr.Markdown("""
-                ## How to Use This Tool
-                1. **Upload Excel File**: Patient history Excel file
-                2. **Click Analyze**: System will process all bookings
-                3. **Review Results**: Comprehensive analysis appears
-                4. **Download Report**: Full report with all findings
-                ### Excel Requirements
-                Must contain these columns:
-                - Booking Number
-                - Interview Date
-                - Interviewer (Doctor)
-                - Form Name
-                - Form Item
-                - Item Response
-                - Description
-                ### Analysis Includes:
-                - Missed diagnoses across visits
-                - Medication conflicts over time
-                - Incomplete assessments
-                - Doctor consistency checks
-                - Follow-up recommendations
                 """)
-        def analyze_patient(file) -> Tuple[str, str]:
             if not file:
                 raise gr.Error("Please upload an Excel file first")
             try:
                 # Process Excel file
                 df = pd.read_excel(file.name)
                 patient_data = process_patient_data(df)
-                # Generate and process prompts
-                full_report = []
-                bookings_processed = 0
-                for booking in patient_data['bookings']:
-                    prompt = generate_analysis_prompt(patient_data, booking)
                     response = analyze_with_agent(agent, prompt)
                     if "Error in analysis" not in response:
-                        bookings_processed += 1
-                        full_report.append(f"## Booking {booking}\n{response}\n")
-                    yield "\n".join(full_report), None
                     time.sleep(0.1)  # Prevent UI freezing
-                # Generate overall summary
-                if bookings_processed > 1:
                     summary_prompt = f"""
-**Comprehensive Patient Summary**
-Analyze all bookings ({bookings_processed} total) to identify:
-1. Patterns across the entire treatment history
-2. Chronic issues that may have been missed
-3. Medication changes over time
-4. Doctor consistency across visits
-5. Long-term recommendations
 **Required Format:**
-### Chronic Health Patterns
-[Recurring issues over time]
-ascopy
-### Treatment Evolution
-[How treatment has changed]
-### Long-term Concerns
-[Issues needing ongoing attention]
-### Comprehensive Recommendations
-[Overall care plan]
 """
                     summary = analyze_with_agent(agent, summary_prompt)
-                    full_report.append(f"## Overall Patient Summary\n{summary}\n")
                 # Save report
-                report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
                 with open(report_path, 'w', encoding='utf-8') as f:
                     f.write("\n".join(full_report))
-                yield "\n".join(full_report), report_path
             except Exception as e:
                 raise gr.Error(f"Analysis failed: {str(e)}")
@@ -362,7 +366,7 @@ ascopy
         analysis_btn.click(
             analyze_patient,
             inputs=file_upload,
-            outputs=[output_display, report_download],
             api_name="analyze"
         )

 from txagent.txagent import TxAgent
+# Token-budget constants, sized against the 32,768-token limit
+MAX_TOKENS = 32768  # Overall token limit the values below are sized against
+CHUNK_SIZE = 10000  # Target tokens per chunk; three chunks (30,000) stay under MAX_TOKENS
+MAX_NEW_TOKENS = 2048  # Output-length setting, raised from 1024 in this commit
+MAX_BOOKINGS_PER_CHUNK = 5  # Intended cap of 5 bookings per chunk. NOTE(review): not referenced in the visible code - confirm it is used
 def file_hash(path: str) -> str:
     """Generate MD5 hash of file contents"""
     return text.strip()
 def estimate_tokens(text: str) -> int:
     """Estimate the token count of *text* at roughly 3.5 characters per token.

     Args:
         text: The string whose size is being estimated.

     Returns:
         The estimated number of tokens, rounded down, as an ``int``.
     """
     # Floor division by a float yields a float (e.g. 7 // 3.5 == 2.0), so
     # convert explicitly to honour the declared int return type.
     return int(len(text) // 3.5)
 def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
+    """Enhanced patient data processing with chronology"""
     data = {
         'bookings': defaultdict(list),
         'medications': defaultdict(list),
         'diagnoses': defaultdict(list),
         'tests': defaultdict(list),
+        'procedures': defaultdict(list),
         'doctors': set(),
         'timeline': []
     }
             data['timeline'].append(entry)
             data['doctors'].add(entry['doctor'])
+            # Enhanced categorization
             form_lower = entry['form'].lower()
             if 'medication' in form_lower or 'drug' in form_lower:
                 data['medications'][entry['item']].append(entry)
+            elif 'diagnosis' in form_lower or 'condition' in form_lower:
                 data['diagnoses'][entry['item']].append(entry)
+            elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
                 data['tests'][entry['item']].append(entry)
+            elif 'procedure' in form_lower or 'surgery' in form_lower:
+                data['procedures'][entry['item']].append(entry)
     return data
+def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
+    """Generate comprehensive prompt for multiple bookings"""
+    prompt_lines = [
+        "**Comprehensive Patient Analysis**",
+        f"Analyzing {len(bookings)} bookings spanning {patient_data['timeline'][0]['date']} to {patient_data['timeline'][-1]['date']}",
+        "Focus on identifying patterns, inconsistencies, and missed opportunities across the entire treatment history.",
+        "",
+        "**Key Analysis Points:**",
+        "- Chronological progression of symptoms and diagnoses",
+        "- Medication changes and potential interactions over time",
+        "- Diagnostic consistency across different providers",
+        "- Missed diagnostic opportunities based on symptoms and test results",
+        "- Gaps in follow-up or incomplete assessments",
+        "- Emerging patterns that may indicate chronic conditions",
+        "",
+        "**Patient Timeline (Condensed):**"
+    ]
+    # Add condensed timeline
+    for entry in patient_data['timeline']:
+        if entry['booking'] in bookings:
+            prompt_lines.append(
+                f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
+            )
+    # Add current medications
+    prompt_lines.extend([
+        "",
+        "**Medication History:**",
+        *[f"- {med}: " + " → ".join(
+            f"{e['date']}: {e['response']}"
+            for e in entries if e['booking'] in bookings
+        ) for med, entries in patient_data['medications'].items()],
+        "",
+        "**Diagnostic History:**",
+        *[f"- {diag}: " + " → ".join(
+            f"{e['date']}: {e['response']}"
+            for e in entries if e['booking'] in bookings
+        ) for diag, entries in patient_data['diagnoses'].items()],
+        "",
+        "**Required Analysis Format:**",
+        "### Diagnostic Patterns",
+        "[Identify patterns in symptoms and diagnoses over time]",
+        "",
+        "### Medication Analysis",
+        "[Review all medication changes and potential issues]",
+        "",
+        "### Provider Consistency",
+        "[Note any discrepancies between different doctors]",
+        "",
+        "### Missed Opportunities",
+        "[Potential diagnoses or interventions that were missed]",
+        "",
+        "### Comprehensive Recommendations",
+        "[Actionable recommendations for current care]"
+    ])
+    return "\n".join(prompt_lines)
+def chunk_bookings(patient_data: Dict[str, Any]) -> List[List[str]]:
+    """Split bookings into 3 balanced chunks based on token count"""
+    all_bookings = list(patient_data['bookings'].keys())
+    # Estimate token count for each booking
+    booking_sizes = []
+    for booking in all_bookings:
+        entries = patient_data['bookings'][booking]
+        size = sum(estimate_tokens(str(e)) for e in entries)
+        booking_sizes.append((booking, size))
+    # Sort by size (descending) for better chunk balancing
+    booking_sizes.sort(key=lambda x: x[1], reverse=True)
+    # Initialize 3 chunks
+    chunks = [[] for _ in range(3)]
+    chunk_sizes = [0, 0, 0]
+    # Distribute bookings to chunks
+    for booking, size in booking_sizes:
+        # Find the chunk with smallest current size
+        min_chunk = chunk_sizes.index(min(chunk_sizes))
+        chunks[min_chunk].append(booking)
+        chunk_sizes[min_chunk] += size
     return chunks
 def init_agent():
+    """Initialize TxAgent with enhanced configuration"""
     default_tool_path = os.path.abspath("data/new_tool.json")
     target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
         step_rag_num=4,
         seed=100,
         additional_default_tools=[],
+        device_map="auto"
     )
     agent.init_model()
     return agent
 def analyze_with_agent(agent, prompt: str) -> str:
+    """Enhanced analysis with progress tracking"""
     try:
         response = ""
         for result in agent.run_gradio_chat(
 def create_ui(agent):
     with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
+        gr.Markdown("# 🏥 Comprehensive Patient History Analyzer")
+        with gr.Tabs():
             with gr.TabItem("Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):
                             file_types=[".xlsx"],
                             file_count="single"
                         )
+                        analysis_btn = gr.Button("Analyze Full History", variant="primary")
                         status = gr.Markdown("Ready for analysis")
+                        progress = gr.Slider(
+                            minimum=0,
+                            maximum=100,
+                            value=0,
+                            label="Analysis Progress",
+                            interactive=False
+                        )
                     with gr.Column(scale=2):
                         output_display = gr.Markdown(
             with gr.TabItem("Instructions"):
                 gr.Markdown("""
+                ## Enhanced Patient History Analysis
+                This tool processes complete medical histories across multiple visits, identifying:
+                - Patterns in symptoms and diagnoses over time
+                - Medication safety issues across providers
+                - Missed diagnostic opportunities
+                - Gaps in follow-up care
+                **How to Use:**
+                1. Upload Excel file with patient history
+                2. Click "Analyze Full History"
+                3. View progressive results
+                4. Download comprehensive report
+                **File Requirements:**
+                - Must contain complete visit history
+                - Required columns: Booking Number, Interview Date, Interviewer,
+                  Form Name, Form Item, Item Response, Description
                 """)
+        def analyze_patient(file) -> Tuple[str, str, int]:
             if not file:
                 raise gr.Error("Please upload an Excel file first")
+            full_report = []
+            report_path = ""
             try:
                 # Process Excel file
                 df = pd.read_excel(file.name)
                 patient_data = process_patient_data(df)
+                # Split into 3 balanced chunks
+                booking_chunks = chunk_bookings(patient_data)
+                total_chunks = len(booking_chunks)
+                for chunk_idx, bookings in enumerate(booking_chunks, 1):
+                    # Update progress
+                    progress_value = int((chunk_idx/total_chunks)*100)
+                    yield "", "", progress_value
+                    # Generate and process prompt
+                    prompt = generate_analysis_prompt(patient_data, bookings)
                     response = analyze_with_agent(agent, prompt)
                     if "Error in analysis" not in response:
+                        full_report.append(
+                            f"## Analysis Segment {chunk_idx} (Bookings: {', '.join(bookings)})\n{response}\n"
+                        )
+                        yield "\n".join(full_report), "", progress_value
                     time.sleep(0.1)  # Prevent UI freezing
+                # Generate final summary
+                if len(booking_chunks) > 1:
                     summary_prompt = f"""
+**Final Comprehensive Summary**
+Analyze all {len(patient_data['bookings'])} bookings to identify:
+1. Overall health trajectory
+2. Chronic condition patterns
+3. Medication safety across entire treatment
+4. Most critical missed opportunities
+5. Priority recommendations
 **Required Format:**
+### Health Trajectory
+[Overall progression of health status]
+### Chronic Condition Analysis
+[Patterns indicating chronic issues]
+### Critical Concerns
+[Most urgent issues needing attention]
+### Priority Recommendations
+[Action items ranked by importance]
 """
                     summary = analyze_with_agent(agent, summary_prompt)
+                    full_report.append(f"## Final Comprehensive Summary\n{summary}\n")
                 # Save report
+                report_filename = f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
+                report_path = os.path.join(report_dir, report_filename)
                 with open(report_path, 'w', encoding='utf-8') as f:
                     f.write("\n".join(full_report))
+                yield "\n".join(full_report), report_path, 100
             except Exception as e:
                 raise gr.Error(f"Analysis failed: {str(e)}")
         analysis_btn.click(
             analyze_patient,
             inputs=file_upload,
+            outputs=[output_display, report_download, progress],
             api_name="analyze"
         )