Ali2206 committed (verified)
Commit 1de8c2b · Parent(s): 4ba3497

Update app.py

Files changed (1)
  app.py +154 -160
app.py CHANGED
@@ -32,11 +32,10 @@ sys.path.insert(0, src_path)
32
 
33
  from txagent.txagent import TxAgent
34
 
35
- # Constants - Updated for 32,768 token limit
36
- MAX_TOKENS = 32768
37
- CHUNK_SIZE = 10000 # Target chunk size (allowing 3 chunks within limit)
38
- MAX_NEW_TOKENS = 2048 # Increased output length
39
- MAX_BOOKINGS_PER_CHUNK = 5 # Process 5 bookings per chunk
40
 
41
  def file_hash(path: str) -> str:
42
  """Generate MD5 hash of file contents"""
@@ -56,17 +55,16 @@ def clean_response(text: str) -> str:
56
  return text.strip()
57
 
58
  def estimate_tokens(text: str) -> int:
59
- """More accurate token estimation (1 token ~ 3-4 characters)"""
60
- return len(text) // 3.5 # More conservative estimate
61
 
62
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
63
- """Enhanced patient data processing with chronology"""
64
  data = {
65
  'bookings': defaultdict(list),
66
  'medications': defaultdict(list),
67
  'diagnoses': defaultdict(list),
68
  'tests': defaultdict(list),
69
- 'procedures': defaultdict(list),
70
  'doctors': set(),
71
  'timeline': []
72
  }
@@ -89,107 +87,116 @@ def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
89
  data['timeline'].append(entry)
90
  data['doctors'].add(entry['doctor'])
91
 
92
- # Enhanced categorization
93
  form_lower = entry['form'].lower()
94
  if 'medication' in form_lower or 'drug' in form_lower:
95
  data['medications'][entry['item']].append(entry)
96
- elif 'diagnosis' in form_lower or 'condition' in form_lower:
97
  data['diagnoses'][entry['item']].append(entry)
98
- elif 'test' in form_lower or 'lab' in form_lower or 'result' in form_lower:
99
  data['tests'][entry['item']].append(entry)
100
- elif 'procedure' in form_lower or 'surgery' in form_lower:
101
- data['procedures'][entry['item']].append(entry)
102
 
103
  return data
104
 
105
- def generate_analysis_prompt(patient_data: Dict[str, Any], bookings: List[str]) -> str:
106
- """Generate comprehensive prompt for multiple bookings"""
107
- prompt_lines = [
108
- "**Comprehensive Patient Analysis**",
109
- f"Analyzing {len(bookings)} bookings spanning {patient_data['timeline'][0]['date']} to {patient_data['timeline'][-1]['date']}",
110
- "Focus on identifying patterns, inconsistencies, and missed opportunities across the entire treatment history.",
111
- "",
112
- "**Key Analysis Points:**",
113
- "- Chronological progression of symptoms and diagnoses",
114
- "- Medication changes and potential interactions over time",
115
- "- Diagnostic consistency across different providers",
116
- "- Missed diagnostic opportunities based on symptoms and test results",
117
- "- Gaps in follow-up or incomplete assessments",
118
- "- Emerging patterns that may indicate chronic conditions",
119
- "",
120
- "**Patient Timeline (Condensed):**"
121
- ]
122
 
123
- # Add condensed timeline
124
- for entry in patient_data['timeline']:
125
- if entry['booking'] in bookings:
126
- prompt_lines.append(
127
- f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
128
- )
129
-
130
- # Add current medications
131
- prompt_lines.extend([
132
- "",
133
- "**Medication History:**",
134
- *[f"- {med}: " + " → ".join(
135
- f"{e['date']}: {e['response']}"
136
- for e in entries if e['booking'] in bookings
137
- ) for med, entries in patient_data['medications'].items()],
138
- "",
139
- "**Diagnostic History:**",
140
- *[f"- {diag}: " + " → ".join(
141
- f"{e['date']}: {e['response']}"
142
- for e in entries if e['booking'] in bookings
143
- ) for diag, entries in patient_data['diagnoses'].items()],
144
- "",
145
- "**Required Analysis Format:**",
146
- "### Diagnostic Patterns",
147
- "[Identify patterns in symptoms and diagnoses over time]",
148
- "",
149
- "### Medication Analysis",
150
- "[Review all medication changes and potential issues]",
151
- "",
152
- "### Provider Consistency",
153
- "[Note any discrepancies between different doctors]",
154
- "",
155
- "### Missed Opportunities",
156
- "[Potential diagnoses or interventions that were missed]",
157
- "",
158
- "### Comprehensive Recommendations",
159
- "[Actionable recommendations for current care]"
160
- ])
161
 
162
- return "\n".join(prompt_lines)
163
-
164
- def chunk_bookings(patient_data: Dict[str, Any]) -> List[List[str]]:
165
- """Split bookings into 3 balanced chunks based on token count"""
166
- all_bookings = list(patient_data['bookings'].keys())
 
167
 
168
- # Estimate token count for each booking
169
- booking_sizes = []
170
- for booking in all_bookings:
171
- entries = patient_data['bookings'][booking]
172
- size = sum(estimate_tokens(str(e)) for e in entries)
173
- booking_sizes.append((booking, size))
174
 
175
- # Sort by size (descending) for better chunk balancing
176
- booking_sizes.sort(key=lambda x: x[1], reverse=True)
177
 
178
- # Initialize 3 chunks
179
- chunks = [[] for _ in range(3)]
180
- chunk_sizes = [0, 0, 0]
181
 
182
- # Distribute bookings to chunks
183
- for booking, size in booking_sizes:
184
- # Find the chunk with smallest current size
185
- min_chunk = chunk_sizes.index(min(chunk_sizes))
186
- chunks[min_chunk].append(booking)
187
- chunk_sizes[min_chunk] += size
188
 
189
  return chunks
190
 
191
  def init_agent():
192
- """Initialize TxAgent with enhanced configuration"""
193
  default_tool_path = os.path.abspath("data/new_tool.json")
194
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
195
 
@@ -205,13 +212,12 @@ def init_agent():
205
  step_rag_num=4,
206
  seed=100,
207
  additional_default_tools=[],
208
- device_map="auto"
209
  )
210
  agent.init_model()
211
  return agent
212
 
213
  def analyze_with_agent(agent, prompt: str) -> str:
214
- """Enhanced analysis with progress tracking"""
215
  try:
216
  response = ""
217
  for result in agent.run_gradio_chat(
@@ -238,7 +244,7 @@ def analyze_with_agent(agent, prompt: str) -> str:
238
 
239
  def create_ui(agent):
240
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
241
- gr.Markdown("# 🏥 Comprehensive Patient History Analyzer")
242
 
243
  with gr.Tabs():
244
  with gr.TabItem("Analysis"):
@@ -249,15 +255,8 @@ def create_ui(agent):
249
  file_types=[".xlsx"],
250
  file_count="single"
251
  )
252
- analysis_btn = gr.Button("Analyze Full History", variant="primary")
253
  status = gr.Markdown("Ready for analysis")
254
- progress = gr.Slider(
255
- minimum=0,
256
- maximum=100,
257
- value=0,
258
- label="Analysis Progress",
259
- interactive=False
260
- )
261
 
262
  with gr.Column(scale=2):
263
  output_display = gr.Markdown(
@@ -271,94 +270,89 @@ def create_ui(agent):
271
 
272
  with gr.TabItem("Instructions"):
273
  gr.Markdown("""
274
- ## Enhanced Patient History Analysis
275
 
276
- This tool processes complete medical histories across multiple visits, identifying:
277
- - Patterns in symptoms and diagnoses over time
278
- - Medication safety issues across providers
279
- - Missed diagnostic opportunities
280
- - Gaps in follow-up care
281
 
282
- **How to Use:**
283
- 1. Upload Excel file with patient history
284
- 2. Click "Analyze Full History"
285
- 3. View progressive results
286
- 4. Download comprehensive report
287
 
288
- **File Requirements:**
289
- - Must contain complete visit history
290
- - Required columns: Booking Number, Interview Date, Interviewer,
291
  Form Name, Form Item, Item Response, Description
292
  """)
293
 
294
- def analyze_patient(file) -> Tuple[str, str, int]:
295
  if not file:
296
  raise gr.Error("Please upload an Excel file first")
297
 
298
- full_report = []
299
- report_path = ""
300
-
301
  try:
302
  # Process Excel file
303
  df = pd.read_excel(file.name)
304
  patient_data = process_patient_data(df)
305
 
306
- # Split into 3 balanced chunks
307
- booking_chunks = chunk_bookings(patient_data)
308
- total_chunks = len(booking_chunks)
309
 
310
- for chunk_idx, bookings in enumerate(booking_chunks, 1):
311
- # Update progress
312
- progress_value = int((chunk_idx/total_chunks)*100)
313
- yield "", "", progress_value
314
-
315
- # Generate and process prompt
316
- prompt = generate_analysis_prompt(patient_data, bookings)
317
  response = analyze_with_agent(agent, prompt)
318
 
319
  if "Error in analysis" not in response:
320
- full_report.append(
321
- f"## Analysis Segment {chunk_idx} (Bookings: {', '.join(bookings)})\n{response}\n"
322
- )
323
- yield "\n".join(full_report), "", progress_value
324
 
 
325
  time.sleep(0.1) # Prevent UI freezing
326
 
327
- # Generate final summary
328
- if len(booking_chunks) > 1:
329
- summary_prompt = f"""
330
- **Final Comprehensive Summary**
331
 
332
- Analyze all {len(patient_data['bookings'])} bookings to identify:
333
- 1. Overall health trajectory
334
- 2. Chronic condition patterns
335
- 3. Medication safety across entire treatment
336
- 4. Most critical missed opportunities
337
- 5. Priority recommendations
338
 
339
  **Required Format:**
340
- ### Health Trajectory
341
- [Overall progression of health status]
342
 
343
- ### Chronic Condition Analysis
344
- [Patterns indicating chronic issues]
345
 
346
- ### Critical Concerns
347
- [Most urgent issues needing attention]
348
 
349
- ### Priority Recommendations
350
- [Action items ranked by importance]
351
- """
352
  summary = analyze_with_agent(agent, summary_prompt)
353
- full_report.append(f"## Final Comprehensive Summary\n{summary}\n")
354
 
355
  # Save report
356
- report_filename = f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"
357
- report_path = os.path.join(report_dir, report_filename)
358
  with open(report_path, 'w', encoding='utf-8') as f:
359
  f.write("\n".join(full_report))
360
 
361
- yield "\n".join(full_report), report_path, 100
362
 
363
  except Exception as e:
364
  raise gr.Error(f"Analysis failed: {str(e)}")
@@ -366,7 +360,7 @@ Analyze all {len(patient_data['bookings'])} bookings to identify:
366
  analysis_btn.click(
367
  analyze_patient,
368
  inputs=file_upload,
369
- outputs=[output_display, report_download, progress],
370
  api_name="analyze"
371
  )
372
 
 
32
 
33
  from txagent.txagent import TxAgent
34
 
35
+ # Constants
36
+ MAX_TOKENS = 32768 # TxAgent's maximum token limit
37
+ CHUNK_SIZE = 3000 # Target chunk size to stay under token limit
38
+ MAX_NEW_TOKENS = 1024
 
39
 
40
  def file_hash(path: str) -> str:
41
  """Generate MD5 hash of file contents"""
 
55
  return text.strip()
56
 
57
  def estimate_tokens(text: str) -> int:
58
+ """Approximate token count (1 token ~ 4 characters)"""
59
+ return len(text) // 4
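For orientation, the headroom implied by the constants above can be sanity-checked directly; a minimal sketch using only names defined in this file:

# Rough budget check: a chunk near CHUNK_SIZE tokens is ~12,000 characters under the
# 4-chars-per-token heuristic, leaving room inside MAX_TOKENS even after reserving
# MAX_NEW_TOKENS for the model's reply.
sample = "x" * 12_000
assert estimate_tokens(sample) <= CHUNK_SIZE
assert CHUNK_SIZE + MAX_NEW_TOKENS < MAX_TOKENS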
60
 
61
  def process_patient_data(df: pd.DataFrame) -> Dict[str, Any]:
62
+ """Process raw patient data into structured format"""
63
  data = {
64
  'bookings': defaultdict(list),
65
  'medications': defaultdict(list),
66
  'diagnoses': defaultdict(list),
67
  'tests': defaultdict(list),
 
68
  'doctors': set(),
69
  'timeline': []
70
  }
 
87
  data['timeline'].append(entry)
88
  data['doctors'].add(entry['doctor'])
89
 
90
+ # Categorize entries
91
  form_lower = entry['form'].lower()
92
  if 'medication' in form_lower or 'drug' in form_lower:
93
  data['medications'][entry['item']].append(entry)
94
+ elif 'diagnosis' in form_lower:
95
  data['diagnoses'][entry['item']].append(entry)
96
+ elif 'test' in form_lower or 'lab' in form_lower:
97
  data['tests'][entry['item']].append(entry)
 
 
98
 
99
  return data
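The categorization above keys on substrings of the lowered form name; a small illustration, assuming a timeline entry shaped like the dicts this function builds (keys taken from the code, sample values invented):

entry = {
    'booking': 'B001', 'date': '2024-01-15', 'doctor': 'Dr. A',
    'form': 'Medication Review', 'item': 'Metformin', 'response': '500 mg daily',
}
# 'medication' is a substring of the lowered form name, so this entry is filed under
# data['medications']['Metformin'] in addition to data['timeline'].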
100
 
101
+ def generate_analysis_prompt(patient_data: Dict[str, Any], booking: str) -> str:
102
+ """Generate focused analysis prompt for a booking"""
103
+ booking_entries = patient_data['bookings'][booking]
104
 
105
+ # Build timeline string
106
+ timeline = "\n".join(
107
+ f"- {entry['date']}: {entry['form']} - {entry['item']} = {entry['response']} (by {entry['doctor']})"
108
+ for entry in booking_entries
109
+ )
110
 
111
+ # Get current medications
112
+ current_meds = []
113
+ for med, entries in patient_data['medications'].items():
114
+ if any(e['booking'] == booking for e in entries):
115
+ latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
116
+ current_meds.append(f"- {med}: {latest['response']} (as of {latest['date']})")
117
 
118
+ # Get current diagnoses
119
+ current_diags = []
120
+ for diag, entries in patient_data['diagnoses'].items():
121
+ if any(e['booking'] == booking for e in entries):
122
+ latest = max((e for e in entries if e['booking'] == booking), key=lambda x: x['date'])
123
+ current_diags.append(f"- {diag}: {latest['response']} (as of {latest['date']})")
124
 
125
+ meds_text = "\n".join(current_meds) if current_meds else "None recorded"  # joined outside the f-string (backslashes are not allowed in f-string expressions before Python 3.12)
+ diags_text = "\n".join(current_diags) if current_diags else "None recorded"
+
+ prompt = f"""
126
+ **Comprehensive Patient Analysis - Booking {booking}**
127
+
128
+ **Patient Timeline:**
129
+ {timeline}
130
+
131
+ **Current Medications:**
132
+ {meds_text}
133
+
134
+ **Current Diagnoses:**
135
+ {diags_text}
136
+
137
+ **Analysis Instructions:**
138
+ 1. Review the patient's complete history across all visits
139
+ 2. Identify any potential missed diagnoses based on symptoms and test results
140
+ 3. Check for medication conflicts or inappropriate prescriptions
141
+ 4. Note any incomplete assessments or missing tests
142
+ 5. Flag any urgent follow-up needs
143
+ 6. Compare findings across different doctors for consistency
144
+
145
+ **Required Output Format:**
146
+ ### Missed Diagnoses
147
+ [Potential diagnoses that were not identified]
148
+
149
+ ### Medication Issues
150
+ [Conflicts, side effects, inappropriate prescriptions]
151
+
152
+ ### Assessment Gaps
153
+ [Missing tests or incomplete evaluations]
154
+
155
+ ### Follow-up Recommendations
156
+ [Urgent and non-urgent follow-up needs]
157
+
158
+ ### Doctor Consistency
159
+ [Discrepancies between different providers]
160
+ """
161
+ return prompt
162
+
163
+ def chunk_patient_data(patient_data: Dict[str, Any]) -> List[Dict[str, Any]]:
164
+ """Split patient data into manageable chunks"""
165
+ chunks = []
166
+ current_chunk = defaultdict(lambda: defaultdict(list))  # nested mapping so current_chunk['bookings'][booking] and ['medications'][med] work
167
+ current_size = 0
168
 
169
+ for booking, entries in patient_data['bookings'].items():
170
+ booking_size = sum(estimate_tokens(str(e)) for e in entries)
171
+
172
+ if current_size + booking_size > CHUNK_SIZE and current_chunk:
173
+ chunks.append(dict(current_chunk))
174
+ current_chunk = defaultdict(lambda: defaultdict(list))
175
+ current_size = 0
176
+
177
+ current_chunk['bookings'][booking] = entries
178
+ current_size += booking_size
179
+
180
+ # Add related data
181
+ for med, med_entries in patient_data['medications'].items():
182
+ if any(e['booking'] == booking for e in med_entries):
183
+ current_chunk['medications'][med].extend(
184
+ e for e in med_entries if e['booking'] == booking
185
+ )
186
+
187
+ for diag, diag_entries in patient_data['diagnoses'].items():
188
+ if any(e['booking'] == booking for e in diag_entries):
189
+ current_chunk['diagnoses'][diag].extend(
190
+ e for e in diag_entries if e['booking'] == booking
191
+ )
192
 
193
+ if current_chunk:
194
+ chunks.append(dict(current_chunk))
195
 
196
  return chunks
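Note that chunk_patient_data does not appear to be invoked in the changed lines shown below (analyze_patient iterates bookings one at a time); a minimal sketch of how the chunker could be driven, assuming the patient_data structure produced by process_patient_data:

df = pd.read_excel(file.name)            # same load used in analyze_patient
patient_data = process_patient_data(df)
for chunk in chunk_patient_data(patient_data):
    for booking in chunk['bookings']:
        prompt = generate_analysis_prompt(patient_data, booking)
        # hand each prompt to analyze_with_agent(agent, prompt), as analyze_patient does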
197
 
198
  def init_agent():
199
+ """Initialize TxAgent with proper configuration"""
200
  default_tool_path = os.path.abspath("data/new_tool.json")
201
  target_tool_path = os.path.join(tool_cache_dir, "new_tool.json")
202
 
 
212
  step_rag_num=4,
213
  seed=100,
214
  additional_default_tools=[],
 
215
  )
216
  agent.init_model()
217
  return agent
218
 
219
  def analyze_with_agent(agent, prompt: str) -> str:
220
+ """Run analysis with proper error handling"""
221
  try:
222
  response = ""
223
  for result in agent.run_gradio_chat(
 
244
 
245
  def create_ui(agent):
246
  with gr.Blocks(theme=gr.themes.Soft(), title="Patient History Analyzer") as demo:
247
+ gr.Markdown("# 🏥 Comprehensive Patient History Analysis")
248
 
249
  with gr.Tabs():
250
  with gr.TabItem("Analysis"):
 
255
  file_types=[".xlsx"],
256
  file_count="single"
257
  )
258
+ analysis_btn = gr.Button("Analyze Patient History", variant="primary")
259
  status = gr.Markdown("Ready for analysis")
260
 
261
  with gr.Column(scale=2):
262
  output_display = gr.Markdown(
 
270
 
271
  with gr.TabItem("Instructions"):
272
  gr.Markdown("""
273
+ ## How to Use This Tool
274
 
275
+ 1. **Upload Excel File**: Patient history Excel file
276
+ 2. **Click Analyze**: System will process all bookings
277
+ 3. **Review Results**: Comprehensive analysis appears
278
+ 4. **Download Report**: Full report with all findings
 
279
 
280
+ ### Excel Requirements
281
+ Must contain these columns:
282
+ - Booking Number
283
+ - Interview Date
284
+ - Interviewer (Doctor)
285
+ - Form Name
286
+ - Form Item
287
+ - Item Response
288
+ - Description
289
 
290
+ ### Analysis Includes:
291
+ - Missed diagnoses across visits
292
+ - Medication conflicts over time
293
+ - Incomplete assessments
294
+ - Doctor consistency checks
295
+ - Follow-up recommendations
296
  """)
297
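Since the analysis depends on the columns listed in the Instructions tab, a fail-fast check may be useful before processing; a sketch assuming the header strings match that list exactly (the real parsing code is in unchanged lines not shown in this diff):

REQUIRED_COLUMNS = ["Booking Number", "Interview Date", "Interviewer",
                    "Form Name", "Form Item", "Item Response", "Description"]

def validate_columns(df: pd.DataFrame) -> None:
    # Raise a Gradio error listing any required columns missing from the upload
    missing = [col for col in REQUIRED_COLUMNS if col not in df.columns]
    if missing:
        raise gr.Error(f"Missing required columns: {', '.join(missing)}")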
 
298
+ def analyze_patient(file) -> Tuple[str, str]:
299
  if not file:
300
  raise gr.Error("Please upload an Excel file first")
301
 
 
 
 
302
  try:
303
  # Process Excel file
304
  df = pd.read_excel(file.name)
305
  patient_data = process_patient_data(df)
306
 
307
+ # Generate and process prompts
308
+ full_report = []
309
+ bookings_processed = 0
310
 
311
+ for booking in patient_data['bookings']:
312
+ prompt = generate_analysis_prompt(patient_data, booking)
313
  response = analyze_with_agent(agent, prompt)
314
 
315
  if "Error in analysis" not in response:
316
+ bookings_processed += 1
317
+ full_report.append(f"## Booking {booking}\n{response}\n")
 
 
318
 
319
+ yield "\n".join(full_report), None
320
  time.sleep(0.1) # Prevent UI freezing
321
 
322
+ # Generate overall summary
323
+ if bookings_processed > 1:
324
+ summary_prompt = """
325
+ **Comprehensive Patient Summary**
326
 
327
+ Analyze all bookings ({bookings_processed} total) to identify:
328
+ 1. Patterns across the entire treatment history
329
+ 2. Chronic issues that may have been missed
330
+ 3. Medication changes over time
331
+ 4. Doctor consistency across visits
332
+ 5. Long-term recommendations
333
 
334
  **Required Format:**
335
+ ### Chronic Health Patterns
336
+ [Recurring issues over time]
337
 
338
+ ### Treatment Evolution
339
+ [How treatment has changed]
340
 
341
+ ### Long-term Concerns
342
+ [Issues needing ongoing attention]
343
 
344
+ ### Comprehensive Recommendations
345
+ [Overall care plan]
346
+ """.format(bookings_processed=bookings_processed)
347
  summary = analyze_with_agent(agent, summary_prompt)
348
+ full_report.append(f"## Overall Patient Summary\n{summary}\n")
349
 
350
  # Save report
351
+ report_path = os.path.join(report_dir, f"patient_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md")
 
352
  with open(report_path, 'w', encoding='utf-8') as f:
353
  f.write("\n".join(full_report))
354
 
355
+ yield "\n".join(full_report), report_path
356
 
357
  except Exception as e:
358
  raise gr.Error(f"Analysis failed: {str(e)}")
 
360
  analysis_btn.click(
361
  analyze_patient,
362
  inputs=file_upload,
363
+ outputs=[output_display, report_download],
364
  api_name="analyze"
365
  )
366
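Because analyze_patient is a generator and two outputs are registered, Gradio expects every yield to supply one value per output, i.e. a (markdown_text, file_path_or_None) pair; a toy handler illustrating that contract (names and values are placeholders):

def toy_stream(file):
    # Intermediate updates: report text grows, no download file yet
    yield "## Booking B001\nPartial analysis...", None
    # Final update: full report plus the path that populates the download component
    yield "## Booking B001\nPartial analysis...\n## Overall Patient Summary\n...", "patient_report_example.md"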