MLRC_Bench

Running

File size: 11,997 Bytes

ed2eb44
 
 
 
 
 
06d4ee9
ed2eb44
 
 
 
 
 
06d4ee9
ed2eb44
 
06d4ee9
ed2eb44
 
 
 
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
06d4ee9
 
ed2eb44
 
06d4ee9
 
 
ed2eb44
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
 
ed2eb44
 
06d4ee9
ed2eb44
 
06d4ee9
ed2eb44
 
 
 
 
 
 
06d4ee9
ed2eb44
 
 
06d4ee9
ed2eb44
 
 
 
 
 
 
06d4ee9
 
 
 
 
ed2eb44
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
 
 
 
 
 
 
 
 
 
 
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
bd9c702
 
 
 
 
 
 
 
 
06d4ee9
 
 
bd9c702
 
 
06d4ee9
bd9c702
06d4ee9
 
 
 
 
 
 
 
 
 
bd9c702
 
 
 
 
 
 
 
 
06d4ee9
 
 
bd9c702
 
06d4ee9
bd9c702
06d4ee9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed2eb44
06d4ee9
 
ed2eb44
 
 
 
 
 
 
c08520d
ed2eb44

"""
Leaderboard table components for the leaderboard application.
"""
import streamlit as st
from src.data.processors import get_model_type_style, get_rank_style

def _metric_display_name(col):
    """Return the label shown for a metric column.

    Columns belonging to a non-primary metric are named "<prefix>: <name>";
    for those only "<name>" is returned. Any other column is returned as-is.
    """
    return col.split(": ", 1)[-1]


def _group_metric_columns(metric_columns, primary_metric, metrics_config):
    """Group metric columns under the metric they belong to.

    Args:
        metric_columns (list): Metric column names, optionally "<prefix>: <name>"
        primary_metric (str): Name of the metric that owns the unprefixed columns
        metrics_config (dict): Configured metrics, keyed by full metric name

    Returns:
        dict: Metric name -> list of column names, in first-seen order
    """
    # The separator is ": " everywhere. Testing for a bare ":" and then
    # splitting on ": " would fail to unpack for a name such as "a:b".
    prefixed_cols = [col for col in metric_columns if ": " in col]
    if not prefixed_cols:
        # Single metric: every column belongs to the primary metric
        return {primary_metric: metric_columns}

    groups = {primary_metric: [col for col in metric_columns if ": " not in col]}
    for col in prefixed_cols:
        prefix = col.split(": ", 1)[0]
        # Resolve the prefix to the first configured metric name starting with
        # it; fall back to the prefix itself when nothing matches.
        full_metric_name = next((m for m in metrics_config if m.startswith(prefix)), prefix)
        groups.setdefault(full_metric_name, []).append(col)
    return groups


def _build_table_html(display_df, metric_groups):
    """Build the HTML for the leaderboard table.

    Args:
        display_df (pandas.DataFrame): Rows to show; reads the "Rank", "Agent"
            and "Model Type" columns plus every column in metric_groups
        metric_groups (dict): Metric name -> list of column names

    Returns:
        str: The complete table markup
    """
    from html import escape

    # Fragments are collected in a list and joined once at the end
    parts = ["""
    <div class="fixed-table-container">
      <div class="scroll-container">
        <table class="fixed-table">
          <thead>
            <tr class="header-row">
              <th class="fixed-column first-fixed-column" rowspan="2">Rank</th>
              <th class="fixed-column second-fixed-column" rowspan="2" style="text-align: center;">Agent</th>
              <th class="model-type-cell" rowspan="2" style="text-align: center;">Model Type</th>
    """]

    # One spanning header per metric group
    for metric_name, cols in metric_groups.items():
        parts.append(f'<th colspan="{len(cols)}" class="metric-header" style="text-align: center;">{metric_name}</th>')

    parts.append("""
            </tr>
            <tr class="sub-header">
    """)

    # Column order and per-column CSS class are the same for every row, so
    # they are computed once here and reused for the sub-header and the body.
    metric_cells = []
    for cols in metric_groups.values():
        for col in cols:
            display_name = _metric_display_name(col)
            is_overall = display_name == "Metric Average"
            metric_cells.append((col, display_name, is_overall))

    for _, display_name, is_overall in metric_cells:
        column_class = "overall-cell" if is_overall else "metric-cell"
        parts.append(f'<th class="{column_class}" style="text-align: center;">{display_name}</th>')

    parts.append("""
            </tr>
          </thead>
          <tbody>
    """)

    for position, (_, row) in enumerate(display_df.iterrows()):
        agent = row["Agent"]

        # The human reference row gets its own highlighted background; all
        # other rows alternate between two dark shades.
        if agent == "Top Human in Competition":
            row_bg = "#2a1e37"
            row_style = f'style="background-color: {row_bg}; box-shadow: 0 0 5px #f472b6;"'
        else:
            row_bg = "#0a0a0a" if position % 2 == 0 else "#111111"
            row_style = f'style="background-color: {row_bg};"'
        parts.append(f'<tr class="table-row" {row_style}>')

        # Rank cell: only the styles returned by get_rank_style, no background
        rank = row["Rank"]
        rank_style = "".join(
            f"{style_key}: {style_value};"
            for style_key, style_value in get_rank_style(rank).items()
        )
        parts.append(f'<td class="fixed-column first-fixed-column" style="{rank_style}">{rank}</td>')

        # Agent cell. The name is escaped inside the title attribute so that a
        # quote or angle bracket in it cannot terminate the attribute.
        # NOTE(review): cell text is still interpolated unescaped, as before;
        # escape it too if display_df can ever carry untrusted strings.
        agent_title = escape(str(agent), quote=True)
        parts.append(f'<td class="fixed-column second-fixed-column" title="{agent_title}" style="font-weight: 500; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; text-align: center;">{agent}</td>')

        # Model type cell; empty style values are skipped
        model_type = row["Model Type"]
        type_style = "text-align: center;" + "".join(
            f"{style_key}: {style_value};"
            for style_key, style_value in get_model_type_style(model_type).items()
            if style_value
        )
        parts.append(f'<td class="table-cell model-type-cell" style="{type_style}">{model_type}</td>')

        # Metric cells
        for col, _, is_overall in metric_cells:
            cell_class = "table-cell overall-cell" if is_overall else "table-cell metric-cell"

            if col not in row:
                # Column missing from the data (not expected): placeholder cell
                parts.append(f'<td class="{cell_class}">-</td>')
                continue

            value_text = row[col]
            # Tag the cell by sign when the text parses as a number (thousands
            # separators removed); anything else keeps the neutral class.
            try:
                value = float(str(value_text).replace(',', ''))
            except ValueError:
                pass
            else:
                if value > 0:
                    cell_class += " positive-value"
                elif value < 0:
                    cell_class += " negative-value"

            parts.append(f'<td class="{cell_class}">{value_text}</td>')

        parts.append("</tr>")

    parts.append("""
          </tbody>
        </table>
      </div>
    </div>
    """)

    return "".join(parts)


def _metric_formula_html(metric_name):
    """Return the formula markup for metrics that have one, else an empty string."""
    if metric_name == "Relative Improvement to Human":
        return """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
                    <div style="display: flex; align-items: center; justify-content: center;">
                        <div style="margin-right: 10px;">Relative Improvement to Human =</div>
                        <div style="display: inline-block; text-align: center; padding: 0 10px;">
                            <div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
                            <div style="padding-top: 5px;">s<sub>top_human</sub> - s<sub>baseline</sub></div>
                        </div>
                        <div style="margin-left: 10px;">× 100%</div>
                    </div>
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
                    <li style="margin-bottom: 5px;">s<sub>top_human</sub> is the top human performance in competition</li>
                </ul>
                <p style="margin-top: 10px;">This metric normalizes scores by setting the baseline solution to 0 and the top human solution to 100.</p>
            </div>
            """
    if metric_name == "Absolute Improvement to Baseline":
        return """
            <div style="margin: 15px 0;">
                <p style="margin-bottom: 10px; font-weight: 500;">Formula:</p>
                <div style="background-color: #111; padding: 20px; border-radius: 5px; text-align: center; margin-bottom: 15px; font-size: 18px; line-height: 1.5; border: 1px solid #333;">
                    <div style="display: flex; align-items: center; justify-content: center;">
                        <div style="margin-right: 10px;">Absolute Improvement to Baseline =</div>
                        <div style="display: inline-block; text-align: center; padding: 0 10px;">
                            <div style="border-bottom: 1px solid #aaa; padding-bottom: 5px;">s<sub>agent</sub> - s<sub>baseline</sub></div>
                            <div style="padding-top: 5px;">s<sub>baseline</sub></div>
                        </div>
                        <div style="margin-left: 10px;">× 100%</div>
                    </div>
                </div>
                <p style="margin-top: 10px; font-weight: 500;">Where:</p>
                <ul style="list-style-type: disc; padding-left: 25px; margin-top: 8px;">
                    <li style="margin-bottom: 5px;">s<sub>agent</sub> is the agent's test performance</li>
                    <li style="margin-bottom: 5px;">s<sub>baseline</sub> is the baseline test performance</li>
                </ul>
                <p style="margin-top: 10px;">This metric measures the percentage improvement of an agent's performance over the baseline solution.</p>
            </div>
            """
    return ""


def _build_metric_definitions_html(metrics_config):
    """Build the "metric definitions" section shown under the table.

    Args:
        metrics_config (dict): Metric name -> info dict; the optional
            'description' entry is used as the definition text

    Returns:
        str: One definition card per configured metric, wrapped in a container
    """
    parts = ['<div class="metric-definitions">']
    for metric_name, metric_info in metrics_config.items():
        metric_description = metric_info.get('description', '')
        # Empty for metrics without a formula, which yields the plain card
        formula_html = _metric_formula_html(metric_name)
        parts.append(f'<div class="metric-definition"><h4>{metric_name}</h4><p>{metric_description}</p>{formula_html}</div>')
    parts.append('</div>')
    return "".join(parts)


def render_leaderboard_table(display_df, metric_columns, primary_metric):
    """
    Render the custom HTML leaderboard table followed by the metric definitions

    Args:
        display_df (pandas.DataFrame): The DataFrame with the display data
        metric_columns (list): List of metric column names
        primary_metric (str): The name of the primary metric
    """
    from src.components.header import render_section_header
    from src.utils.config import metrics_config

    # Display model ranking header without the box
    render_section_header("Model Rankings")

    # Build the table markup
    metric_groups = _group_metric_columns(metric_columns, primary_metric, metrics_config)
    html_table = _build_table_html(display_df, metric_groups)

    # Styling for the metric definitions section
    metrics_css = """
    <style>
    .metric-definitions {
        margin-top: 30px;
        padding-top: 20px;
        border-top: 1px solid #333;
    }
    .metric-definition {
        background-color: #1a1a1a;
        border-radius: 8px;
        padding: 12px 16px;
        margin-bottom: 16px;
    }
    .metric-definition h4 {
        margin-top: 0;
        color: #a5b4fc;
    }
    .metric-definition p {
        margin-bottom: 0;
        color: #e2e8f0;
    }
    </style>
    """

    metrics_html = _build_metric_definitions_html(metrics_config)

    # Table, styling and definitions are emitted as three separate elements
    st.markdown(html_table, unsafe_allow_html=True)
    st.markdown(metrics_css, unsafe_allow_html=True)
    st.markdown(metrics_html, unsafe_allow_html=True)

def render_empty_state():
    """
    Show the warning box used when there is no data to display
    """
    empty_state_html = """
    <div class="warning-box">
        <strong>No data to display.</strong> Please select at least one task to view the data.
    </div>
    """
    st.markdown(empty_state_html, unsafe_allow_html=True)