"""
Main entry point for the Model Capability Leaderboard application.
"""
import streamlit as st

# Import configuration
from src.utils.config import app_config, metrics_config
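# Based on how they are used below: metrics_config maps each metric display name to a
# dict containing a "file" path, and app_config provides 'title', 'layout', and
# 'initial_sidebar_state'.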

# Import data functions
from src.utils.data_loader import (
    load_metric_data, 
    process_data, 
    filter_and_prepare_data,
    format_display_dataframe
)

# Import styles
from src.styles.base import load_all_styles

# Import components
from src.components.header import render_page_header
from src.components.filters import (
    initialize_session_state, 
    render_metric_selection, 
    render_task_selection
)
from src.components.leaderboard import render_leaderboard_table, render_empty_state
from src.components.tasks import render_task_descriptions

def setup_page():
    """
    Set up the Streamlit page configuration
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state']
    )
    
    # Load all styles
    load_all_styles()
    
    # Force dark mode using custom CSS
    st.markdown("""
    <style>
    /* Force dark mode regardless of browser settings */
    .stApp {
        background-color: #1a202c !important;
        color: #e2e8f0 !important;
    }
    /* Override Streamlit's default styling to ensure dark mode */
    .stTextInput, .stSelectbox, .stMultiselect {
        background-color: #2d3748 !important;
        color: #e2e8f0 !important;
    }
    .stButton>button {
        background-color: #4a5568 !important;
        color: #e2e8f0 !important;
    }
    /* Override header and text colors */
    h1, h2, h3, h4, h5, h6, p, span, div {
        color: #e2e8f0 !important;
    }
    /* Ensure tab styling is consistent */
    .stTabs [data-baseweb="tab-list"] {
        background-color: #1a202c !important;
    }
    .stTabs [data-baseweb="tab"] {
        color: #e2e8f0 !important;
    }
    </style>
    """, unsafe_allow_html=True)

def main():
    """
    Main application function
    """
    # Set up page
    setup_page()
    
    # Render header
    render_page_header()
    
    # Load primary metric data (first metric in config)
    primary_metric = list(metrics_config.keys())[0]
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)
    
    # Initialize session state
    initialize_session_state(df)
    
    # Create tabs
    tabs = st.tabs(["📊 Leaderboard", "📑 Benchmark Details"])
    
    # Tab 1: Leaderboard
    with tabs[0]:
        # Render filter components
        selected_metrics = render_metric_selection()
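        # selected_metrics is assumed to contain at least one entry; its first element
        # is treated as the primary metric further down.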
        
        # Continue with other filters
        selected_tasks = render_task_selection(df)
        
        # Render leaderboard if selections are valid
        if selected_tasks:
            # Load the primary metric data first (always the first in selected_metrics)
            primary_metric = selected_metrics[0]
            primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"])
            primary_df = process_data(primary_metric_data)
            
            # Filter and prepare data for primary metric
            filtered_df = filter_and_prepare_data(primary_df, selected_tasks, st.session_state.selected_model_types)
            
            # Format data for display
            display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks)
            
            # If additional metrics are selected, add their data too
            all_metric_columns = metric_columns.copy()
            
            for metric in selected_metrics[1:]:
                metric_info = metrics_config[metric]
                metric_data = load_metric_data(metric_info["file"])
                metric_df = process_data(metric_data)
                
                # Process and merge the additional metric data
                metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, st.session_state.selected_model_types)
                metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks)
                
                # Create a meaningful prefix for this metric
                if metric == "Absolute Improvement to Baseline":
                    prefix = "Abs"
                else:
                    # Build an acronym from the first letter of each word in the metric name
                    prefix = "".join([word[0] for word in metric.split()]).upper()
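                    # e.g. a hypothetical metric named "Weighted Task Score" would get the prefix "WTS"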
                
                # Combine the dataframes - keep only metric columns from metric_display_df
                for col in metric_columns:
                    if col in metric_display_df.columns:
                        # Add columns with metric prefix
                        display_df[f"{prefix}: {col}"] = metric_display_df[col]
                        # Add to the list of all metric columns
                        all_metric_columns.append(f"{prefix}: {col}")
            
            # Render the leaderboard table
            render_leaderboard_table(display_df, all_metric_columns, primary_metric)
        else:
            # Show empty state
            render_empty_state()
    
    # Tab 2: Benchmark Details
    with tabs[1]:
        # Render task descriptions
        render_task_descriptions()
    
    # Footer removed per user request

if __name__ == "__main__":
    main()