""" Main entry point for the Model Capability Leaderboard application. """ import streamlit as st # Import configuration from src.utils.config import app_config, metrics_config # Import data functions from src.utils.data_loader import ( load_metric_data, process_data, filter_and_prepare_data, format_display_dataframe ) # Import styles from src.styles.base import load_all_styles # Import components from src.components.header import render_page_header from src.components.filters import ( initialize_session_state, render_metric_selection, render_task_selection, render_model_type_selection ) from src.components.leaderboard import render_leaderboard_table, render_empty_state from src.components.tasks import render_task_descriptions def setup_page(): """ Set up the Streamlit page configuration """ st.set_page_config( page_title=app_config['title'], layout=app_config['layout'], initial_sidebar_state=app_config['initial_sidebar_state'] ) # Load all styles load_all_styles() # Force dark mode using custom CSS st.markdown(""" """, unsafe_allow_html=True) def main(): """ Main application function """ # Set up page setup_page() # Render header render_page_header() # Load primary metric data (first metric in config) primary_metric = list(metrics_config.keys())[0] metric_data = load_metric_data(metrics_config[primary_metric]["file"]) df = process_data(metric_data) # Initialize session state initialize_session_state(df) # Create tabs tabs = st.tabs(["📊 Leaderboard", "📑 Benchmark Details"]) # Tab 1: Leaderboard with tabs[0]: # Render filter components selected_metrics = render_metric_selection() # Continue with other filters selected_tasks = render_task_selection(df) selected_model_types = render_model_type_selection(df) # Render leaderboard if selections are valid if selected_tasks and selected_model_types: # Load the primary metric data first (always the first in selected_metrics) primary_metric = selected_metrics[0] primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"]) primary_df = process_data(primary_metric_data) # Filter and prepare data for primary metric filtered_df = filter_and_prepare_data(primary_df, selected_tasks, selected_model_types) # Format data for display display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks) # If additional metrics are selected, add their data too all_metric_columns = metric_columns.copy() for metric in selected_metrics[1:]: metric_info = metrics_config[metric] metric_data = load_metric_data(metric_info["file"]) metric_df = process_data(metric_data) # Process and merge the additional metric data metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, selected_model_types) metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks) # Create a meaningful prefix for this metric if metric == "Absolute Improvement to Baseline": prefix = "Abs" else: # Use first word of each part of the metric name prefix = "".join([word[0] for word in metric.split()]).upper() # Combine the dataframes - keep only metric columns from metric_display_df for col in metric_columns: if col in metric_display_df.columns: # Add columns with metric prefix display_df[f"{prefix}: {col}"] = metric_display_df[col] # Add to the list of all metric columns all_metric_columns.append(f"{prefix}: {col}") # Render the leaderboard table render_leaderboard_table(display_df, all_metric_columns, primary_metric) else: # Show empty state render_empty_state() # Tab 2: Benchmark Details with tabs[1]: # Render task descriptions render_task_descriptions() # Footer removed per user request if __name__ == "__main__": main()