""" | |
Main entry point for the Model Capability Leaderboard application. | |
""" | |
import streamlit as st

# Import configuration
from src.utils.config import app_config, metrics_config

# Import data functions
from src.utils.data_loader import (
    load_metric_data,
    process_data,
    filter_and_prepare_data,
    format_display_dataframe
)

# Import styles
from src.styles.base import load_all_styles

# Import components
from src.components.header import render_page_header
from src.components.filters import (
    initialize_session_state,
    render_metric_selection,
    render_task_selection
)
from src.components.leaderboard import render_leaderboard_table, render_empty_state
from src.components.tasks import render_task_descriptions


def setup_page():
    """
    Set up the Streamlit page configuration.
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state']
    )

    # Load all styles
    load_all_styles()

    # Force dark mode using custom CSS
    st.markdown("""
        <style>
        /* Force dark mode regardless of browser settings */
        .stApp {
            background-color: #1a202c !important;
            color: #e2e8f0 !important;
        }

        /* Override Streamlit's default styling to ensure dark mode */
        .stTextInput, .stSelectbox, .stMultiselect {
            background-color: #2d3748 !important;
            color: #e2e8f0 !important;
        }

        .stButton>button {
            background-color: #4a5568 !important;
            color: #e2e8f0 !important;
        }

        /* Override header and text colors */
        h1, h2, h3, h4, h5, h6, p, span, div {
            color: #e2e8f0 !important;
        }

        /* Ensure tab styling is consistent */
        .stTabs [data-baseweb="tab-list"] {
            background-color: #1a202c !important;
        }

        .stTabs [data-baseweb="tab"] {
            color: #e2e8f0 !important;
        }
        </style>
    """, unsafe_allow_html=True)


def main():
    """
    Main application function.
    """
    # Set up page
    setup_page()

    # Render header
    render_page_header()

    # Load primary metric data (first metric in config)
    primary_metric = list(metrics_config.keys())[0]
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)

    # Initialize session state
    initialize_session_state(df)

    # Create tabs
    tabs = st.tabs(["Leaderboard", "Benchmark Details"])

    # Tab 1: Leaderboard
    with tabs[0]:
        # Render filter components
        selected_metrics = render_metric_selection()

        # Continue with other filters
        selected_tasks = render_task_selection(df)

        # Render leaderboard if selections are valid
        if selected_tasks:
            # Load the primary metric data first (always the first in selected_metrics)
            primary_metric = selected_metrics[0]
            primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"])
            primary_df = process_data(primary_metric_data)

            # Filter and prepare data for the primary metric
            filtered_df = filter_and_prepare_data(primary_df, selected_tasks, st.session_state.selected_model_types)

            # Format data for display
            display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks)

            # If additional metrics are selected, add their data too
            all_metric_columns = metric_columns.copy()
            for metric in selected_metrics[1:]:
                metric_info = metrics_config[metric]
                metric_data = load_metric_data(metric_info["file"])
                metric_df = process_data(metric_data)

                # Process and merge the additional metric data
                metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, st.session_state.selected_model_types)
                metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks)

                # Create a meaningful prefix for this metric
if metric == "Absolute Improvement to Baseline": | |
prefix = "Abs" | |
else: | |
# Use first word of each part of the metric name | |
prefix = "".join([word[0] for word in metric.split()]).upper() | |
                # Combine the dataframes - keep only metric columns from metric_display_df
                for col in metric_columns:
                    if col in metric_display_df.columns:
                        # Add columns with metric prefix
                        display_df[f"{prefix}: {col}"] = metric_display_df[col]
                        # Add to the list of all metric columns
                        all_metric_columns.append(f"{prefix}: {col}")

            # Render the leaderboard table
            render_leaderboard_table(display_df, all_metric_columns, primary_metric)
        else:
            # Show empty state
            render_empty_state()

    # Tab 2: Benchmark Details
    with tabs[1]:
        # Render task descriptions
        render_task_descriptions()

    # Footer removed per user request


if __name__ == "__main__":
    main()