# MLRC_Bench — src/app.py
# (Hugging Face Space source file; page chrome from the file viewer removed.)
"""
Main entry point for the Model Capability Leaderboard application.
"""
import streamlit as st
# Import configuration
from src.utils.config import app_config, metrics_config
# Import data functions
from src.utils.data_loader import (
load_metric_data,
process_data,
filter_and_prepare_data,
format_display_dataframe
)
# Import styles
from src.styles.base import load_all_styles
# Import components
from src.components.header import render_page_header
from src.components.filters import (
initialize_session_state,
render_metric_selection,
render_task_selection
)
from src.components.leaderboard import render_leaderboard_table, render_empty_state
from src.components.tasks import render_task_descriptions
def setup_page():
    """Configure the Streamlit page and apply the app's global styling.

    Applies the title/layout/sidebar settings from ``app_config``, loads the
    shared stylesheet bundle, and injects CSS that pins the UI to the dark
    palette regardless of the visitor's browser theme.
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state'],
    )

    # Shared stylesheet bundle used by every component.
    load_all_styles()

    # Inline override: force the dark palette even when the browser asks
    # for a light theme.
    dark_mode_css = """
    <style>
    /* Force dark mode regardless of browser settings */
    .stApp {
        background-color: #1a202c !important;
        color: #e2e8f0 !important;
    }
    /* Override Streamlit's default styling to ensure dark mode */
    .stTextInput, .stSelectbox, .stMultiselect {
        background-color: #2d3748 !important;
        color: #e2e8f0 !important;
    }
    .stButton>button {
        background-color: #4a5568 !important;
        color: #e2e8f0 !important;
    }
    /* Override header and text colors */
    h1, h2, h3, h4, h5, h6, p, span, div {
        color: #e2e8f0 !important;
    }
    /* Ensure tab styling is consistent */
    .stTabs [data-baseweb="tab-list"] {
        background-color: #1a202c !important;
    }
    .stTabs [data-baseweb="tab"] {
        color: #e2e8f0 !important;
    }
    </style>
    """
    st.markdown(dark_mode_css, unsafe_allow_html=True)
def main():
    """Render the leaderboard application.

    Sets up the page, loads the primary metric's data to seed session state,
    then renders two tabs: the filterable leaderboard and the benchmark
    details / task descriptions.
    """
    setup_page()
    render_page_header()

    # Seed the base dataframe and session state from the first configured
    # metric (insertion order of metrics_config).
    primary_metric = next(iter(metrics_config))
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)
    initialize_session_state(df)

    # NOTE: the scraped source showed mojibake here ("πŸ“Š"); these are the
    # intended UTF-8 emoji restored.
    tabs = st.tabs(["📊 Leaderboard", "📑 Benchmark Details"])

    # Tab 1: Leaderboard
    with tabs[0]:
        selected_metrics = render_metric_selection()
        selected_tasks = render_task_selection(df)

        # Guard BOTH selections: indexing selected_metrics[0] below would
        # raise IndexError if the user deselected every metric.
        if selected_tasks and selected_metrics:
            display_df, all_metric_columns, primary_metric = _build_display_data(
                selected_metrics, selected_tasks
            )
            render_leaderboard_table(display_df, all_metric_columns, primary_metric)
        else:
            render_empty_state()

    # Tab 2: Benchmark Details
    with tabs[1]:
        render_task_descriptions()


def _load_metric_display(metric, selected_tasks):
    """Load, filter, and format one metric's data for display.

    Returns the (display_df, metric_columns) pair produced by
    format_display_dataframe. Filtering honors the model types currently
    held in session state.
    """
    metric_data = load_metric_data(metrics_config[metric]["file"])
    metric_df = process_data(metric_data)
    filtered_df = filter_and_prepare_data(
        metric_df, selected_tasks, st.session_state.selected_model_types
    )
    return format_display_dataframe(filtered_df, selected_tasks)


def _metric_prefix(metric):
    """Short column prefix distinguishing a secondary metric's columns."""
    if metric == "Absolute Improvement to Baseline":
        return "Abs"
    # Initialism from the metric name, e.g. "Relative Gain" -> "RG".
    return "".join(word[0] for word in metric.split()).upper()


def _build_display_data(selected_metrics, selected_tasks):
    """Assemble the leaderboard dataframe for all selected metrics.

    The first metric in selected_metrics is the primary one and provides
    the base columns; each additional metric's columns are merged in under
    a "<prefix>: <col>" name so they never collide with the primary's.

    Returns (display_df, all_metric_columns, primary_metric).
    """
    primary_metric = selected_metrics[0]
    display_df, metric_columns = _load_metric_display(primary_metric, selected_tasks)

    all_metric_columns = metric_columns.copy()
    for metric in selected_metrics[1:]:
        metric_display_df, _ = _load_metric_display(metric, selected_tasks)
        prefix = _metric_prefix(metric)
        # Merge only the known metric columns, renamed with the prefix.
        for col in metric_columns:
            if col in metric_display_df.columns:
                prefixed = f"{prefix}: {col}"
                display_df[prefixed] = metric_display_df[col]
                all_metric_columns.append(prefixed)

    return display_df, all_metric_columns, primary_metric
# Footer removed per user request
# Entry point when executed as a script (e.g. `streamlit run src/app.py`).
if __name__ == "__main__":
    main()