"""
Main entry point for the Model Capability Leaderboard application.
"""
import streamlit as st

# Import configuration
from src.utils.config import app_config, metrics_config

# Import data functions
from src.utils.data_loader import (
    load_metric_data,
    process_data,
    filter_and_prepare_data,
    format_display_dataframe
)

# Import styles
from src.styles.base import load_all_styles

# Import components
from src.components.header import render_page_header
from src.components.filters import (
    initialize_session_state,
    render_metric_selection,
    render_task_selection
)
from src.components.leaderboard import render_leaderboard_table, render_empty_state
from src.components.tasks import render_task_descriptions


def setup_page():
    """
    Set up the Streamlit page configuration.
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state']
    )

    # Load all styles
    load_all_styles()

    # Force dark mode using custom CSS
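    # (an alternative would be Streamlit's native theming via .streamlit/config.toml;
    # this inline CSS override takes effect regardless of browser settings)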
    st.markdown("""
    <style>
    /* Force dark mode regardless of browser settings */
    .stApp {
        background-color: #1a202c !important;
        color: #e2e8f0 !important;
    }

    /* Override Streamlit's default styling to ensure dark mode */
    .stTextInput, .stSelectbox, .stMultiselect {
        background-color: #2d3748 !important;
        color: #e2e8f0 !important;
    }

    .stButton>button {
        background-color: #4a5568 !important;
        color: #e2e8f0 !important;
    }

    /* Override header and text colors */
    h1, h2, h3, h4, h5, h6, p, span, div {
        color: #e2e8f0 !important;
    }

    /* Ensure tab styling is consistent */
    .stTabs [data-baseweb="tab-list"] {
        background-color: #1a202c !important;
    }

    .stTabs [data-baseweb="tab"] {
        color: #e2e8f0 !important;
    }
    </style>
    """, unsafe_allow_html=True)


def main():
    """
    Main application function.
    """
    # Set up page
    setup_page()

    # Render header
    render_page_header()

    # Load primary metric data (first metric in config)
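    # (relies on Python 3.7+ insertion-ordered dicts: the first key in
    # metrics_config is treated as the default metric)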
    primary_metric = list(metrics_config.keys())[0]
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)

    # Initialize session state
    initialize_session_state(df)

    # Create tabs
    tabs = st.tabs(["📊 Leaderboard", "📑 Benchmark Details"])

    # Tab 1: Leaderboard
    with tabs[0]:
        # Render filter components
        selected_metrics = render_metric_selection()

        # Continue with other filters
        selected_tasks = render_task_selection(df)

        # Render leaderboard if selections are valid
        if selected_tasks:
            # Load the primary metric data first (always the first in selected_metrics)
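            # (render_metric_selection is expected to always return at least one metric)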
            primary_metric = selected_metrics[0]
            primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"])
            primary_df = process_data(primary_metric_data)

            # Filter and prepare data for primary metric
            filtered_df = filter_and_prepare_data(primary_df, selected_tasks, st.session_state.selected_model_types)

            # Format data for display
            display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks)

            # If additional metrics are selected, add their data too
            all_metric_columns = metric_columns.copy()
            for metric in selected_metrics[1:]:
                metric_info = metrics_config[metric]
                metric_data = load_metric_data(metric_info["file"])
                metric_df = process_data(metric_data)

                # Process and merge the additional metric data
                metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, st.session_state.selected_model_types)
                metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks)

                # Create a meaningful prefix for this metric
                if metric == "Absolute Improvement to Baseline":
                    prefix = "Abs"
                else:
                    # Use the first letter of each word in the metric name
                    prefix = "".join([word[0] for word in metric.split()]).upper()
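                    # e.g. a hypothetical metric named "Win Rate" would yield the prefix "WR"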
                # Combine the dataframes - keep only metric columns from metric_display_df
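                # (plain column assignment aligns on the pandas index, so both
                # frames are assumed to index the same models in the same way)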
                for col in metric_columns:
                    if col in metric_display_df.columns:
                        # Add columns with metric prefix
                        display_df[f"{prefix}: {col}"] = metric_display_df[col]

                        # Add to the list of all metric columns
                        all_metric_columns.append(f"{prefix}: {col}")

            # Render the leaderboard table
            render_leaderboard_table(display_df, all_metric_columns, primary_metric)
        else:
            # Show empty state
            render_empty_state()
    # Tab 2: Benchmark Details
    with tabs[1]:
        # Render task descriptions
        render_task_descriptions()

    # Footer removed per user request


if __name__ == "__main__":
    main()