"""Streamlit page that renders the Long Context leaderboard."""

import pandas as pd
import streamlit as st

from utils.style import style_long_context

# Explanatory text shown beneath the table. Kept at column 0 so the Markdown
# list markers are never affected by Python source indentation.
_NOTES_MARKDOWN = """
**Context Lengths**:
- 8K: 8,000 tokens
- 16K: 16,000 tokens
- 32K: 32,000 tokens

**Benchmark Details**:
- Evaluated on Symbolic, Medium, and Hard subtasks
- AUC scores aggregated across context lengths
- Larger context evaluations limited by compute constraints and model performance
"""


@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the leaderboard CSV from ``data/long_context.csv``.

    The result is wrapped in ``st.cache_data`` so the file is not re-read on
    every Streamlit rerun. The path is relative to the process working
    directory.
    """
    return pd.read_csv("data/long_context.csv")


def show() -> None:
    """Render the page: title, leaderboard table, and explanatory notes."""
    st.title("Long Context Leaderboard")

    # Load and style data.
    # NOTE(review): style_long_context is defined in utils.style; presumably
    # it returns a pandas Styler or DataFrame accepted by st.dataframe.
    df = load_data()
    styled_df = style_long_context(df)

    # Display the dataframe with built-in sort on column click.
    st.dataframe(
        styled_df,
        use_container_width=True,
        height=600,
        hide_index=True,
        column_config={
            "Model": st.column_config.TextColumn(width="large"),
            "8K": st.column_config.NumberColumn(format="%.2f"),
            "16K": st.column_config.NumberColumn(format="%.2f"),
            "32K": st.column_config.NumberColumn(format="%.2f"),
            "Average↑": st.column_config.NumberColumn(
                format="%.2f",
                help="Average across all context lengths",
            ),
        },
    )

    # Explanatory text describing the columns and the benchmark.
    st.markdown(_NOTES_MARKDOWN)