"""Streamlit page that renders the ExpertLongBench leaderboard.

The page shows a logo and title banner, then two tabs: a rank-shaded score
table built from ``src/models.json`` and a "Benchmark Details" tab.

NOTE(review): the copy of this file under review had all HTML markup stripped
from its string literals.  The tags below are a minimal reconstruction around
the surviving text and interpolated values -- confirm them (and restore the
original styling and link targets) against the deployed page.
"""

import base64
from io import BytesIO

import pandas as pd
import streamlit as st
from PIL import Image

# Per-task score columns T1..T11, defined once and reused everywhere.
TASK_COLS = [f"T{i}" for i in range(1, 12)]

# ─── Page config ──────────────────────────────────────────────────────────────
st.set_page_config(page_title="ExpertLongBench Leaderboard", layout="wide")

# Display logo: re-encode the image as PNG and inline it as a base64 data URI.
# The context manager closes the underlying file handle once it is encoded.
with Image.open("src/ExpertLongBench.png") as logo_image:
    buffered = BytesIO()
    logo_image.save(buffered, format="PNG")
img_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

st.markdown(
    f"""
<div style="text-align:center;"><img src="data:image/png;base64,{img_data}" alt="ExpertLongBench logo" style="max-width:100%; height:auto;"/></div>
""",
    unsafe_allow_html=True,
)

# TODO(review): "Paper", "GitHub" and "Kaggle" were presumably hyperlinks; their
# targets are not recoverable from the reviewed copy and must be restored.
st.markdown(
    '''
<div style="text-align:center;">
<h2>ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation with Structured Checklists</h2>
<p>📑 Paper | 💻 GitHub | K Kaggle
⚙️ Version: V1 | # Models: 12 | Updated: May 2025</p>
</div>
''',
    unsafe_allow_html=True,
)


# ─── Load data ────────────────────────────────────────────────────────────────
@st.cache_data
def load_data(path="src/models.json"):
    """Load the leaderboard records and add average and rank columns.

    Args:
        path: Location of a JSON-lines file (read with ``lines=True``) that
            provides the columns ``T1`` .. ``T11``; the table rendering below
            additionally reads a ``Model`` column.

    Returns:
        A DataFrame with an added ``Avg`` column (row mean of ``T1`` .. ``T11``
        rounded to one decimal) and, for each score column and ``Avg``, a
        ``<col>_rank`` integer column where 1 is the highest value and ties
        share the smallest rank.
    """
    df = pd.read_json(path, lines=True)
    df["Avg"] = df[TASK_COLS].mean(axis=1).round(1)
    # Compute rank per column (1 = best).
    for col in TASK_COLS + ["Avg"]:
        df[f"{col}_rank"] = df[col].rank(ascending=False, method="min").astype(int)
    return df


def _cell_style(rank, max_rank):
    """Return the inline CSS for a score cell with the given rank."""
    # ``or 1`` keeps the divisor non-zero when every row shares rank 1.
    norm = 1 - (rank - 1) / ((max_rank - 1) or 1)
    # Red and green fade from (182, 243) at rank 1 to 255 at the worst rank;
    # blue is fixed at 255, so rank 1 renders as rgb(182,243,255).
    # NOTE(review): the original comment described the top colour as green
    # (182,243,182) -- confirm whether blue was meant to be interpolated too.
    r = int(255 - norm * (255 - 182))
    g = int(255 - norm * (255 - 243))
    b = 255
    bold = "font-weight:bold;" if rank == 1 else ""
    return f"background-color:rgb({r},{g},{b}); padding:6px; {bold}"


def _render_table(df, cols, max_ranks):
    """Build the leaderboard as a raw HTML table string."""
    parts = ["<table>"]
    # header
    parts.append("<tr>" + "".join(f"<th>{col}</th>" for col in cols) + "</tr>")
    # rows
    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in cols:
            val = row[col]
            if col == "Model":
                parts.append(f"<td>{val}</td>")
            else:
                style = _cell_style(int(row[f"{col}_rank"]), max_ranks[col])
                parts.append(f"<td style='{style}'>{val}</td>")
        parts.append("</tr>")
    parts.append("</table>")
    return "".join(parts)


df = load_data()

# Precompute max ranks for color scaling
score_cols = TASK_COLS + ["Avg"]
max_ranks = {col: df[f"{col}_rank"].max() for col in score_cols}

# ─── Tabs ─────────────────────────────────────────────────────────────────────
tab1, tab2 = st.tabs(["Leaderboard", "Benchmark Details"])

with tab1:
    cols = ["Model"] + score_cols
    st.markdown(_render_table(df, cols, max_ranks), unsafe_allow_html=True)

with tab2:
    # NOTE(review): both section bodies are empty strings in the reviewed copy.
    st.markdown("## Abstract")
    st.write("")
    st.markdown("## Pipeline")
    st.write("")