Spaces:

launch
/

ExpertLongBench

Running

App Files Files Community

ExpertLongBench / src /streamlit_app.py

shezamunir

Update src/streamlit_app.py

643980c verified 23 days ago

raw

history blame

3.89 kB

	import streamlit as st
	import pandas as pd
	from PIL import Image
	import base64
	from io import BytesIO

	# ─── Page config ──────────────────────────────────────────────────────────────
	st.set_page_config(page_title="ExpertLongBench Leaderboard", layout="wide")

	# Re-encode the logo as PNG in memory and embed it as a base64 data URI so it
	# can be centered and sized with inline HTML/CSS below.
	buffered = BytesIO()
	# Opening the image in a `with` block releases the underlying file handle as
	# soon as the pixels have been written to the buffer.
	with Image.open("src/ExpertLongBench.png") as logo_image:
	    logo_image.save(buffered, format="PNG")
	img_data = base64.b64encode(buffered.getvalue()).decode("utf-8")

	# Display logo
	logo_html = f"""
	<div class="logo-container" style="display:flex; justify-content: center;">
	<img src="data:image/png;base64,{img_data}" style="width:50%; max-width:700px;"/>
	</div>
	"""
	st.markdown(logo_html, unsafe_allow_html=True)

	# Title and metadata banner shown under the logo.
	# The literal is a raw string because the markup contains "\|", which is not
	# a valid escape sequence in a regular string literal (Python warns about it).
	# The raw prefix keeps the emitted text byte-identical.
	# TODO: fill in the empty Paper / GitHub / Kaggle hrefs and remove the
	# "<add links later>" placeholder tag from the markup.
	# NOTE(review): confirm the backslash before each "|" is intended; it may be
	# displayed literally inside this HTML block.
	header_html = r'''
	<div class="header">
	<br/>
	<p style="font-size:22px;">
	ExpertLongBench: Benchmarking Language Models on Expert-Level Long-Form Generation with Structured Checklists
	</p>
	<p style="font-size:20px;">
	📑 <a href="">Paper</a> \| 💻 <a href="">GitHub</a> \| <strong>K</strong> <a href="">Kaggle</a> <add links later>
	⚙️ <strong>Version</strong>: <strong>V1</strong> \| <strong># Models</strong>: 12 \| Updated: <strong>May 2025</strong>
	</p>
	</div>
	'''
	st.markdown(header_html, unsafe_allow_html=True)
	# ─── Load data ────────────────────────────────────────────────────────────────
	@st.cache_data
	def load_data(path: str = "src/models.json") -> pd.DataFrame:
	    """Load the leaderboard records and derive the average score and ranks.

	    Reads a JSON Lines file, adds an ``Avg`` column (row mean of the
	    ``T1``..``T11`` columns, rounded to one decimal) and, for each of those
	    columns plus ``Avg``, an integer ``<col>_rank`` column in which 1 marks
	    the highest value and tied values share the smallest rank.

	    Args:
	        path: Location of the JSON Lines file to read.

	    Returns:
	        The loaded frame with ``Avg`` and the ``*_rank`` columns added.
	    """
	    df = pd.read_json(path, lines=True)
	    score_cols = [f"T{i}" for i in range(1, 12)]
	    df["Avg"] = df[score_cols].mean(axis=1).round(1)
	    # Compute rank per column (1 = best)
	    for col in [*score_cols, "Avg"]:
	        # na_option="bottom" places missing scores last instead of producing
	        # NaN ranks, which would make the integer cast below raise.
	        ranks = df[col].rank(ascending=False, method="min", na_option="bottom")
	        df[f"{col}_rank"] = ranks.astype(int)
	    return df

	df = load_data()

	# Worst (largest) rank observed in every scored column; the table renderer
	# uses it to scale each cell's background tint.
	score_cols = [*(f"T{n}" for n in range(1, 12)), "Avg"]
	max_ranks = {}
	for score_name in score_cols:
	    max_ranks[score_name] = df[f"{score_name}_rank"].max()

	# ─── Tabs ──────────────────────────────────────────────────────────────────────
	tab1, tab2 = st.tabs(["Leaderboard", "Benchmark Details"])


	def _score_cell(value, rank, max_rank):
	    """Return one ``<td>`` for a score, tinted by rank and bold for rank 1.

	    Rank 1 gets the strongest tint, rgb(182,243,255); the worst rank
	    (``max_rank``) is plain white, rgb(255,255,255).
	    """
	    # `or 1` avoids a division by zero when every row shares rank 1.
	    norm = 1 - (rank - 1) / ((max_rank - 1) or 1)
	    r = int(255 - norm * (255 - 182))
	    g = int(255 - norm * (255 - 243))
	    # NOTE(review): an earlier comment described a green (182,243,182) target,
	    # but the blue channel has always been fixed at 255. Kept unchanged here;
	    # confirm the intended colour.
	    b = 255
	    bold = "font-weight:bold;" if rank == 1 else ""
	    style = f"background-color:rgb({r},{g},{b}); padding:6px; {bold}"
	    return f"<td style='{style}'>{value}</td>"


	with tab1:
	    # Build the table as raw HTML so each cell can carry its own inline style.
	    # Fragments are collected in a list and joined once rather than growing a
	    # string with repeated `+=`.
	    cols = ["Model"] + [f"T{i}" for i in range(1, 12)] + ["Avg"]
	    parts = ["<table style='border-collapse:collapse; width:100%; font-size:14px;'>"]
	    # header
	    parts.append(
	        "<tr>" + "".join(f"<th style='padding:6px;'>{col}</th>" for col in cols) + "</tr>"
	    )
	    # rows
	    for _, row in df.iterrows():
	        parts.append("<tr>")
	        for col in cols:
	            if col == "Model":
	                parts.append(
	                    f"<td style='padding:6px; text-align:left;'>{row[col]}</td>"
	                )
	            else:
	                parts.append(
	                    _score_cell(row[col], int(row[f"{col}_rank"]), max_ranks[col])
	                )
	        parts.append("</tr>")
	    parts.append("</table>")
	    html = "".join(parts)
	    st.markdown(html, unsafe_allow_html=True)

	with tab2:
	    # Each section is a markdown heading followed by its body; the bodies
	    # are still placeholder text awaiting the final abstract and figure.
	    for heading, placeholder in (
	        ("## Abstract", "<add final abstract here>"),
	        ("## Pipeline", "<add final pipeline figure here>"),
	    ):
	        st.markdown(heading)
	        st.write(placeholder)