|
import gradio as gr |
|
import pandas as pd |
|
import numpy as np |
|
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Static page content and data location
# ---------------------------------------------------------------------------

# CSV export link of the Google Sheet; this is what `get_result_data` reads.
GOOGLE_SHEET_URL = "https://docs.google.com/spreadsheets/d/1qrD7plUdrBwAw7G6UeDVZAaV9ihxaNAcoiKwSaqotR4/export?gid=0&format=csv"

# Heading markup handed to `gr.HTML` when the page is built.
TITLE = "<h1>M-RewardBench Leaderboard</h1>"

# Text handed to `gr.Markdown` directly below the heading (currently just
# the project URL).
INTRODUCTION_TEXT = "https://m-rewardbench.github.io/"

# Not referenced anywhere in the visible part of this file.
ABOUT_TEXT = "Welcome to M-RewardBench Leaderboard!"
|
|
|
|
|
class AutoEvalColumn:
    """Registry of leaderboard column specs, kept as dict-valued class attributes.

    ``init_leaderboard`` scans ``AutoEvalColumn.__dict__`` and treats every
    attribute whose value is a ``dict`` as one column spec with the keys
    ``name``, ``type``, ``displayed_by_default`` and ``never_hidden``.

    The attributes below are deliberately left un-annotated: on Python
    versions that store class annotations eagerly, annotating them would put
    an ``__annotations__`` dict into the class namespace, and the dict scan
    would mistake it for a column spec.
    """

    # Column holding the model name; always visible.
    model = {
        "name": "Model",
        "type": "markdown",
        "displayed_by_default": True,
        "never_hidden": True,
    }

    # Column holding the model type, displayed under the header "MT".
    model_type = {
        "name": "MT",
        "type": "markdown",
        "displayed_by_default": True,
        "never_hidden": True,
    }

    @classmethod
    def add_columns_from_df(cls, df, columns):
        """Register one hideable markdown column spec per entry of ``columns``.

        Each accepted name becomes a class attribute of the same name whose
        value is the spec dict. A name is skipped when:

        * it equals ``"model"`` case-insensitively (already covered by
          ``model``),
        * it is already an attribute of this class -- otherwise a column
          called e.g. ``"model_type"`` or ``"add_columns_from_df"`` would
          silently replace the built-in spec or this method, or
        * a spec with the same display ``name`` is already registered --
          otherwise the name would appear twice in the lists built from the
          specs.

        Args:
            df: Unused; kept so existing callers keep working.
            columns: Iterable of column-name strings.
        """
        namespace = vars(cls)  # live view: reflects the setattr calls below
        registered_names = {
            spec["name"]
            for spec in namespace.values()
            if isinstance(spec, dict) and "name" in spec
        }

        for col in columns:
            if col.lower() == "model":
                continue
            if col in namespace or col in registered_names:
                continue
            setattr(cls, col, {
                "name": col,
                "type": "markdown",
                "displayed_by_default": True,
                "never_hidden": False,
            })
            registered_names.add(col)
|
|
|
|
|
def get_result_data():
    """Fetch the results sheet and return it as a pandas DataFrame.

    The CSV is read with ``pd.read_csv`` from ``GOOGLE_SHEET_URL``; any error
    raised by that call propagates to the caller.
    """
    results = pd.read_csv(GOOGLE_SHEET_URL)
    return results
|
|
|
|
|
def init_leaderboard(dataframe):
    """Create the ``Leaderboard`` component for ``dataframe``.

    Column types, the default column selection and the set of columns that
    cannot be deselected are all taken from the dict-valued attributes of
    ``AutoEvalColumn``, in class-namespace order.

    Args:
        dataframe: The table to display.

    Returns:
        A non-interactive ``Leaderboard`` searchable on the "Model" column.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Collect the column specs once; every list below is derived from them.
    column_specs = [
        spec
        for spec in AutoEvalColumn.__dict__.values()
        if isinstance(spec, dict)
    ]

    datatypes = [spec["type"] for spec in column_specs]
    shown_by_default = [
        spec["name"] for spec in column_specs if spec["displayed_by_default"]
    ]
    always_shown = [
        spec["name"] for spec in column_specs if spec.get("never_hidden", False)
    ]

    column_selector = SelectColumns(
        default_selection=shown_by_default,
        cant_deselect=always_shown,
        label="Select Columns to Display:",
    )

    return Leaderboard(
        value=dataframe,
        datatype=datatypes,
        select_columns=column_selector,
        search_columns=["Model"],
        interactive=False,
    )
|
|
|
|
|
def format_model_link(row):
    """Return the value of ``row["Model"]`` unchanged.

    Despite its name, this function does not build a link: it is a
    pass-through for the model name.
    NOTE(review): the name suggests link formatting was intended at some
    point -- confirm whether that logic should be restored.

    Args:
        row: Any mapping-like object with a ``"Model"`` key (for example a
            DataFrame row passed by ``df.apply(..., axis=1)``).

    Returns:
        The model name exactly as stored in ``row``.

    Raises:
        KeyError: If ``row`` has no ``"Model"`` entry.
    """
    return row["Model"]
|
|
|
# Whitespace-separated language identifiers. The UI code shortens the
# matching DataFrame columns to the first three characters of each id.
lang_ids = "eng_Latn arb_Arab tur_Latn rus_Cyrl ces_Latn pol_Latn kor_Hang zho_Hans zho_Hant fra_Latn ell_Grek deu_Latn ron_Latn ita_Latn nld_Latn pes_Arab hin_Deva ukr_Cyrl por_Latn ind_Latn jpn_Jpan spa_Latn heb_Hebr vie_Latn"

# The emoji below were stored as mojibake (their UTF-8 bytes had been decoded
# with a Greek single-byte code page). They are written as escapes so the
# values survive any further re-encoding of this file.
#   U+1F522 = input-numbers symbol, U+1F4AC = speech balloon,
#   U+1F3AF = direct hit (bullseye)
emojis = "\U0001F522 \U0001F4AC \U0001F3AF"

# Emoji shown in the "MT" column for each value of the sheet's Model_Type.
model_types = {
    "Generative RM": "\U0001F4AC",
    "DPO": "\U0001F3AF",
    "Sequence Classifier": "\U0001F522",
}
|
|
|
from functools import partial |
|
def format_with_color(val, min_val=50, max_val=100):
    """Render a numeric score as an HTML ``<div>`` with a green-gradient background.

    ``val`` is rescaled linearly from ``[min_val, max_val]`` to ``[0, 1]`` and
    clamped. That position sets the red and blue channels of the background:
    ``rgb(200, 200, 200)`` at the bottom of the range, ``rgb(50, 200, 50)`` at
    the top. The visible text is ``val * 100`` with one decimal place, bold
    and black.

    NOTE(review): the defaults (50, 100) read like a 0-100 scale while the
    displayed text multiplies by 100 -- confirm which scale callers that rely
    on the defaults are expected to pass.

    Args:
        val: Value to format; anything ``float()`` accepts.
        min_val: Value mapped to the palest background.
        max_val: Value mapped to the most saturated background.

    Returns:
        The HTML snippet, or ``str(val)`` when ``val`` is NaN or a
        ``ValueError``/``TypeError`` occurs while converting or formatting.
    """
    try:
        val = float(val)
        if pd.isna(val):
            return str(val)

        span = max_val - min_val
        if span == 0:
            # Zero-width range (e.g. a column whose values are all equal):
            # dividing would raise ZeroDivisionError for Python numbers and
            # yield inf/NaN plus a RuntimeWarning for NumPy scalars. Place
            # the value at the top of the scale unless it lies below the
            # single reference point.
            normalized = 1.0 if val >= max_val else 0.0
        else:
            normalized = (val - min_val) / span

        # Clamp so out-of-range values still get a valid colour.
        normalized = max(0, min(1, normalized))

        # 200 (grey-ish) at the low end down to 50 (strong green) at the top.
        intensity = int(50 + (150 * (1 - normalized)))

        return f'<div val={val} style="background-color: rgb({intensity}, 200, {intensity}); color: black; font-weight: bold; text-align: center; vertical-align: middle;">{val*100:.1f}</div>'

    except (ValueError, TypeError):
        return str(val)
|
|
|
# Build the page at import time, then start the server.
demo = gr.Blocks(theme=gr.themes.Soft())
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT)

    with gr.Tabs() as tabs:
        # The label is U+1F3C5 (sports medal) + " Leaderboard". It had been
        # stored as mojibake split across two lines; the escape keeps it
        # intact if this file is re-encoded again.
        with gr.TabItem("\U0001F3C5 Leaderboard"):
            df = get_result_data()

            # Swap the textual model type for its emoji; values that are not
            # keys of `model_types` become NaN.
            df["Model_Type"] = df["Model_Type"].map(model_types)

            df["Model"] = df.apply(format_model_link, axis=1)

            # Merge the two Chinese columns into one row-wise mean.
            df["zho"] = df[["zho_Hans", "zho_Hant"]].mean(axis=1)

            # split() without an argument splits on any whitespace, so this
            # works whether `lang_ids` is space- or tab-separated
            # (split("\t") returned the whole string as a single element for
            # the space-separated form, making the rename below a no-op).
            columns = lang_ids.split()

            df.pop("zho_Hans")
            df.pop("zho_Hant")

            df.rename(
                columns={
                    "Model_Type": "MT",
                    "Avg_Multilingual": "AVG",
                },
                inplace=True,
            )
            # Shorten language columns to their three-letter prefix; ids that
            # are not columns of `df` are ignored by rename.
            df.rename(columns={col: col[:3] for col in columns}, inplace=True)

            # Captured before formatting: afterwards these columns hold HTML
            # strings and would no longer be detected as numeric.
            numeric_cols = df.select_dtypes(include=[np.number]).columns

            for col in numeric_cols:
                # Scale the colour gradient per column so each column uses
                # its own min/max as the ends of the range.
                lang_format_with_color = partial(
                    format_with_color,
                    min_val=df[col].min(),
                    max_val=df[col].max(),
                )
                df[col] = df[col].apply(lang_format_with_color)

            # Register the formatted columns so `init_leaderboard` includes
            # them in its column lists.
            AutoEvalColumn.add_columns_from_df(df, numeric_cols)

            leaderboard = init_leaderboard(df)

demo.launch(ssr_mode=False)
|
|