babilong / app.py
dsorokin's picture
cleanup
cbb678d
raw
history blame
4.51 kB
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
import ast
import argparse
import glob
import pickle
import gradio as gr
import numpy as np
import pandas as pd
def make_default_md():
leaderboard_md = f"""
# πŸ† Babilong Leaderboard
| [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) |
"""
return leaderboard_md
def make_arena_leaderboard_md():
total_models = 'UNK'
leaderboard_md = f"""Total #models: **{total_models}**. Last updated: Feb 28, 2024."""
return leaderboard_md
def model_hyperlink(model_name, link):
return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
def load_leaderboard_table_csv(filename, add_hyperlink=True):
lines = open(filename).readlines()
heads = [v.strip() for v in lines[0].split(",")]
rows = []
for i in range(1, len(lines)):
row = [v.strip() for v in lines[i].split(",")]
for j in range(len(heads)):
item = {}
for h, v in zip(heads, row):
if h == "Arena Elo rating":
if v != "-":
v = int(ast.literal_eval(v))
else:
v = np.nan
elif h == "MMLU":
if v != "-":
v = round(ast.literal_eval(v) * 100, 1)
else:
v = np.nan
elif h == "MT-bench (win rate %)":
if v != "-":
v = round(ast.literal_eval(v[:-1]), 1)
else:
v = np.nan
elif h == "MT-bench (score)":
if v != "-":
v = round(ast.literal_eval(v), 2)
else:
v = np.nan
item[h] = v
if add_hyperlink:
item["Model"] = model_hyperlink(item["Model"], item["Link"])
rows.append(item)
return rows
def build_leaderboard_tab():
default_md = make_default_md()
md_1 = gr.Markdown(default_md, elem_id="leaderboard_markdown")
with gr.Tabs() as tabs:
# arena table
with gr.Tab("Arena Elo", id=0):
md = make_arena_leaderboard_md()
gr.Markdown(md, elem_id="leaderboard_markdown")
gr.Dataframe(
headers=[
"Rank",
"πŸ€– Model",
"qa 1",
"qa 2",
"qa 3",
"qa 4",
"qa 5",
],
datatype=[
"str",
"markdown",
"number",
"number",
"number",
"number",
"number",
],
# value=arena_table_vals,
elem_id="arena_leaderboard_dataframe",
height=700,
column_widths=[50, 200, 150, 150, 150, 150, 150],
wrap=True,
)
return [md_1]
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo():
text_size = gr.themes.sizes.text_lg
with gr.Blocks(
title="Babilong leaderboard",
theme=gr.themes.Base(text_size=text_size),
css=block_css,
) as demo:
leader_components = build_leaderboard_tab()
return demo
if __name__ == "__main__":
elo_result_files = glob.glob("elo_results_*.pkl")
leaderboard_table_files = glob.glob("leaderboard_table_*.csv")
# leaderboard_table_files.sort(key=lambda x: int(x[18:-4]))
# leaderboard_table_file = leaderboard_table_files[-1]
demo = build_demo()
demo.launch(share=True)