Spaces:
Running
Running
File size: 4,513 Bytes
5ccbe05 43b5eac 5ccbe05 cbb678d 43b5eac 5ccbe05 43b5eac 5ccbe05 43b5eac 5ccbe05 43b5eac 5ccbe05 43b5eac cbb678d 43b5eac cbb678d 43b5eac cbb678d 43b5eac 5ccbe05 43b5eac 5ccbe05 cbb678d 5ccbe05 b31aebf 5ccbe05 43b5eac 5ccbe05 b31aebf 5ccbe05 16aecb3 5ccbe05 b31aebf 43b5eac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
import ast
import argparse
import glob
import pickle
import gradio as gr
import numpy as np
import pandas as pd
def make_default_md():
    """Return the Markdown header shown at the top of the leaderboard page.

    The text consists of the page title followed by a single table row of
    links (GitHub, paper, dataset). It starts and ends with a newline.
    """
    header_lines = [
        "",
        "# π Babilong Leaderboard",
        "| [GitHub](https://github.com/lm-sys/FastChat) | [Paper](https://arxiv.org/abs/2306.05685) | [Dataset](https://github.com/lm-sys/FastChat/blob/main/docs/dataset_release.md) |",
        "",
    ]
    return "\n".join(header_lines)
def make_arena_leaderboard_md():
    """Return the one-line Markdown summary displayed above the results table.

    The model count is a fixed placeholder ("UNK") and the "last updated"
    date is hard-coded in the text.
    """
    model_count = "UNK"
    summary = "Total #models: **{}**. Last updated: Feb 28, 2024."
    return summary.format(model_count)
def model_hyperlink(model_name, link):
    """Return an HTML anchor that displays *model_name* and opens *link* in a new tab.

    Both arguments are interpolated into the markup exactly as given; no
    escaping is applied.
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    """Load a leaderboard CSV file into a list of per-model row dicts.

    The first line is the header; every following non-blank line is one row.
    Lines are split on plain commas (quoted fields containing commas are not
    supported) and each cell is stripped of surrounding whitespace.

    Cells in the known score columns are converted to numbers, with the
    placeholder "-" mapped to NaN:

    * "Arena Elo rating"      -> int
    * "MMLU"                  -> percentage rounded to 1 decimal (value * 100)
    * "MT-bench (win rate %)" -> float rounded to 1 decimal; the trailing
      character (the "%" sign) is dropped before parsing
    * "MT-bench (score)"      -> float rounded to 2 decimals

    All other columns keep their stripped text.

    Args:
        filename: Path of the CSV file to read.
        add_hyperlink: When true, replace each row's "Model" value with an
            HTML link built from the row's "Model" and "Link" values.

    Returns:
        A list of dicts mapping header name to cell value, one per data row.
        An empty file yields an empty list.

    Raises:
        KeyError: If ``add_hyperlink`` is true and a row lacks a "Model" or
            "Link" column.
        ValueError, SyntaxError: If a score cell is not a valid Python
            literal (raised by ``ast.literal_eval``).
    """
    # One parser per score column; applied only when the cell is not "-".
    score_parsers = {
        "Arena Elo rating": lambda text: int(ast.literal_eval(text)),
        "MMLU": lambda text: round(ast.literal_eval(text) * 100, 1),
        "MT-bench (win rate %)": lambda text: round(ast.literal_eval(text[:-1]), 1),
        "MT-bench (score)": lambda text: round(ast.literal_eval(text), 2),
    }

    # Context manager so the file handle is closed even if reading fails.
    with open(filename) as table_file:
        lines = table_file.readlines()
    if not lines:
        return []

    heads = [v.strip() for v in lines[0].split(",")]
    rows = []
    for line in lines[1:]:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # they would otherwise produce a bogus, nearly empty row.
        if not line.strip():
            continue
        cells = [v.strip() for v in line.split(",")]
        item = {}
        for head, cell in zip(heads, cells):
            parse = score_parsers.get(head)
            if parse is not None:
                cell = parse(cell) if cell != "-" else np.nan
            item[head] = cell
        if add_hyperlink:
            item["Model"] = model_hyperlink(item["Model"], item["Link"])
        rows.append(item)
    return rows
def build_leaderboard_tab():
    """Create the leaderboard header and the tabbed results table.

    Components are created in this order: the page header Markdown, a tab
    group with a single "Arena Elo" tab, and inside that tab a summary
    Markdown followed by a Dataframe. No ``value`` is passed to the
    Dataframe, so only its column layout is defined here.

    Returns:
        A one-element list containing the header Markdown component.
    """
    # (header, datatype, width) for each table column, in display order.
    column_specs = [
        ("Rank", "str", 50),
        ("π€ Model", "markdown", 200),
        ("qa 1", "number", 150),
        ("qa 2", "number", 150),
        ("qa 3", "number", 150),
        ("qa 4", "number", 150),
        ("qa 5", "number", 150),
    ]

    header_md = gr.Markdown(make_default_md(), elem_id="leaderboard_markdown")
    with gr.Tabs():
        # arena table
        with gr.Tab("Arena Elo", id=0):
            gr.Markdown(make_arena_leaderboard_md(), elem_id="leaderboard_markdown")
            gr.Dataframe(
                headers=[name for name, _, _ in column_specs],
                datatype=[kind for _, kind, _ in column_specs],
                elem_id="arena_leaderboard_dataframe",
                height=700,
                column_widths=[width for _, _, width in column_specs],
                wrap=True,
            )
    return [header_md]
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo():
    """Assemble the top-level Gradio app that hosts the leaderboard tab.

    The app uses the Base theme with large text and the module-level
    ``block_css`` stylesheet.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    base_theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    with gr.Blocks(
        title="Babilong leaderboard",
        theme=base_theme,
        css=block_css,
    ) as demo:
        # The returned component list is not needed here.
        build_leaderboard_tab()
    return demo
if __name__ == "__main__":
elo_result_files = glob.glob("elo_results_*.pkl")
leaderboard_table_files = glob.glob("leaderboard_table_*.csv")
# leaderboard_table_files.sort(key=lambda x: int(x[18:-4]))
# leaderboard_table_file = leaderboard_table_files[-1]
demo = build_demo()
demo.launch(share=True)
|