Spaces:
Running
Running
File size: 4,902 Bytes
5ccbe05 7bd86a9 ae55c78 5ccbe05 43b5eac 5ccbe05 cbb678d ae55c78 5ccbe05 7bd86a9 43b5eac 5ccbe05 ae55c78 5ccbe05 ae55c78 43b5eac 5ccbe05 7bd86a9 43b5eac 5ccbe05 ae55c78 5ccbe05 cbb678d 5ccbe05 7bd86a9 5ccbe05 43b5eac 5ccbe05 7bd86a9 5ccbe05 7bd86a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
"""A gradio app that renders a static leaderboard. This is used for Hugging Face Space."""
import ast
import argparse
import glob
import pickle
import gradio as gr
import numpy as np
import pandas as pd
import os
from collections import defaultdict
from matplotlib.colors import LinearSegmentedColormap
def make_default_md():
    """Return the Markdown shown at the top of the leaderboard page.

    The text is a level-1 heading followed by a one-row Markdown table of
    links (GitHub, Paper, Dataset), wrapped in leading and trailing newlines.
    """
    # NOTE(review): the character after "# " looks like a mis-decoded emoji;
    # it is reproduced unchanged here - confirm against the deployed file.
    heading = "# π Babilong Leaderboard"
    link_row = (
        "| [GitHub](https://github.com/booydar/recurrent-memory-transformer/)"
        " | [Paper](https://arxiv.org/abs/2402.10790)"
        " | [Dataset](https://github.com/booydar/babilong/) |"
    )
    return "\n" + heading + "\n" + link_row + "\n"
def make_arena_leaderboard_md(total_models):
    """Return the one-line Markdown summary shown above each results table.

    Args:
        total_models: Value interpolated (in bold) as the model count.

    Returns:
        The summary string; the "Last updated" date is a fixed literal.
    """
    template = "Total #models: **{}**. Last updated: Feb 28, 2024."
    return template.format(total_models)
def model_hyperlink(model_name, link):
    """Return an HTML anchor that opens ``link`` in a new tab.

    Args:
        model_name: Text displayed inside the anchor.
        link: Value placed in the ``href`` attribute.

    Both values are interpolated as-is (no HTML escaping is applied).
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
def load_model(folders, tab_name, msg_lengths):
    """Build the ranked score table for one task tab.

    For every model folder and every key of ``msg_lengths`` this reads
    ``{folder}/{tab_name}/{key}.csv`` and stores the sum of its ``result``
    column divided by the row count, as a truncated integer percentage.
    A missing (or empty) CSV is stored as a placeholder that displays as
    ``-`` and counts as ``0.0`` when converted to float.

    Models are ranked (1 = best) by the mean score over the first five
    entries of ``msg_lengths``; ties keep the order of ``folders``.

    Args:
        folders: Paths of per-model result directories. The last
            ``/``-separated component is used as the model name.
        tab_name: Name of the task sub-directory inside each folder.
        msg_lengths: Mapping from CSV base name to the column label used
            in the returned table.

    Returns:
        A ``pandas.DataFrame`` with columns ``Rank``, ``Model`` and one
        column per label in ``msg_lengths``, sorted by ``Rank``.
    """

    class NA():
        """Placeholder for a score that could not be computed."""

        def __repr__(self) -> str:
            return '-'

        def __float__(self):
            # Lets missing scores take part in the mean (and in float
            # conversion of the column) as zero.
            return 0.0

    labels = list(msg_lengths.values())
    if not folders:
        # Nothing to rank: return an empty table that still has the
        # expected columns instead of failing on the missing 'Rank' key.
        return pd.DataFrame(columns=['Rank', 'Model'] + labels)

    # Only the first five lengths contribute to the ranking score.
    ranked_labels = labels[:5]

    results = defaultdict(list)
    neg_mean_scores = []
    for i, folder in enumerate(folders):
        results['Rank'].append(i)  # placeholder, overwritten below
        results['Model'].append(folder.split('/')[-1])
        for task, label in msg_lengths.items():
            csv_path = f'{folder}/{tab_name}/{task}.csv'
            score = NA()
            if os.path.isfile(csv_path):
                df = pd.read_csv(csv_path)
                if len(df) > 0:  # an empty file has no accuracy to report
                    score = int(df['result'].sum() / len(df) * 100)
            results[label].append(score)
        # Negated so that an ascending sort puts the best model first.
        neg_mean_scores.append(
            -np.mean([float(results[label][i]) for label in ranked_labels])
        )

    # argsort yields, for each position in the sorted order, the index of the
    # model that belongs there; the rank must therefore be written at that
    # model's index, not at the position's index.
    order = np.argsort(neg_mean_scores, kind='stable')
    for position, model_idx in enumerate(order):
        results['Rank'][model_idx] = position + 1
    return pd.DataFrame(results).sort_values('Rank')
def build_leaderboard_tab(folders):
    """Create the leaderboard header and one results tab per task.

    Components are created in whatever Gradio context is active when this
    is called; ``build_demo`` calls it inside its ``gr.Blocks`` block.

    For each task (``qa1`` .. ``qa5``) the scores are loaded with
    ``load_model``, coloured with a red-yellow-green gradient over the
    0-100 range, and shown in a ``gr.Dataframe`` inside its own tab.

    Args:
        folders: Paths of per-model result directories, forwarded to
            ``load_model``; their count is shown above each table.

    Returns:
        A one-element list holding the header ``gr.Markdown`` component.
    """
    md_1 = gr.Markdown(make_default_md(), elem_id="leaderboard_markdown")

    # CSV base name (context length) -> column label shown in the table.
    msg_lengths = {
        '0': '0k',
        '4000': '4k',
        '8000': '8k',
        '16000': '16k',
        '32000': '32k',
        '64000': '64k',
        '128000': '128k',
        '500000': '500k',
        '1000000': '1M',
        '10000000': '10M'
    }
    length_labels = list(msg_lengths.values())

    # The colour map is identical for every tab, so build it once.
    cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)

    with gr.Tabs():
        for tab_id, tab_name in enumerate(['qa1', 'qa2', 'qa3', 'qa4', 'qa5']):
            scores = load_model(folders, tab_name, msg_lengths)
            styled = scores.style.background_gradient(
                cmap=cmap, vmin=0, vmax=100, subset=length_labels
            )
            # arena table
            with gr.Tab(tab_name, id=tab_id):
                md = make_arena_leaderboard_md(len(folders))
                gr.Markdown(md, elem_id="leaderboard_markdown")
                gr.Dataframe(
                    # NOTE(review): the prefix before "Model" looks like a
                    # mis-decoded emoji; kept unchanged - confirm.
                    headers=[
                        "Rank",
                        "π€ Model",
                    ] + length_labels,
                    # One datatype per column, derived from msg_lengths so it
                    # stays in step with headers and column_widths.
                    datatype=["str", "markdown"] + ["str"] * len(length_labels),
                    value=styled,
                    elem_id="arena_leaderboard_dataframe",
                    height=700,
                    column_widths=[50, 200] + [100] * len(length_labels),
                    wrap=True,
                )
    return [md_1]
# Custom stylesheet passed to gr.Blocks(css=...) in build_demo.
# NOTE(review): in this file components are only given the element ids
# "leaderboard_markdown" and "arena_leaderboard_dataframe", so the
# "#notice_markdown" and "#leaderboard_dataframe" rules may match nothing
# here - confirm before relying on or removing them.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
def build_demo(folders):
    """Assemble the Gradio app that shows the leaderboard.

    Args:
        folders: Paths of per-model result directories, forwarded to
            ``build_leaderboard_tab``.

    Returns:
        The ``gr.Blocks`` instance, not yet launched.
    """
    # Base theme with the large text preset.
    theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)
    with gr.Blocks(title="Babilong leaderboard", theme=theme, css=block_css) as demo:
        # Components register themselves with the enclosing Blocks context,
        # so the returned list is not needed here.
        build_leaderboard_tab(folders)
    return demo
if __name__ == "__main__":
    # Every sub-directory of ./results is treated as one model. Plain files
    # are skipped so they do not show up as empty model rows, and the names
    # are sorted so the model order does not depend on the order in which
    # the filesystem happens to list them.
    folders = [
        f'results/{name}'
        for name in sorted(os.listdir('results'))
        if os.path.isdir(f'results/{name}')
    ]
    demo = build_demo(folders)
    demo.launch(share=False)
|