# NOTE: page-banner residue captured together with this file ("Spaces: Running"); not part of the program.
# Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
# Modified by EffiBench
import gradio as gr
import pandas as pd
def make_default_md_1():
    """Return the Markdown/HTML header for the leaderboard page.

    The header consists of a level-1 title followed by three inline-styled
    links (Paper, GitHub, Dataset) that all share one link colour.

    Returns:
        str: Markdown text containing raw ``<a>`` tags.
    """
    link_color = "#1976D2"  # This color should be clear in both light and dark mode
    # NOTE(review): the "π" characters around the title look like a
    # mis-decoded emoji (possibly a trophy) -- confirm against the upstream
    # file before changing them; they are reproduced here exactly as found.
    # NOTE(review): no separator is visible between the three links in the
    # captured text; verify whether "|" separators were lost in extraction.
    leaderboard_md = f"""
# π EffiBench Leaderboard π
<a href='https://arxiv.org/abs/2402.02037' style='color: {link_color}; text-decoration: none;'>Paper</a>
<a href='https://github.com/huangd1999/EffiBench' style='color: {link_color}; text-decoration: none;'>GitHub</a>
<a href='https://github.com/huangd1999/EffiBench/tree/main/data' style='color: {link_color}; text-decoration: none;'>Dataset</a>
"""
    return leaderboard_md
def make_default_md_2():
    """Return the Markdown call-to-action inviting model submissions.

    The text links to a pre-filled GitHub issue form in the EffiBench
    repository (``model_eval_request.yml`` template).

    Returns:
        str: Markdown text.
    """
    # Plain string on purpose: there are no placeholders to interpolate, and
    # the URL's percent-escapes must be emitted verbatim.
    # NOTE(review): the leading "π€" looks like a mis-decoded emoji and the
    # trailing "**" has no matching opener -- presumably text was lost in
    # extraction; confirm against the upstream file. Reproduced as found.
    leaderboard_md = """
π€ [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!**
"""
    return leaderboard_md
# Intro text rendered at the top of the "Leaderboard" tab.
leaderboard_md = """
Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.
"""
# Terms-of-service notice rendered at the bottom of the page.
acknowledgment_md = """
### Terms of Service
Users are required to agree to the following terms before using the service:
The service is a research preview. It only provides limited safety measures and may generate offensive content.
It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
Please do not upload any private information.
The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.
"""
# Citation section: a heading, a one-line request, and the BibTeX entry
# inside a fenced code block. The fence is closed explicitly so any text
# appended after this snippet is not swallowed into the code block.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@article{huang2024effibench,
title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
author={Huang, Dong and Zhang, Jie M and Qing, Yuhao and Cui, Heming},
journal={arXiv preprint arXiv:2402.02037},
year={2024}
}
```
"""
def build_leaderboard_tab(leaderboard_table_file):
    """Build the leaderboard UI from a CSV results table.

    Renders the page header, a "Leaderboard" tab with two dropdown filters
    and a results table, a "Citation" accordion, and the terms-of-service
    notice. Changing either dropdown re-filters the table.

    NOTE(review): presumably this is called inside an active ``gr.Blocks()``
    context created by the caller -- confirm at the call site.
    NOTE(review): the source this was restored from had lost its indentation,
    so the nesting below (what sits inside the tab / row / accordion) is a
    reconstruction -- verify against the upstream layout.

    Args:
        leaderboard_table_file: Anything accepted by ``pandas.read_csv``.
            The table must contain ``Dataset`` and ``Timeout`` columns; the
            remaining columns are displayed as-is.

    Returns:
        None. The function is used for the components it creates.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(make_default_md_2(), elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        """Return the rows for one (dataset, timeout) pair, minus the filter columns."""
        selected = df[(df['Timeout'] == timeout) & (df['Dataset'] == dataset)]
        return selected.drop(columns=['Timeout', 'Dataset'])

    datasets = df['Dataset'].unique().tolist()
    timeouts = df['Timeout'].unique().tolist()
    # A table with a header but no rows yields empty choice lists; fall back
    # to None instead of raising IndexError so the page still renders.
    default_dataset = datasets[0] if datasets else None
    default_timeout = timeouts[0] if timeouts else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(label="Dataset", choices=datasets, value=default_dataset)
            timeout_dropdown = gr.Dropdown(label="Timeout", choices=timeouts, value=default_timeout)

        leaderboard = gr.Dataframe(value=filter_leaderboard(default_dataset, default_timeout))

        # Both dropdowns feed the same filter: each change event passes the
        # current values of *both* dropdowns and replaces the table contents.
        for dropdown in (dataset_dropdown, timeout_dropdown):
            dropdown.change(
                fn=filter_leaderboard,
                inputs=[dataset_dropdown, timeout_dropdown],
                outputs=leaderboard,
            )

    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
        gr.Markdown(acknowledgment_md, elem_id="ack_markdown")