File size: 3,659 Bytes

bb8527c
 
 
 
 
 
 
 
 
 
7123c10
 
 
ceed9d7
bb8527c
 
 
 
 
 
7123c10
bb8527c
 
 
 
 
f62af41
bb8527c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f62af41
 
 
 
 
bb8527c
 
 
 
 
 
 
 
a57cfe8
bb8527c
 
 
 
a57cfe8
bb8527c
 
 
 
 
a57cfe8
bb8527c
a57cfe8
bb8527c
 
a57cfe8
 
c307e47
 
bb8527c
c307e47
 
bb8527c
 
 
 
e09a98d

# Original code by https://huggingface.co/spaces/lmsys/chatbot-arena-leaderboard
# Modified by EffiBench

import pandas as pd
import gradio as gr


def make_default_md_1():
    """Return the leaderboard title plus the Paper / GitHub / Dataset link row.

    Returns:
        str: Markdown/HTML snippet with a level-1 heading followed by three
        inline-styled anchors separated by ``|``. Every line carries a
        four-space indent, exactly as in the literal it is built from.
    """
    # This color should be clear in both light and dark mode
    link_color = "#1976D2"
    anchor_style = f"color: {link_color}; text-decoration: none;"

    targets = (
        ("https://arxiv.org/abs/2402.02037", "Paper"),
        ("https://github.com/huangd1999/EffiBench", "GitHub"),
        ("https://github.com/huangd1999/EffiBench/tree/main/data", "Dataset"),
    )
    anchors = [
        f"<a href='{url}' style='{anchor_style}'>{label}</a>"
        for url, label in targets
    ]

    # Each anchor sits on its own indented line, joined by a trailing " |".
    link_row = " |\n    ".join(anchors)
    return "\n    # 🏆 EffiBench Leaderboard 🏆 \n    " + link_row + "\n    "

def make_default_md_2():
    """Return the call-to-action that links to the model-evaluation request form.

    The sentence is wrapped in a matched pair of ``**`` markers so it renders
    in bold; previously only the closing marker was present, which left the
    bold delimiter unbalanced.

    Returns:
        str: A single indented markdown line surrounded by newlines.
    """
    # Plain (non-f) string: there are no placeholders to interpolate, and the
    # URL's percent-escapes must be emitted verbatim.
    leaderboard_md = """
    **🤗 [filing a request](https://github.com/huangd1999/EffiBench/issues/new?assignees=&labels=model+eval&projects=&template=model_eval_request.yml&title=%F0%9F%92%A1+%5BREQUEST%5D+-+%3CMODEL_NAME%3E) to add your models on our leaderboard!**
    """

    return leaderboard_md

# Intro text rendered at the top of the "Leaderboard" tab.
leaderboard_md = (
    "\n"
    "Three benchmarks are displayed: **EffiBench**, **HumanEval** and **MBPP**.\n"
)

# Terms-of-service notice; rendered with elem_id "ack_markdown".
# Built line by line; the empty first and last entries reproduce the leading
# and trailing newline of the text.
acknowledgment_md = "\n".join(
    [
        "",
        "### Terms of Service",
        "",
        "Users are required to agree to the following terms before using the service:",
        "",
        "The service is a research preview. It only provides limited safety measures and may generate offensive content.",
        "It must not be used for any illegal, harmful, violent, racist, or sexual purposes.",
        "Please do not upload any private information.",
        "The service collects user dialogue data, including both text and images, and reserves the right to distribute it under a Creative Commons Attribution (CC-BY) or a similar license.",
        "",
    ]
)

# Citation section: a short request to cite the paper followed by its BibTeX
# entry inside a fenced code block. The fence is explicitly closed so the
# markdown stays well-formed even if more text is appended after it.
citation_md = """
### Citation
Please cite the following paper if you find our leaderboard or dataset helpful.
```
@article{huang2024effibench,
  title={EffiBench: Benchmarking the Efficiency of Automatically Generated Code},
  author={Huang, Dong and Zhang, Jie M and Qing, Yuhao and Cui, Heming},
  journal={arXiv preprint arXiv:2402.02037},
  year={2024}
}
```
"""

def build_leaderboard_tab(leaderboard_table_file):
    """Build the leaderboard UI: header, filterable results table, citation.

    Creates two header markdown components, a "Leaderboard" tab with a
    dataset dropdown, a timeout dropdown and the table they filter, and a
    "Citation" accordion holding the citation and terms-of-service text.
    Components are created without an explicit parent, so this is presumably
    meant to be called inside an open Gradio layout context (caller not
    visible here).

    Args:
        leaderboard_table_file: Passed straight to ``pandas.read_csv``. The
            table must have ``Dataset`` and ``Timeout`` columns; all other
            columns are shown in the table unchanged.

    Raises:
        KeyError: If the CSV has no ``Dataset`` or ``Timeout`` column.
    """
    gr.Markdown(make_default_md_1(), elem_id="leaderboard_markdown")
    gr.Markdown(make_default_md_2(), elem_id="leaderboard_markdown")

    df = pd.read_csv(leaderboard_table_file)

    def filter_leaderboard(dataset, timeout):
        """Return the rows for one (dataset, timeout) pair, minus the filter columns."""
        selected = df[(df['Timeout'] == timeout) & (df['Dataset'] == dataset)]
        return selected.drop(columns=['Timeout', 'Dataset'])

    datasets = df['Dataset'].unique().tolist()
    timeouts = df['Timeout'].unique().tolist()

    # A header-only CSV produces empty choice lists; fall back to "no
    # selection" (and therefore an empty table) instead of raising IndexError.
    default_dataset = datasets[0] if datasets else None
    default_timeout = timeouts[0] if timeouts else None

    with gr.Tab("Leaderboard"):
        gr.Markdown(leaderboard_md, elem_id="leaderboard_markdown")
        with gr.Row():
            dataset_dropdown = gr.Dropdown(label="Dataset", choices=datasets, value=default_dataset)
            timeout_dropdown = gr.Dropdown(label="Timeout", choices=timeouts, value=default_timeout)

        leaderboard = gr.Dataframe(value=filter_leaderboard(default_dataset, default_timeout))

        # Changing either dropdown re-filters using the current value of both.
        for dropdown in (dataset_dropdown, timeout_dropdown):
            dropdown.change(
                fn=filter_leaderboard,
                inputs=[dataset_dropdown, timeout_dropdown],
                outputs=leaderboard,
            )

    with gr.Accordion("Citation", open=True):
        gr.Markdown(citation_md, elem_id="leaderboard_markdown")
        gr.Markdown(acknowledgment_md, elem_id="ack_markdown")