File size: 3,706 Bytes
311dc3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import pickle

import pandas as pd
import gradio as gr
from huggingface_hub import HfFileSystem, hf_hub_download

if gr.NO_RELOAD:
    ###################
    ### Load Data
    ###################
    # NOTE(review): per Gradio's reload-mode docs, code under gr.NO_RELOAD is
    # not re-executed on hot reload -- confirm against the installed version.

    # Key used in the ELO results -> display name of the category.
    key_to_category_name = {
        "full": "Overall",
        "coding": "Coding",
        "long_user": "Longer Query",
        "english": "English",
        "chinese": "Chinese",
        "french": "French",
        "no_tie": "Exclude Ties",
        "no_short": "Exclude Short Query (< 5 tokens)",
        "no_refusal": "Exclude Refusal",
    }

    # Display name -> longer explanation of the category.  The display names
    # are looked up through key_to_category_name so each one is spelled in a
    # single place; the resulting keys and their order match that table.
    cat_name_to_explanation = {
        key_to_category_name["full"]: "Overall Questions",
        key_to_category_name["coding"]: (
            "Coding: whether conversation contains code snippets"
        ),
        key_to_category_name["long_user"]: "Longer Query (>= 500 tokens)",
        key_to_category_name["english"]: "English Prompts",
        key_to_category_name["chinese"]: "Chinese Prompts",
        key_to_category_name["french"]: "French Prompts",
        key_to_category_name["no_tie"]: "Exclude Ties and Bothbad",
        key_to_category_name["no_short"]: "Exclude Short User Query (< 5 tokens)",
        key_to_category_name["no_refusal"]: (
            'Exclude model responses with refusal (e.g., "I cannot answer")'
        ),
    }

    # Filesystem-style client used below to glob files on the Hugging Face Hub.
    fs = HfFileSystem()

    def extract_date(filename):
        """Return the last ``_``-separated token of a path's base name.

        The directory part (up to the last ``/``) and everything from the
        first ``.`` of the base name onward are dropped before the token is
        taken, e.g. ``"dir/elo_results_20240329.pkl"`` -> ``"20240329"``.
        The result is a string; it is used as a key to pick the newest file.
        """
        basename = filename.rpartition("/")[2]
        stem = basename.partition(".")[0]
        return stem.rpartition("_")[2]

    # gather ELO data
    ELO_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.pkl"
    elo_files = fs.glob(ELO_DATA_FILES)
    if not elo_files:
        # Fail with an explicit message rather than an opaque IndexError.
        raise FileNotFoundError(f"No ELO result files match {ELO_DATA_FILES!r}")

    # Newest file according to the token extract_date() pulls from the name.
    # Tokens are compared as strings -- presumably YYYYMMDD dates, so the
    # lexicographic maximum is the most recent; verify against the Space.
    latest_elo_file = max(elo_files, key=extract_date)

    latest_elo_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_elo_file.split("/")[-1],
        repo_type="space",
    )

    # SECURITY: pickle.load can execute arbitrary code while unpickling.  The
    # file is downloaded from the lmsys/chatbot-arena-leaderboard Space, so
    # this implicitly trusts whatever that Space publishes.
    with open(latest_elo_file_local, "rb") as fin:
        elo_results = pickle.load(fin)

    # Display name -> leaderboard table, for every known category that is
    # actually present in the loaded results (missing ones are skipped).
    arena_dfs = {
        category_name: elo_results[key]["leaderboard_table_df"]
        for key, category_name in key_to_category_name.items()
        if key in elo_results
    }

    # gather leaderboard table data (CSV files in the same Space)
    LEADERBOARD_DATA_FILES = "spaces/lmsys/chatbot-arena-leaderboard/*.csv"
    leaderboard_files = fs.glob(LEADERBOARD_DATA_FILES)
    if not leaderboard_files:
        # Fail with an explicit message rather than an opaque IndexError.
        raise FileNotFoundError(
            f"No leaderboard files match {LEADERBOARD_DATA_FILES!r}"
        )

    # Newest file according to the token extract_date() pulls from the name
    # (string comparison; presumably a YYYYMMDD date -- verify).
    latest_leaderboard_file = max(leaderboard_files, key=extract_date)

    latest_leaderboard_file_local = hf_hub_download(
        repo_id="lmsys/chatbot-arena-leaderboard",
        filename=latest_leaderboard_file.split("/")[-1],
        repo_type="space",
    )
    leaderboard_df = pd.read_csv(latest_leaderboard_file_local)

    ###################
    ### Prepare Data
    ###################

    # Merge leaderboard data with ELO data: the index of each arena table is
    # matched against the leaderboard's "key" column, then rows are ordered
    # by descending rating with a fresh 0..n-1 index.  pd.merge defaults to
    # an inner join, so rows without a match on either side are dropped.
    merged_dfs = {
        category: (
            pd.merge(arena_df, leaderboard_df, left_index=True, right_on="key")
            .sort_values("rating", ascending=False)
            .reset_index(drop=True)
        )
        for category, arena_df in arena_dfs.items()
    }

    # Add release dates into the merged data.  The mapping file is resolved
    # relative to the current working directory.
    release_date_mapping = pd.read_json("release_date_mapping.json", orient="records")
    # Select the two needed columns once instead of once per category.
    release_dates = release_date_mapping[["key", "Release Date"]]
    merged_dfs = {
        category: pd.merge(merged_df, release_dates, on="key")
        for category, merged_df in merged_dfs.items()
    }
# Data for the plot: the "Overall" category only.
df = merged_dfs["Overall"]

# Rating extremes, plus a margin of 10% of the rating span that is used to
# pad the plot's y-axis limits.
y_min, y_max = df["rating"].min(), df["rating"].max()
y_buffer = 0.1 * (y_max - y_min)

# UI: a heading followed by a single row holding the rating scatter plot.
with gr.Blocks() as demo:
    gr.Markdown("# Chatbot Arena Leaderboard")
    with gr.Row():
        gr.ScatterPlot(
            value=df,
            # Axes: release date against rating.
            x="Release Date",
            y="rating",
            # y-axis padded by y_buffer below the minimum and above the maximum.
            y_lim=[y_min - y_buffer, y_max + y_buffer],
            x_label_angle=-45,
            # NOTE(review): "hello" looks like a placeholder title -- confirm.
            title="hello",
            # Columns shown when hovering over a point.
            tooltip=["Model", "rating", "num_battles", "Organization", "License"],
            width=1000,
            height=700,
        )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()