File size: 5,115 Bytes
65c6479
441cdc8
 
0e06db3
97d7225
21f1468
65c6479
9c1b957
 
 
35957e0
9c1b957
 
 
 
60867e4
9c1b957
 
97d7225
541cf85
9c1b957
e608ddc
 
0e06db3
85b9042
e608ddc
 
 
 
 
 
 
65c6479
97d7225
85b9042
97d7225
d31b48d
c69a5b0
 
541cf85
b732491
1e647ba
a55b227
9c1b957
 
 
60867e4
a55b227
60867e4
9c1b957
21f1468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7bd7d77
c69a5b0
21f1468
cfb8d80
21f1468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7157d11
c69a5b0
21f1468
cfb8d80
6c84d42
21f1468
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c84d42
21f1468
 
6c84d42
040103b
cfb8d80
4d3390f
35957e0
 
 
 
 
 
 
 
 
 
441cdc8
9c1b957
97d7225
 
cfb8d80
97d7225
653c0f4
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
import pandas as pd
import os
from huggingface_hub import snapshot_download, login
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter

from src.display.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    CONTACT_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    SUB_TITLE,
)
from src.display.css_html_js import custom_css
from src.envs import API
from src.leaderboard.load_results import load_data

# ---------------------------------------------------------------------------
# Clone / pull the evaluation-results dataset from the Hugging Face Hub.
# ---------------------------------------------------------------------------
TOKEN = os.environ.get("TOKEN", None)
# Only authenticate when a token is configured; `login(token=None)` can fail
# or try to prompt, and snapshot_download of a public dataset works without it.
if TOKEN:
    login(token=TOKEN)
# Plain string — the original used an f-string with no placeholders.
RESULTS_REPO = "SeaLLMs/SeaExam-results"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
print(EVAL_RESULTS_PATH)
snapshot_download(
    repo_id=RESULTS_REPO,
    local_dir=EVAL_RESULTS_PATH,
    repo_type="dataset",
    token=TOKEN,
)

def restart_space():
    """Restart the hosting HF Space (scheduled periodically) so fresh results are reloaded."""
    API.restart_space(repo_id="SeaLLMs/SeaExam_leaderboard", token=TOKEN)

# Column bookkeeping for the leaderboard tables.
# all_columns: every column present in the loaded DataFrames (not all are shown).
all_columns = ['R','type', 'Model','open?', 'avg_sea ⬇️', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'params(B)']
# show_columns: the subset (re-ordered) actually rendered in each Leaderboard tab.
show_columns = ['R', 'Model','type','open?','params(B)', 'avg_sea ⬇️', 'en', 'zh', 'id', 'th', 'vi', 'avg', ]
# TYPES: per-column Gradio datatypes, positionally aligned with show_columns —
# keep the two lists in sync if either changes.
TYPES = ['number', 'markdown', 'str', 'str', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
# Load the data from the csv file
# NOTE(review): the CSV name is date-stamped — presumably replaced on each
# results push to the dataset repo; confirm against the upload pipeline.
csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_20240808.csv'
df_m3exam, df_mmlu, df_avg = load_data(csv_path)

def _results_leaderboard(df):
    """Build one Leaderboard component for a results DataFrame.

    All three score tabs (Overall / M3Exam / MMLU) share this exact layout;
    the original repeated the same ~20-line block three times.
    """
    return Leaderboard(
        value=df[show_columns],
        select_columns=SelectColumns(
            default_selection=show_columns,
            cant_deselect=["R", "Model"],
            label="Select Columns to Display:",
        ),
        search_columns=["Model"],
        filter_columns=[
            "type",
            "open?",
            ColumnFilter("params(B)", default=[7, 10]),
        ],
        datatype=TYPES,
    )


# Assemble the app: title/intro, one tab per score table, an About tab,
# and a contact footer.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.HTML(SUB_TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Tab("πŸ… Overall"):
            _results_leaderboard(df_avg)

        with gr.Tab("M3Exam"):
            _results_leaderboard(df_m3exam)

        with gr.Tab("MMLU"):
            _results_leaderboard(df_mmlu)

        with gr.TabItem("πŸ“ About", elem_id="llm-benchmark-tab-table", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    gr.Markdown(CONTACT_TEXT, elem_classes="markdown-text")

# Periodically restart the Space so newly pushed results are picked up.
# BUG FIX: the original called demo.launch() here *and* again below. The
# first call blocks by default, so the scheduler never started and the
# queued launch(share=True) never ran while the app was serving. Start the
# scheduler first, then launch exactly once.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()
demo.queue(default_concurrency_limit=40).launch(share=True)