import gradio as gr
import pandas as pd
import os
from huggingface_hub import snapshot_download, login
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns, ColumnFilter
from src.display.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    CONTACT_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
    SUB_TITLE,
)
from src.display.css_html_js import custom_css
from src.envs import API
from src.leaderboard.load_results import load_data
# Hugging Face token used for downloading the results dataset and restarting the Space.
TOKEN = os.environ.get("TOKEN", None)
if TOKEN:
    # Only authenticate when a token is available (e.g. provided as a Space secret).
    login(token=TOKEN)

RESULTS_REPO = "SeaLLMs/SeaExam-results"
CACHE_PATH = os.getenv("HF_HOME", ".")
EVAL_RESULTS_PATH = os.path.join(CACHE_PATH, "eval-results")
print(EVAL_RESULTS_PATH)
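
# Download the evaluation results dataset from the Hub into the local cache.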
snapshot_download(
    repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset",
    token=TOKEN
)
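
# Restarting the Space makes it re-download and reload the latest results on startup.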
def restart_space():
    API.restart_space(repo_id="SeaLLMs/SeaExam_leaderboard", token=TOKEN)
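
# Column key (as the names appear in the results CSV): "R" = rank, "#P(B)" = parameters in
# billions, "pub"/"prv" = presumably the public/private test splits, and "id"/"th"/"vi" the
# Indonesian, Thai, and Vietnamese subsets.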
all_columns = ['R', 'Model', 'type', 'open?', 'avg-pub', 'avg-prv ⬇️', 'id-pub',
               'th-pub', 'vi-pub', 'id-prv', 'th-prv', 'vi-prv', '#P(B)']
show_columns = ['R', 'Model', 'type', 'open?', '#P(B)', 'avg-pub', 'avg-prv ⬇️',
                'id-pub', 'th-pub', 'vi-pub', 'id-prv', 'th-prv', 'vi-prv']
TYPES = ['number', 'markdown', 'str', 'str', 'number', 'number', 'number', 'number',
         'number', 'number', 'number', 'number', 'number']
show_columns_overall = ['R', 'Model', 'type', 'open?', '#P(B)', 'SeaExam-pub', 'SeaExam-prv ⬇️',
                        'SeaBench-pub', 'SeaBench-prv']
TYPES_overall = ['number', 'markdown', 'str', 'str', 'number', 'number', 'number', 'number', 'number']
# Load the data from the csv file
csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_20250318.csv'
df = pd.read_csv(csv_path, skiprows=1, header=0)  # raw table; the tabs below use the frames from load_data
# df_m3exam, df_mmlu, df_avg = load_data(csv_path)
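# load_data splits the results CSV into one DataFrame per tab: SeaExam, SeaBench, and the overall summary.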
df_seaexam, df_seabench, df_overall = load_data(csv_path)
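
# Build the UI: one leaderboard tab per benchmark plus an About tab.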
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    # gr.HTML(SUB_TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
with gr.Tab("🏅 Overall"):
Leaderboard(
value=df_overall[show_columns_overall],
select_columns=SelectColumns(
default_selection=show_columns_overall,
cant_deselect=["R", "Model"],
label="Select Columns to Display:",
),
search_columns=["Model"],
# hide_columns=["model_name_for_query", "Model Size"],
filter_columns=[
"type",
"open?",
# ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
# ColumnFilter("Flagged", type="boolean", default=False),
ColumnFilter("#P(B)", default=[7, 9], label="Paramers(B)"),
],
datatype=TYPES_overall,
# column_widths=["3%", "20%", "6%", "4%"]
)
with gr.Tab("SeaExam"):
Leaderboard(
value=df_seaexam[show_columns],
select_columns=SelectColumns(
default_selection=show_columns,
cant_deselect=["R", "Model"],
label="Select Columns to Display:",
),
search_columns=["Model"],
# hide_columns=["model_name_for_query", "Model Size"],
filter_columns=[
"type",
"open?",
# ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
# ColumnFilter("Flagged", type="boolean", default=False),
ColumnFilter("#P(B)", default=[7, 9]),
],
datatype=TYPES,
# column_widths=["2%", "33%"],
)
with gr.Tab("SeaBench"):
Leaderboard(
value=df_seabench[show_columns],
select_columns=SelectColumns(
default_selection=show_columns,
cant_deselect=["R", "Model"],
label="Select Columns to Display:",
),
search_columns=["Model"],
# hide_columns=["model_name_for_query", "Model Size"],
filter_columns=[
"type",
"open?",
# ColumnFilter("MOE", type="boolean", default=False, label="MoE"),
# ColumnFilter("Flagged", type="boolean", default=False),
ColumnFilter("#P(B)", default=[7, 9]),
],
datatype=TYPES,
# column_widths=["2%", "33%"],
)
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

    # with gr.Row():
    #     with gr.Accordion("📙 Citation", open=False):
    #         citation_button = gr.Textbox(
    #             value=CITATION_BUTTON_TEXT,
    #             label=CITATION_BUTTON_LABEL,
    #             lines=20,
    #             elem_id="citation-button",
    #             show_copy_button=True,
    #         )

    gr.Markdown(CONTACT_TEXT, elem_classes="markdown-text")
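
# Restart the Space every 30 minutes so the leaderboard picks up newly pushed results.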
# demo.launch(share=True)  # launching here would block before the scheduler and the queued launch below
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

# Queue incoming requests (up to 40 concurrent) and launch the app.
demo.queue(default_concurrency_limit=40).launch(share=True)