Spaces:
Running
Running
update results
Browse files
app.py
CHANGED
@@ -30,16 +30,42 @@ def restart_space():
|
|
30 |
API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
|
31 |
|
32 |
# Load the CSV file
|
33 |
-
def load_csv(file_path):
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Example path to your CSV file
|
38 |
-
csv_path = f'{EVAL_RESULTS_PATH}/
|
39 |
-
data = load_csv(csv_path)
|
|
|
|
|
40 |
|
41 |
-
def show_data():
|
42 |
-
|
43 |
|
44 |
# iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
|
45 |
# description="Leaderboard for the SeaExam competition.")
|
@@ -52,7 +78,7 @@ with demo:
|
|
52 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
53 |
with gr.TabItem("π
LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
54 |
leaderboard_table = gr.components.Dataframe(
|
55 |
-
value=
|
56 |
# value=leaderboard_df[
|
57 |
# [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
|
58 |
# + shown_columns.value
|
@@ -67,13 +93,13 @@ with demo:
|
|
67 |
)
|
68 |
with gr.TabItem("π
M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
|
69 |
leaderboard_table = gr.components.Dataframe(
|
70 |
-
value=
|
71 |
interactive=False,
|
72 |
visible=True,
|
73 |
)
|
74 |
with gr.TabItem("π
MMLU", elem_id="llm-benchmark-MMLU", id=2):
|
75 |
leaderboard_table = gr.components.Dataframe(
|
76 |
-
value=
|
77 |
interactive=False,
|
78 |
visible=True,
|
79 |
)
|
|
|
30 |
API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
|
31 |
|
32 |
# Load the CSV file
|
33 |
+
# def load_csv(file_path):
|
34 |
+
# data = pd.read_csv(file_path)
|
35 |
+
# return data
|
36 |
+
|
37 |
+
def load_data(data_path):
    """Load the results CSV and split it into three leaderboard tables.

    The first line of the file is skipped, the next line is used as the
    header, and every row containing a missing value is dropped.  The
    remaining frame is split by column position:

    * columns 0-3   -> metadata shared by all tables
    * columns 4-10  -> scores for the first table (``df_m3exam``)
    * columns 11-17 -> scores for the second table (``df_mmlu``)
    * columns 18-24 -> scores for the third table (``df_avg``)

    The second and third tables are relabelled positionally with the
    canonical column names.  The first table keeps the header names read
    from the file, so the file's first 11 header names must already be the
    canonical ones (otherwise the final reordering raises ``KeyError``).

    Args:
        data_path: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(df_m3exam, df_mmlu, df_avg)`` of DataFrames, each with the
        columns ``Model, type, open?, shot, avg, avg_sea, en, zh, id, th, vi``
        and with score columns multiplied by 100 and rounded to 2 decimals.
    """
    df = pd.read_csv(data_path, skiprows=1, header=0).dropna()

    meta_cols = ['Model', 'type', 'open?', 'shot']
    score_cols = ['en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
    columns = meta_cols + score_cols
    columns_sorted = meta_cols + ['avg', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi']

    # Take explicit copies: the frames are modified below, and mutating a
    # slice of `df` triggers SettingWithCopyWarning / view-vs-copy ambiguity.
    df_m3exam = df.iloc[:, :11].copy()
    df_mmlu = df.iloc[:, [0, 1, 2, 3, 11, 12, 13, 14, 15, 16, 17]].copy()
    df_avg = df.iloc[:, [0, 1, 2, 3, 18, 19, 20, 21, 22, 23, 24]].copy()

    # Duplicate header names are mangled by read_csv (e.g. 'en.1'), so the
    # second and third groups are renamed positionally.
    df_mmlu.columns = columns
    df_avg.columns = columns

    # Scale the scores by 100 (presumably fractions -> percentages; confirm
    # against the CSV) and keep two decimal places.
    for df_tmp in (df_m3exam, df_mmlu, df_avg):
        df_tmp[score_cols] = (df_tmp[score_cols] * 100).round(2)

    # Put the aggregate columns ('avg', 'avg_sea') ahead of the per-language ones.
    df_m3exam = df_m3exam[columns_sorted]
    df_mmlu = df_mmlu[columns_sorted]
    df_avg = df_avg[columns_sorted]
    return df_m3exam, df_mmlu, df_avg
|
60 |
|
61 |
# Example path to your CSV file
|
62 |
+
csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results.csv'
|
63 |
+
# data = load_csv(csv_path)
|
64 |
+
|
65 |
+
df_m3exam, df_mmlu, df_avg = load_data(csv_path)
|
66 |
|
67 |
+
# def show_data():
|
68 |
+
# return data
|
69 |
|
70 |
# iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
|
71 |
# description="Leaderboard for the SeaExam competition.")
|
|
|
78 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
79 |
with gr.TabItem("π
LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
|
80 |
leaderboard_table = gr.components.Dataframe(
|
81 |
+
value=df_avg,
|
82 |
# value=leaderboard_df[
|
83 |
# [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
|
84 |
# + shown_columns.value
|
|
|
93 |
)
|
94 |
with gr.TabItem("π
M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
|
95 |
leaderboard_table = gr.components.Dataframe(
|
96 |
+
value=df_m3exam,
|
97 |
interactive=False,
|
98 |
visible=True,
|
99 |
)
|
100 |
with gr.TabItem("π
MMLU", elem_id="llm-benchmark-MMLU", id=2):
|
101 |
leaderboard_table = gr.components.Dataframe(
|
102 |
+
value=df_mmlu,
|
103 |
interactive=False,
|
104 |
visible=True,
|
105 |
)
|