lukecq committed on
Commit
1e647ba
Β·
1 Parent(s): 653c0f4

update results

Browse files
Files changed (1) hide show
  1. app.py +36 -10
app.py CHANGED
@@ -30,16 +30,42 @@ def restart_space():
30
  API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
31
 
32
  # Load the CSV file
33
- def load_csv(file_path):
34
- data = pd.read_csv(file_path)
35
- return data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Example path to your CSV file
38
- csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_0419.csv'
39
- data = load_csv(csv_path)
 
 
40
 
41
- def show_data():
42
- return data
43
 
44
  # iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
45
  # description="Leaderboard for the SeaExam competition.")
@@ -52,7 +78,7 @@ with demo:
52
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
53
  with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
54
  leaderboard_table = gr.components.Dataframe(
55
- value=data,
56
  # value=leaderboard_df[
57
  # [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
58
  # + shown_columns.value
@@ -67,13 +93,13 @@ with demo:
67
  )
68
  with gr.TabItem("πŸ… M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
69
  leaderboard_table = gr.components.Dataframe(
70
- value=data,
71
  interactive=False,
72
  visible=True,
73
  )
74
  with gr.TabItem("πŸ… MMLU", elem_id="llm-benchmark-MMLU", id=2):
75
  leaderboard_table = gr.components.Dataframe(
76
- value=data,
77
  interactive=False,
78
  visible=True,
79
  )
 
30
  API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
31
 
32
  # Load the CSV file
33
+ # def load_csv(file_path):
34
+ # data = pd.read_csv(file_path)
35
+ # return data
36
+
37
def load_data(data_path):
    """Load the SeaExam results CSV and split it into three leaderboard tables.

    The CSV is expected to have one throwaway title row, then a header row
    over 25 columns: 4 metadata columns (Model, type, open?, shot) followed
    by 7 score columns each for M3Exam, MMLU, and their average.

    Args:
        data_path: Path to the results CSV file.

    Returns:
        A tuple ``(df_m3exam, df_mmlu, df_avg)`` of DataFrames, each with the
        aggregate columns moved right after the metadata and all score
        columns scaled to percentages rounded to 2 decimal places.
    """
    df = pd.read_csv(data_path, skiprows=1, header=0).dropna()

    columns = ['Model', 'type', 'open?', 'shot', 'en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
    columns_sorted = ['Model', 'type', 'open?', 'shot', 'avg', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi']
    score_cols = ['en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']

    # Split into three DataFrames based on the groups M3Exam, MMLU and average.
    # .copy() prevents SettingWithCopyWarning (and silent no-ops under pandas
    # copy-on-write) when the score columns are scaled in place below.
    df_m3exam = df.iloc[:, :11].copy()  # M3Exam columns
    df_mmlu = df.iloc[:, [0, 1, 2, 3, 11, 12, 13, 14, 15, 16, 17]].copy()  # MMLU columns
    df_avg = df.iloc[:, [0, 1, 2, 3, 18, 19, 20, 21, 22, 23, 24]].copy()  # Average columns
    df_mmlu.columns = columns
    df_avg.columns = columns

    # Convert fractional scores to percentages, rounded to 2 decimal places.
    for df_tmp in [df_m3exam, df_mmlu, df_avg]:
        df_tmp[score_cols] *= 100
        df_tmp[score_cols] = df_tmp[score_cols].round(2)

    # Reorder so the aggregate scores come right after the metadata columns.
    df_m3exam = df_m3exam[columns_sorted]
    df_mmlu = df_mmlu[columns_sorted]
    df_avg = df_avg[columns_sorted]
    return df_m3exam, df_mmlu, df_avg
60
 
61
  # Example path to your CSV file
62
+ csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results.csv'
63
+ # data = load_csv(csv_path)
64
+
65
+ df_m3exam, df_mmlu, df_avg = load_data(csv_path)
66
 
67
+ # def show_data():
68
+ # return data
69
 
70
  # iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
71
  # description="Leaderboard for the SeaExam competition.")
 
78
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
79
  with gr.TabItem("πŸ… LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
80
  leaderboard_table = gr.components.Dataframe(
81
+ value=df_avg,
82
  # value=leaderboard_df[
83
  # [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
84
  # + shown_columns.value
 
93
  )
94
  with gr.TabItem("πŸ… M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
95
  leaderboard_table = gr.components.Dataframe(
96
+ value=df_m3exam,
97
  interactive=False,
98
  visible=True,
99
  )
100
  with gr.TabItem("πŸ… MMLU", elem_id="llm-benchmark-MMLU", id=2):
101
  leaderboard_table = gr.components.Dataframe(
102
+ value=df_mmlu,
103
  interactive=False,
104
  visible=True,
105
  )