Spaces:

SeaLLMs
/

LLM_Leaderboard_for_SEA

Running

App Files Files Community

lukecq committed on Apr 20, 2024

Commit

1e647ba

1 Parent(s): 653c0f4

update results

Browse files

Files changed (1) hide show

app.py +36 -10

app.py CHANGED Viewed

@@ -30,16 +30,42 @@ def restart_space():
     API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
 # Load the CSV file
-def load_csv(file_path):
-    data = pd.read_csv(file_path)
-    return data
 # Example path to your CSV file
-csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results_0419.csv'
-data = load_csv(csv_path)
-def show_data():
-    return data
 # iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
 #                      description="Leaderboard for the SeaExam competition.")
@@ -52,7 +78,7 @@ with demo:
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard_table = gr.components.Dataframe(
-                value=data,
                 # value=leaderboard_df[
                 #     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                 #     + shown_columns.value
@@ -67,13 +93,13 @@ with demo:
             )
         with gr.TabItem("🏅 M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
             leaderboard_table = gr.components.Dataframe(
-                value=data,
                 interactive=False,
                 visible=True,
             )
         with gr.TabItem("🏅 MMLU", elem_id="llm-benchmark-MMLU", id=2):
             leaderboard_table = gr.components.Dataframe(
-                value=data,
                 interactive=False,
                 visible=True,
             )

     API.restart_space(repo_id="lukecq/SeaExam_leaderboard", token=TOKEN)
 # Load the CSV file
+# def load_csv(file_path):
+#     data = pd.read_csv(file_path)
+#     return data
def load_data(data_path):
    """Load the SeaExam results CSV and split it into three leaderboard tables.

    The first line of the file is skipped and the following line is used as
    the header. Rows containing any missing value are dropped. Columns are
    selected purely by position: 0-3 are the shared descriptive columns,
    4-10 the M3Exam scores, 11-17 the MMLU scores and 18-24 the averaged
    scores. Every table is relabelled with the same column names, its score
    columns are multiplied by 100 and rounded to 2 decimals, and the columns
    are reordered so that the two averages come first.

    Args:
        data_path: Path or buffer accepted by ``pandas.read_csv``.

    Returns:
        A tuple ``(df_m3exam, df_mmlu, df_avg)`` of independent DataFrames,
        each with the columns ``['Model', 'type', 'open?', 'shot', 'avg',
        'avg_sea', 'en', 'zh', 'id', 'th', 'vi']``.
    """
    meta_columns = ['Model', 'type', 'open?', 'shot']
    score_columns = ['en', 'zh', 'id', 'th', 'vi', 'avg', 'avg_sea']
    columns = meta_columns + score_columns
    columns_sorted = meta_columns + ['avg', 'avg_sea', 'en', 'zh', 'id', 'th', 'vi']

    df = pd.read_csv(data_path, skiprows=1, header=0).dropna()

    tables = []
    # Start position of each score group: M3Exam, MMLU, average.
    for start in (4, 11, 18):
        positions = [0, 1, 2, 3] + list(range(start, start + len(score_columns)))
        # Copy explicitly: assigning into a bare ``iloc`` selection is chained
        # assignment (SettingWithCopyWarning) and must never touch ``df``.
        table = df.iloc[:, positions].copy()
        # Relabel every group the same way (previously M3Exam was skipped and
        # depended on the raw header text matching exactly).
        table.columns = columns
        # Scale the scores by 100 and keep 2 decimals.
        table[score_columns] = (table[score_columns] * 100).round(2)
        tables.append(table[columns_sorted])

    df_m3exam, df_mmlu, df_avg = tables
    return df_m3exam, df_mmlu, df_avg
 # Example path to your CSV file
+csv_path = f'{EVAL_RESULTS_PATH}/SeaExam_results.csv'
+# data = load_csv(csv_path)
+df_m3exam, df_mmlu, df_avg = load_data(csv_path)
+# def show_data():
+#     return data
 # iface = gr.Interface(fn=show_data, inputs = None, outputs="dataframe", title="SeaExam Leaderboard",
 #                      description="Leaderboard for the SeaExam competition.")
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
             leaderboard_table = gr.components.Dataframe(
+                value=df_avg,
                 # value=leaderboard_df[
                 #     [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
                 #     + shown_columns.value
             )
         with gr.TabItem("🏅 M3Exam", elem_id="llm-benchmark-M3Exam", id=1):
             leaderboard_table = gr.components.Dataframe(
+                value=df_m3exam,
                 interactive=False,
                 visible=True,
             )
         with gr.TabItem("🏅 MMLU", elem_id="llm-benchmark-MMLU", id=2):
             leaderboard_table = gr.components.Dataframe(
+                value=df_mmlu,
                 interactive=False,
                 visible=True,
             )