open-r1-eval-leaderboard

Running

lewtun HF Staff committed on Apr 23, 2024

Commit

1cfc013

1 Parent(s): fe858f1

Add math levels

Files changed (1) hide show

app.py CHANGED Viewed

@@ -57,14 +57,23 @@ def get_leaderboard_df(merge_values: bool = True):
             elif task.lower() == "agieval":
                 value = data["results"]["all"]["acc_norm"]
             # MATH reports qem
-            elif task.lower() in ["math", "mini_math", "aimo_kaggle"]:
                 value = data["results"]["all"]["qem"]
             else:
                 first_metric_key = next(
                     iter(data["results"][first_result_key])
                 )  # gets the first key in the first result
                 value = data["results"][first_result_key][first_metric_key]  # gets the value of the first metric
-            df.loc[model_revision, task] = value
     # Put IFEval / BBH / AGIEval in first columns
     ifeval_col = df.pop("Ifeval")

             elif task.lower() == "agieval":
                 value = data["results"]["all"]["acc_norm"]
             # MATH reports qem
+            elif task.lower() in ["math", "aimo_kaggle"]:
                 value = data["results"]["all"]["qem"]
             else:
                 first_metric_key = next(
                     iter(data["results"][first_result_key])
                 )  # gets the first key in the first result
                 value = data["results"][first_result_key][first_metric_key]  # gets the value of the first metric
+            # For mini_math we report 5 metrics, one for each level, and store each one as a separate column in the dataframe
+            if task.lower() == "mini_math":
+                for k, v in data["results"].items():
+                    if k != "all":
+                        level = k.split("|")[1].split(":")[-1]
+                        value = v["qem"]
+                        df.loc[model_revision, f"{task}_{level}"] = value
+            else:
+                df.loc[model_revision, task] = value
     # Put IFEval / BBH / AGIEval in first columns
     ifeval_col = df.pop("Ifeval")