lewtun HF staff committed on
Commit
1cfc013
·
1 Parent(s): fe858f1

Add math levels

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -57,14 +57,23 @@ def get_leaderboard_df(merge_values: bool = True):
57
  elif task.lower() == "agieval":
58
  value = data["results"]["all"]["acc_norm"]
59
  # MATH reports qem
60
- elif task.lower() in ["math", "mini_math", "aimo_kaggle"]:
61
  value = data["results"]["all"]["qem"]
62
  else:
63
  first_metric_key = next(
64
  iter(data["results"][first_result_key])
65
  ) # gets the first key in the first result
66
  value = data["results"][first_result_key][first_metric_key] # gets the value of the first metric
67
- df.loc[model_revision, task] = value
 
 
 
 
 
 
 
 
 
68
 
69
  # Put IFEval / BBH / AGIEval in first columns
70
  ifeval_col = df.pop("Ifeval")
 
57
  elif task.lower() == "agieval":
58
  value = data["results"]["all"]["acc_norm"]
59
  # MATH reports qem
60
+ elif task.lower() in ["math", "aimo_kaggle"]:
61
  value = data["results"]["all"]["qem"]
62
  else:
63
  first_metric_key = next(
64
  iter(data["results"][first_result_key])
65
  ) # gets the first key in the first result
66
  value = data["results"][first_result_key][first_metric_key] # gets the value of the first metric
67
+
68
+ # For mini_math we report 5 metrics, one for each level, and store each one as a separate column in the dataframe
69
+ if task.lower() == "mini_math":
70
+ for k, v in data["results"].items():
71
+ if k != "all":
72
+ level = k.split("|")[1].split(":")[-1]
73
+ value = v["qem"]
74
+ df.loc[model_revision, f"{task}_{level}"] = value
75
+ else:
76
+ df.loc[model_revision, task] = value
77
 
78
  # Put IFEval / BBH / AGIEval in first columns
79
  ifeval_col = df.pop("Ifeval")