lewtun HF staff committed on
Commit
1cb92d9
·
1 Parent(s): 0ff2e8c

Add AlpacaEval

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -64,6 +64,9 @@ def get_leaderboard_df(merge_values: bool = True):
64
  # MATH reports qem
65
  elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
66
  value = data["results"]["all"]["qem"]
 
 
 
67
  else:
68
  first_metric_key = next(
69
  iter(data["results"][first_result_key])
@@ -80,13 +83,15 @@ def get_leaderboard_df(merge_values: bool = True):
80
  else:
81
  df.loc[model_revision, task] = value
82
 
83
- # Put IFEval / BBH / AGIEval in first columns
 
 
84
  ifeval_col = df.pop("Ifeval")
85
- df.insert(1, "Ifeval", ifeval_col)
86
  bbh_col = df.pop("Bbh")
87
- df.insert(2, "Bbh", bbh_col)
88
  agieval_col = df.pop("Agieval")
89
- df.insert(3, "Agieval", agieval_col)
90
  # Drop rows where every entry is NaN
91
  df = df.dropna(how="all", axis=0, subset=[c for c in df.columns if c != "Date"])
92
  df.insert(loc=1, column="Average", value=df.mean(axis=1, numeric_only=True))
 
64
  # MATH reports qem
65
  elif task.lower() in ["math", "math_v2", "aimo_kaggle"]:
66
  value = data["results"]["all"]["qem"]
67
+ # Report length controlled winrate for AlpacaEval
68
+ elif task.lower() == "alpaca_eval":
69
+ value = data["results"][first_result_key]["length_controlled_winrate"] / 100.0
70
  else:
71
  first_metric_key = next(
72
  iter(data["results"][first_result_key])
 
83
  else:
84
  df.loc[model_revision, task] = value
85
 
86
+ # Put IFEval / BBH / AGIEval / AlpacaEval in first columns
87
+ alpaca_col = df.pop("Alpaca_eval")
88
+ df.insert(1, "Alpaca_eval", alpaca_col)
89
  ifeval_col = df.pop("Ifeval")
90
+ df.insert(2, "Ifeval", ifeval_col)
91
  bbh_col = df.pop("Bbh")
92
+ df.insert(3, "Bbh", bbh_col)
93
  agieval_col = df.pop("Agieval")
94
+ df.insert(4, "Agieval", agieval_col)
95
  # Drop rows where every entry is NaN
96
  df = df.dropna(how="all", axis=0, subset=[c for c in df.columns if c != "Date"])
97
  df.insert(loc=1, column="Average", value=df.mean(axis=1, numeric_only=True))