lewtun HF staff committed on
Commit
69bc633
·
1 Parent(s): 9d1c3ff

Set merge as default

Browse files
Files changed (1) hide show
  1. app.py +5 -2
app.py CHANGED
@@ -11,7 +11,7 @@ Evaluation of H4 and community models across a diverse range of benchmarks from
11
  """
12
 
13
 
14
- def get_leaderboard_df(merge_values: bool = False):
15
  filepaths = list(Path("eval_results").rglob("*.json"))
16
 
17
  # Parse filepaths to get unique models
@@ -47,6 +47,9 @@ def get_leaderboard_df(merge_values: bool = False):
47
  # HellaSwag and ARC reports acc_norm
48
  elif task.lower() in ["hellaswag", "arc"]:
49
  value = data["results"][first_result_key]["acc_norm"]
 
 
 
50
  else:
51
  first_metric_key = next(
52
  iter(data["results"][first_result_key])
@@ -76,7 +79,7 @@ def get_leaderboard_df(merge_values: bool = False):
76
  return df
77
 
78
 
79
- def refresh(merge_values: bool = False):
80
  return get_leaderboard_df(merge_values)
81
 
82
 
 
11
  """
12
 
13
 
14
+ def get_leaderboard_df(merge_values: bool = True):
15
  filepaths = list(Path("eval_results").rglob("*.json"))
16
 
17
  # Parse filepaths to get unique models
 
47
  # HellaSwag and ARC reports acc_norm
48
  elif task.lower() in ["hellaswag", "arc"]:
49
  value = data["results"][first_result_key]["acc_norm"]
50
+ # BBH has several metrics but we report just the average one
51
+ elif task.lower() == "bbh":
52
+ value = [v["em"] for k, v in data["results"].items() if "_average" in k.lower()][0]
53
  else:
54
  first_metric_key = next(
55
  iter(data["results"][first_result_key])
 
79
  return df
80
 
81
 
82
+ def refresh(merge_values: bool = True):
83
  return get_leaderboard_df(merge_values)
84
 
85