chivier committed on
Commit
f271267
·
1 Parent(s): 1aecf91

sync from github

Browse files
open-moe-llm-leaderboard-gh/src/backend/hflm_with_measurement.py CHANGED
@@ -285,7 +285,7 @@ class HFLMWithMeasurement(HFLM):
285
  # Answer: (log prob, is-exact-match)
286
  answer = (float(logits.sum()), bool(max_equal))
287
 
288
- res.append((answer, per_sample_time, 0, 0))
289
 
290
  self.cache_hook.add_partial("loglikelihood", request_str, answer)
291
  pbar.update(1)
 
285
  # Answer: (log prob, is-exact-match)
286
  answer = (float(logits.sum()), bool(max_equal))
287
 
288
+ res.append((answer, per_sample_time, 0, 0, 0, 0))
289
 
290
  self.cache_hook.add_partial("loglikelihood", request_str, answer)
291
  pbar.update(1)
open-moe-llm-leaderboard-gh/src/display/utils.py CHANGED
@@ -38,6 +38,8 @@ gpu_metrics_to_name_map = {
38
  "batch_size": BATCH_SIZE,
39
  "precision": PRECISION,
40
  GPU_Name: GPU_Name,
 
 
41
  }
42
 
43
  @dataclass
@@ -80,6 +82,7 @@ class Tasks(Enum):
80
  selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
81
  mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
82
  gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
 
83
 
84
 
85
  # These classes are for user facing column names,
@@ -119,6 +122,8 @@ for task in Tasks:
119
  continue
120
  # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
121
  auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
 
 
122
 
123
 
124
  # Model information
 
38
  "batch_size": BATCH_SIZE,
39
  "precision": PRECISION,
40
  GPU_Name: GPU_Name,
41
+ MFU: MFU,
42
+ MBU: MBU
43
  }
44
 
45
  @dataclass
 
82
  selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
83
  mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
84
  gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
85
+ gsm8k_cot = Task("gsm8k_cot", "em", "GSM8K COT") #GSM8K COT/EM (5-shot)
86
 
87
 
88
  # These classes are for user facing column names,
 
122
  continue
123
  # auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
124
  auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
125
+ auto_eval_column_dict.append([f"{task.name}_gpu_mbu", ColumnContent, ColumnContent(f"{task.value.col_name} {MBU}", "number", True, hidden=True)])
126
+ auto_eval_column_dict.append([f"{task.name}_gpu_mfu", ColumnContent, ColumnContent(f"{task.value.col_name} {MFU}", "number", True, hidden=True)])
127
 
128
 
129
  # Model information