Sync from GitHub
Browse files
open-moe-llm-leaderboard-gh/src/backend/hflm_with_measurement.py
CHANGED
@@ -285,7 +285,7 @@ class HFLMWithMeasurement(HFLM):
|
|
285 |
# Answer: (log prob, is-exact-match)
|
286 |
answer = (float(logits.sum()), bool(max_equal))
|
287 |
|
288 |
-
res.append((answer, per_sample_time, 0, 0))
|
289 |
|
290 |
self.cache_hook.add_partial("loglikelihood", request_str, answer)
|
291 |
pbar.update(1)
|
|
|
285 |
# Answer: (log prob, is-exact-match)
|
286 |
answer = (float(logits.sum()), bool(max_equal))
|
287 |
|
288 |
+
res.append((answer, per_sample_time, 0, 0, 0, 0))
|
289 |
|
290 |
self.cache_hook.add_partial("loglikelihood", request_str, answer)
|
291 |
pbar.update(1)
|
open-moe-llm-leaderboard-gh/src/display/utils.py
CHANGED
@@ -38,6 +38,8 @@ gpu_metrics_to_name_map = {
|
|
38 |
"batch_size": BATCH_SIZE,
|
39 |
"precision": PRECISION,
|
40 |
GPU_Name: GPU_Name,
|
|
|
|
|
41 |
}
|
42 |
|
43 |
@dataclass
|
@@ -80,6 +82,7 @@ class Tasks(Enum):
|
|
80 |
selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
81 |
mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
|
82 |
gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
|
|
|
83 |
|
84 |
|
85 |
# These classes are for user facing column names,
|
@@ -119,6 +122,8 @@ for task in Tasks:
|
|
119 |
continue
|
120 |
# auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
|
121 |
auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
|
|
|
|
|
122 |
|
123 |
|
124 |
# Model information
|
|
|
38 |
"batch_size": BATCH_SIZE,
|
39 |
"precision": PRECISION,
|
40 |
GPU_Name: GPU_Name,
|
41 |
+
MFU: MFU,
|
42 |
+
MBU: MBU
|
43 |
}
|
44 |
|
45 |
@dataclass
|
|
|
82 |
selfcheck = Task("selfcheckgpt", "max-selfcheckgpt", "SelfCheckGPT")
|
83 |
mmlu = Task("mmlu", "acc", "MMLU") #MMLU/Acc (5-shot)
|
84 |
gsm8k = Task("gsm8k_custom", "em", "GSM8K") #GSM8K/EM (5-shot)
|
85 |
+
gsm8k_cot = Task("gsm8k_cot", "em", "GSM8K COT") #GSM8K COT/EM (5-shot)
|
86 |
|
87 |
|
88 |
# These classes are for user facing column names,
|
|
|
122 |
continue
|
123 |
# auto_eval_column_dict.append([f"{task.name}_prefilling_time", ColumnContent, ColumnContent(f"{task.value.col_name} {PREs}", "number", False, hidden=True)])
|
124 |
auto_eval_column_dict.append([f"{task.name}_decoding_throughput", ColumnContent, ColumnContent(f"{task.value.col_name} {TS}", "number", True, hidden=True)])
|
125 |
+
auto_eval_column_dict.append([f"{task.name}_gpu_mbu", ColumnContent, ColumnContent(f"{task.value.col_name} {MBU}", "number", True, hidden=True)])
|
126 |
+
auto_eval_column_dict.append([f"{task.name}_gpu_mfu", ColumnContent, ColumnContent(f"{task.value.col_name} {MFU}", "number", True, hidden=True)])
|
127 |
|
128 |
|
129 |
# Model information
|