Add support for model display names
Signed-off-by: Jonathan Bnayahu <[email protected]>
- results/bluebench/modelmap.csv +18 -0
- src/leaderboard/read_evals.py +15 -1
results/bluebench/modelmap.csv
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
watsonx/ibm/granite-3-2b-instruct,ibm/granite-3-2b-instruct
|
2 |
+
watsonx/ibm/granite-3-3-8b-instruct,ibm/granite-3-3-8b-instruct
|
3 |
+
watsonx/ibm/granite-3-8b-instruct,ibm/granite-3-8b-instruct
|
4 |
+
watsonx/meta-llama/llama-3-2-1b-instruct,meta-llama/llama-3-2-1b-instruct
|
5 |
+
watsonx/meta-llama/llama-3-2-3b-instruct,meta-llama/llama-3-2-3b-instruct
|
6 |
+
watsonx/meta-llama/llama-3-405b-instruct,meta-llama/llama-3-405b-instruct
|
7 |
+
watsonx/mistralai/mistral-medium-2505,mistralai/mistral-medium-2505
|
8 |
+
watsonx/mistralai/mistral-small-3-1-24b-instruct-2503,mistralai/mistral-small-3-1-24b-instruct-2503
|
9 |
+
watsonx/mistralai/pixtral-12b,mistralai/pixtral-12b
|
10 |
+
watsonx/meta-llama/llama-3-3-70b-instruct,meta-llama/llama-3-3-70b-instruct
|
11 |
+
watsonx/mistralai/mistral-large,mistralai/mistral-large
|
12 |
+
azure/Azure/gpt-4.1-ncf,openai/gpt-4.1
|
13 |
+
azure/Azure/gpt-4.1-mini-ncf,openai/gpt-4.1-mini
|
14 |
+
azure/Azure/gpt-4.1-nano-ncf,openai/gpt-4.1-nano
|
15 |
+
azure/Azure/gpt-4o-ncf,openai/gpt-4o
|
16 |
+
azure/Azure/o3-mini-ncf,openai/o3-mini
|
17 |
+
azure/Azure/o1-ncf,openai/o1
|
18 |
+
azure/Azure/o4-mini-ncf,openai/o4-mini
|
src/leaderboard/read_evals.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import json
|
|
|
2 |
import os
|
3 |
from dataclasses import dataclass
|
4 |
|
@@ -15,6 +16,15 @@ class EvalResult:
|
|
15 |
full_model: str # org/model (path on hub)
|
16 |
results: dict
|
17 |
date: str = "" # submission date of request file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
@classmethod
|
20 |
def init_from_json_file(self, json_filepath):
|
@@ -25,7 +35,9 @@ class EvalResult:
|
|
25 |
env_info = data.get("environment_info").get("parsed_arguments")
|
26 |
|
27 |
full_model = env_info.get("model")
|
28 |
-
|
|
|
|
|
29 |
# Extract results available in this file (some results are split in several files)
|
30 |
results = {}
|
31 |
for task in Tasks:
|
@@ -64,6 +76,8 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
|
|
64 |
"""From the path of the results folder root, extract all needed info for results"""
|
65 |
model_result_filepaths = []
|
66 |
|
|
|
|
|
67 |
for root, _, files in os.walk(results_path):
|
68 |
# We should only have json files in model results
|
69 |
# if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
|
|
1 |
import json
|
2 |
+
import csv
|
3 |
import os
|
4 |
from dataclasses import dataclass
|
5 |
|
|
|
16 |
full_model: str # org/model (path on hub)
|
17 |
results: dict
|
18 |
date: str = "" # submission date of request file
|
19 |
+
|
20 |
+
modelmap = {}
|
21 |
+
|
22 |
+
@classmethod
|
23 |
+
def init_model_map(self, mapfile):
|
24 |
+
with open(mapfile) as f:
|
25 |
+
reader = csv.reader(f)
|
26 |
+
for row in reader:
|
27 |
+
self.modelmap[row[0]] = row[1]
|
28 |
|
29 |
@classmethod
|
30 |
def init_from_json_file(self, json_filepath):
|
|
|
35 |
env_info = data.get("environment_info").get("parsed_arguments")
|
36 |
|
37 |
full_model = env_info.get("model")
|
38 |
+
# Use the display name, if available
|
39 |
+
full_model = self.modelmap.get(full_model,full_model)
|
40 |
+
|
41 |
# Extract results available in this file (some results are split in several files)
|
42 |
results = {}
|
43 |
for task in Tasks:
|
|
|
76 |
"""From the path of the results folder root, extract all needed info for results"""
|
77 |
model_result_filepaths = []
|
78 |
|
79 |
+
EvalResult.init_model_map(results_path+"/modelmap.csv")
|
80 |
+
|
81 |
for root, _, files in os.walk(results_path):
|
82 |
# We should only have json files in model results
|
83 |
# if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|