Add support for model display names
Browse files
Signed-off-by: Jonathan Bnayahu <[email protected]>
- results/bluebench/modelmap.csv +18 -0
- src/leaderboard/read_evals.py +15 -1
results/bluebench/modelmap.csv
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
watsonx/ibm/granite-3-2b-instruct,ibm/granite-3-2b-instruct
|
| 2 |
+
watsonx/ibm/granite-3-3-8b-instruct,ibm/granite-3-3-8b-instruct
|
| 3 |
+
watsonx/ibm/granite-3-8b-instruct,ibm/granite-3-8b-instruct
|
| 4 |
+
watsonx/meta-llama/llama-3-2-1b-instruct,meta-llama/llama-3-2-1b-instruct
|
| 5 |
+
watsonx/meta-llama/llama-3-2-3b-instruct,meta-llama/llama-3-2-3b-instruct
|
| 6 |
+
watsonx/meta-llama/llama-3-405b-instruct,meta-llama/llama-3-405b-instruct
|
| 7 |
+
watsonx/mistralai/mistral-medium-2505,mistralai/mistral-medium-2505
|
| 8 |
+
watsonx/mistralai/mistral-small-3-1-24b-instruct-2503,mistralai/mistral-small-3-1-24b-instruct-2503
|
| 9 |
+
watsonx/mistralai/pixtral-12b,mistralai/pixtral-12b
|
| 10 |
+
watsonx/meta-llama/llama-3-3-70b-instruct,meta-llama/llama-3-3-70b-instruct
|
| 11 |
+
watsonx/mistralai/mistral-large,mistralai/mistral-large
|
| 12 |
+
azure/Azure/gpt-4.1-ncf,openai/gpt-4.1
|
| 13 |
+
azure/Azure/gpt-4.1-mini-ncf,openai/gpt-4.1-mini
|
| 14 |
+
azure/Azure/gpt-4.1-nano-ncf,openai/gpt-4.1-nano
|
| 15 |
+
azure/Azure/gpt-4o-ncf,openai/gpt-4o
|
| 16 |
+
azure/Azure/o3-mini-ncf,openai/o3-mini
|
| 17 |
+
azure/Azure/o1-ncf,openai/o1
|
| 18 |
+
azure/Azure/o4-mini-ncf,openai/o4-mini
|
src/leaderboard/read_evals.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import json
|
|
|
|
| 2 |
import os
|
| 3 |
from dataclasses import dataclass
|
| 4 |
|
|
@@ -15,6 +16,15 @@ class EvalResult:
|
|
| 15 |
full_model: str # org/model (path on hub)
|
| 16 |
results: dict
|
| 17 |
date: str = "" # submission date of request file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
@classmethod
|
| 20 |
def init_from_json_file(self, json_filepath):
|
|
@@ -25,7 +35,9 @@ class EvalResult:
|
|
| 25 |
env_info = data.get("environment_info").get("parsed_arguments")
|
| 26 |
|
| 27 |
full_model = env_info.get("model")
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
# Extract results available in this file (some results are split in several files)
|
| 30 |
results = {}
|
| 31 |
for task in Tasks:
|
|
@@ -64,6 +76,8 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
|
|
| 64 |
"""From the path of the results folder root, extract all needed info for results"""
|
| 65 |
model_result_filepaths = []
|
| 66 |
|
|
|
|
|
|
|
| 67 |
for root, _, files in os.walk(results_path):
|
| 68 |
# We should only have json files in model results
|
| 69 |
# if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|
|
|
|
| 1 |
import json
|
| 2 |
+
import csv
|
| 3 |
import os
|
| 4 |
from dataclasses import dataclass
|
| 5 |
|
|
|
|
| 16 |
full_model: str # org/model (path on hub)
|
| 17 |
results: dict
|
| 18 |
date: str = "" # submission date of request file
|
| 19 |
+
|
| 20 |
+
modelmap = {}
|
| 21 |
+
|
| 22 |
+
@classmethod
def init_model_map(cls, mapfile):
    """Load the model-id -> display-name mapping from a CSV file.

    Every row with at least two fields adds ``row[0] -> row[1]`` to the
    class-level ``modelmap`` dict (existing entries with the same key are
    overwritten). Fields beyond the second are ignored.

    Args:
        mapfile: Path of the CSV file to read.

    Raises:
        OSError: If ``mapfile`` cannot be opened (e.g. it does not exist).
    """
    # newline="" lets the csv module do its own line-ending handling, as its
    # documentation requires; the explicit encoding avoids depending on the
    # platform's locale default.
    with open(mapfile, newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            # A blank line parses as [] and a malformed line may have a single
            # field; skip those instead of failing with IndexError.
            if len(row) < 2:
                continue
            # Strip stray whitespace so "a, b" still matches the model id "a".
            cls.modelmap[row[0].strip()] = row[1].strip()
|
| 28 |
|
| 29 |
@classmethod
|
| 30 |
def init_from_json_file(self, json_filepath):
|
|
|
|
| 35 |
env_info = data.get("environment_info").get("parsed_arguments")
|
| 36 |
|
| 37 |
full_model = env_info.get("model")
|
| 38 |
+
# Use the display name, if available
|
| 39 |
+
full_model = self.modelmap.get(full_model,full_model)
|
| 40 |
+
|
| 41 |
# Extract results available in this file (some results are split in several files)
|
| 42 |
results = {}
|
| 43 |
for task in Tasks:
|
|
|
|
| 76 |
"""From the path of the results folder root, extract all needed info for results"""
|
| 77 |
model_result_filepaths = []
|
| 78 |
|
| 79 |
+
EvalResult.init_model_map(results_path+"/modelmap.csv")
|
| 80 |
+
|
| 81 |
for root, _, files in os.walk(results_path):
|
| 82 |
# We should only have json files in model results
|
| 83 |
# if len(files) == 0 or any([not f.endswith(".json") for f in files]):
|