jbnayahu committed on
Commit
44caf2c
·
unverified ·
1 Parent(s): 9ee85e2

Add support for model display names

Browse files

Signed-off-by: Jonathan Bnayahu <[email protected]>

results/bluebench/modelmap.csv ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ watsonx/ibm/granite-3-2b-instruct,ibm/granite-3-2b-instruct
2
+ watsonx/ibm/granite-3-3-8b-instruct,ibm/granite-3-3-8b-instruct
3
+ watsonx/ibm/granite-3-8b-instruct,ibm/granite-3-8b-instruct
4
+ watsonx/meta-llama/llama-3-2-1b-instruct,meta-llama/llama-3-2-1b-instruct
5
+ watsonx/meta-llama/llama-3-2-3b-instruct,meta-llama/llama-3-2-3b-instruct
6
+ watsonx/meta-llama/llama-3-405b-instruct,meta-llama/llama-3-405b-instruct
7
+ watsonx/mistralai/mistral-medium-2505,mistralai/mistral-medium-2505
8
+ watsonx/mistralai/mistral-small-3-1-24b-instruct-2503,mistralai/mistral-small-3-1-24b-instruct-2503
9
+ watsonx/mistralai/pixtral-12b,mistralai/pixtral-12b
10
+ watsonx/meta-llama/llama-3-3-70b-instruct,meta-llama/llama-3-3-70b-instruct
11
+ watsonx/mistralai/mistral-large,mistralai/mistral-large
12
+ azure/Azure/gpt-4.1-ncf,openai/gpt-4.1
13
+ azure/Azure/gpt-4.1-mini-ncf,openai/gpt-4.1-mini
14
+ azure/Azure/gpt-4.1-nano-ncf,openai/gpt-4.1-nano
15
+ azure/Azure/gpt-4o-ncf,openai/gpt-4o
16
+ azure/Azure/o3-mini-ncf,openai/o3-mini
17
+ azure/Azure/o1-ncf,openai/o1
18
+ azure/Azure/o4-mini-ncf,openai/o4-mini
src/leaderboard/read_evals.py CHANGED
@@ -1,4 +1,5 @@
1
  import json
 
2
  import os
3
  from dataclasses import dataclass
4
 
@@ -15,6 +16,15 @@ class EvalResult:
15
  full_model: str # org/model (path on hub)
16
  results: dict
17
  date: str = "" # submission date of request file
 
 
 
 
 
 
 
 
 
18
 
19
  @classmethod
20
  def init_from_json_file(self, json_filepath):
@@ -25,7 +35,9 @@ class EvalResult:
25
  env_info = data.get("environment_info").get("parsed_arguments")
26
 
27
  full_model = env_info.get("model")
28
-
 
 
29
  # Extract results available in this file (some results are split in several files)
30
  results = {}
31
  for task in Tasks:
@@ -64,6 +76,8 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
64
  """From the path of the results folder root, extract all needed info for results"""
65
  model_result_filepaths = []
66
 
 
 
67
  for root, _, files in os.walk(results_path):
68
  # We should only have json files in model results
69
  # if len(files) == 0 or any([not f.endswith(".json") for f in files]):
 
1
  import json
2
+ import csv
3
  import os
4
  from dataclasses import dataclass
5
 
 
16
  full_model: str # org/model (path on hub)
17
  results: dict
18
  date: str = "" # submission date of request file
19
+
20
+ modelmap = {}
21
+
22
@classmethod
def init_model_map(cls, mapfile):
    """Load the model-id -> display-name mapping from a two-column CSV file.

    The first column of each row is the model identifier and the second is
    the name to show instead. Entries are stored in the class-level
    ``modelmap`` dict; an identifier that appears more than once keeps the
    value from its last row.

    Args:
        mapfile: Path of the CSV file to read.

    Raises:
        OSError: If ``mapfile`` cannot be opened.
    """
    # newline="" is what the csv module asks for when it is given a file
    # object; the explicit encoding keeps parsing independent of the locale.
    with open(mapfile, newline="", encoding="utf-8") as f:
        for row in csv.reader(f):
            # csv.reader yields [] for a blank line, and a malformed line may
            # have a single column; skip those instead of raising IndexError.
            if len(row) < 2:
                continue
            cls.modelmap[row[0]] = row[1]
28
 
29
  @classmethod
30
  def init_from_json_file(self, json_filepath):
 
35
  env_info = data.get("environment_info").get("parsed_arguments")
36
 
37
  full_model = env_info.get("model")
38
+ # Use the display name, if available
39
+ full_model = self.modelmap.get(full_model,full_model)
40
+
41
  # Extract results available in this file (some results are split in several files)
42
  results = {}
43
  for task in Tasks:
 
76
  """From the path of the results folder root, extract all needed info for results"""
77
  model_result_filepaths = []
78
 
79
+ EvalResult.init_model_map(results_path+"/modelmap.csv")
80
+
81
  for root, _, files in os.walk(results_path):
82
  # We should only have json files in model results
83
  # if len(files) == 0 or any([not f.endswith(".json") for f in files]):