bluebench

Running

jbnayahu committed on Sep 4

Commit

44caf2c

unverified ·

1 Parent(s): 9ee85e2

Add support for model display names

Signed-off-by: Jonathan Bnayahu <[email protected]>

Files changed (2) hide show

results/bluebench/modelmap.csv ADDED Viewed

+watsonx/ibm/granite-3-2b-instruct,ibm/granite-3-2-b-instruct
+watsonx/ibm/granite-3-3-8b-instruct,ibm/granite-3-3-8b-instruct
+watsonx/ibm/granite-3-8b-instruct,ibm/granite-3-8b-instruct
+watsonx/meta-llama/llama-3-2-1b-instruct,meta-llama/llama-3-2-1b-instruct
+watsonx/meta-llama/llama-3-2-3b-instruct,meta-llama/llama-3-2-3b-instruct
+watsonx/meta-llama/llama-3-405b-instruct,meta-llama/llama-3-405b-instruct
+watsonx/mistralai/mistral-medium-2505,mistralai/mistral-medium-2505
+watsonx/mistralai/mistral-small-3-1-24b-instruct-2503,mistralai/mistral-small-3-1-24b-instruct-2503
+watsonx/mistralai/pixtral-12b,mistralai/pixtral-12b
+watsonx/meta-llama/llama-3-3-70b-instruct,meta-llama/llama-3-3-70b-instruct
+watsonx/mistralai/mistral-large,mistralai/mistral-large
+azure/Azure/gpt-4.1-ncf,openai/gpt-4.1
+azure/Azure/gpt-4.1-mini-ncf,openai/gpt-4.1-mini
+azure/Azure/gpt-4.1-nano-ncf,openai/gpt-4.1-nano
+azure/Azure/gpt-4o-ncf,openai/gpt-4o
+azure/Azure/o3-mini-ncf,openai/o3-mini
+azure/Azure/o1-ncf,openai/o1
+azure/Azure/o4-mini-ncf,openai/o4-mini

src/leaderboard/read_evals.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import json
 import os
 from dataclasses import dataclass
@@ -15,6 +16,15 @@ class EvalResult:
     full_model: str # org/model (path on hub)
     results: dict
     date: str = "" # submission date of request file
     @classmethod
     def init_from_json_file(self, json_filepath):
@@ -25,7 +35,9 @@ class EvalResult:
         env_info = data.get("environment_info").get("parsed_arguments")
         full_model = env_info.get("model")
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -64,6 +76,8 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         # if len(files) == 0 or any([not f.endswith(".json") for f in files]):

 import json
+import csv
 import os
 from dataclasses import dataclass
     full_model: str # org/model (path on hub)
     results: dict
     date: str = "" # submission date of request file
+    modelmap = {}
+    @classmethod
+    def init_model_map(self, mapfile):
+        with open(mapfile) as f:
+            reader = csv.reader(f)
+            for row in reader:
+                self.modelmap[row[0]] = row[1]
     @classmethod
     def init_from_json_file(self, json_filepath):
         env_info = data.get("environment_info").get("parsed_arguments")
         full_model = env_info.get("model")
+        # Use the display name, if available
+        full_model = self.modelmap.get(full_model,full_model)
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
     """From the path of the results folder root, extract all needed info for results"""
     model_result_filepaths = []
+    EvalResult.init_model_map(results_path+"/modelmap.csv")
     for root, _, files in os.walk(results_path):
         # We should only have json files in model results
         # if len(files) == 0 or any([not f.endswith(".json") for f in files]):