Update display names
Browse files- data/results.json +6 -6
- data/tasks.json +2 -2
data/results.json
CHANGED
@@ -268,7 +268,7 @@
|
|
268 |
},
|
269 |
"c4ai-command-r-plus": {
|
270 |
"config": {
|
271 |
-
"model_name": "
|
272 |
"model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
|
273 |
},
|
274 |
"results": {
|
@@ -324,7 +324,7 @@
|
|
324 |
},
|
325 |
"claude-3-5-sonnet-20241022": {
|
326 |
"config": {
|
327 |
-
"model_name": "
|
328 |
"model_sha": "https://www.anthropic.com/claude/sonnet",
|
329 |
"model_dtype": "torch.float16"
|
330 |
},
|
@@ -413,7 +413,7 @@
|
|
413 |
},
|
414 |
"gemini-1.5-flash": {
|
415 |
"config": {
|
416 |
-
"model_name": "
|
417 |
"model_sha": "https://deepmind.google/technologies/gemini/flash",
|
418 |
"model_dtype": "torch.float16"
|
419 |
},
|
@@ -502,7 +502,7 @@
|
|
502 |
},
|
503 |
"gemini-1.5-pro": {
|
504 |
"config": {
|
505 |
-
"model_name": "
|
506 |
"model_sha": "https://deepmind.google/technologies/gemini/pro",
|
507 |
"model_dtype": "torch.float16"
|
508 |
},
|
@@ -591,7 +591,7 @@
|
|
591 |
},
|
592 |
"gpt-4o": {
|
593 |
"config": {
|
594 |
-
"model_name": "
|
595 |
"model_sha": "https://openai.com/index/hello-gpt-4o",
|
596 |
"model_dtype": "torch.float16"
|
597 |
},
|
@@ -680,7 +680,7 @@
|
|
680 |
},
|
681 |
"gpt-4o-mini": {
|
682 |
"config": {
|
683 |
-
"model_name": "
|
684 |
"model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
|
685 |
"model_dtype": "torch.float16"
|
686 |
},
|
|
|
268 |
},
|
269 |
"c4ai-command-r-plus": {
|
270 |
"config": {
|
271 |
+
"model_name": "Command R+",
|
272 |
"model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
|
273 |
},
|
274 |
"results": {
|
|
|
324 |
},
|
325 |
"claude-3-5-sonnet-20241022": {
|
326 |
"config": {
|
327 |
+
"model_name": "Claude-3.5-Sonnet",
|
328 |
"model_sha": "https://www.anthropic.com/claude/sonnet",
|
329 |
"model_dtype": "torch.float16"
|
330 |
},
|
|
|
413 |
},
|
414 |
"gemini-1.5-flash": {
|
415 |
"config": {
|
416 |
+
"model_name": "Gemini-1.5-Flash",
|
417 |
"model_sha": "https://deepmind.google/technologies/gemini/flash",
|
418 |
"model_dtype": "torch.float16"
|
419 |
},
|
|
|
502 |
},
|
503 |
"gemini-1.5-pro": {
|
504 |
"config": {
|
505 |
+
"model_name": "Gemini-1.5-Pro",
|
506 |
"model_sha": "https://deepmind.google/technologies/gemini/pro",
|
507 |
"model_dtype": "torch.float16"
|
508 |
},
|
|
|
591 |
},
|
592 |
"gpt-4o": {
|
593 |
"config": {
|
594 |
+
"model_name": "GPT-4o",
|
595 |
"model_sha": "https://openai.com/index/hello-gpt-4o",
|
596 |
"model_dtype": "torch.float16"
|
597 |
},
|
|
|
680 |
},
|
681 |
"gpt-4o-mini": {
|
682 |
"config": {
|
683 |
+
"model_name": "GPT-4o-mini",
|
684 |
"model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
|
685 |
"model_dtype": "torch.float16"
|
686 |
},
|
data/tasks.json
CHANGED
@@ -86,14 +86,14 @@
|
|
86 |
"mmmu_multiple_choice": {
|
87 |
"benchmark": "mmmu_multiple_choice",
|
88 |
"metric": "accuracy",
|
89 |
-
"display_name": "MMMU-
|
90 |
"type": "base",
|
91 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
92 |
},
|
93 |
"mmmu_open": {
|
94 |
"benchmark": "mmmu_open",
|
95 |
"metric": "accuracy",
|
96 |
-
"display_name": "MMMU-
|
97 |
"type": "base",
|
98 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
99 |
},
|
|
|
86 |
"mmmu_multiple_choice": {
|
87 |
"benchmark": "mmmu_multiple_choice",
|
88 |
"metric": "accuracy",
|
89 |
+
"display_name": "MMMU-MC",
|
90 |
"type": "base",
|
91 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
92 |
},
|
93 |
"mmmu_open": {
|
94 |
"benchmark": "mmmu_open",
|
95 |
"metric": "accuracy",
|
96 |
+
"display_name": "MMMU-OE",
|
97 |
"type": "base",
|
98 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
99 |
},
|