jwilles committed on
Commit
159f31f
·
1 Parent(s): 2b55816

Update display names

Browse files
Files changed (2) hide show
  1. data/results.json +6 -6
  2. data/tasks.json +2 -2
data/results.json CHANGED
@@ -268,7 +268,7 @@
268
  },
269
  "c4ai-command-r-plus": {
270
  "config": {
271
- "model_name": "c4ai-command-r-plus",
272
  "model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
273
  },
274
  "results": {
@@ -324,7 +324,7 @@
324
  },
325
  "claude-3-5-sonnet-20241022": {
326
  "config": {
327
- "model_name": "claude-3-5-sonnet-20241022",
328
  "model_sha": "https://www.anthropic.com/claude/sonnet",
329
  "model_dtype": "torch.float16"
330
  },
@@ -413,7 +413,7 @@
413
  },
414
  "gemini-1.5-flash": {
415
  "config": {
416
- "model_name": "gemini-1.5-flash",
417
  "model_sha": "https://deepmind.google/technologies/gemini/flash",
418
  "model_dtype": "torch.float16"
419
  },
@@ -502,7 +502,7 @@
502
  },
503
  "gemini-1.5-pro": {
504
  "config": {
505
- "model_name": "gemini-1.5-pro",
506
  "model_sha": "https://deepmind.google/technologies/gemini/pro",
507
  "model_dtype": "torch.float16"
508
  },
@@ -591,7 +591,7 @@
591
  },
592
  "gpt-4o": {
593
  "config": {
594
- "model_name": "gpt-4o",
595
  "model_sha": "https://openai.com/index/hello-gpt-4o",
596
  "model_dtype": "torch.float16"
597
  },
@@ -680,7 +680,7 @@
680
  },
681
  "gpt-4o-mini": {
682
  "config": {
683
- "model_name": "gpt-4o-mini",
684
  "model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
685
  "model_dtype": "torch.float16"
686
  },
 
268
  },
269
  "c4ai-command-r-plus": {
270
  "config": {
271
+ "model_name": "Command R+",
272
  "model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
273
  },
274
  "results": {
 
324
  },
325
  "claude-3-5-sonnet-20241022": {
326
  "config": {
327
+ "model_name": "Claude-3.5-Sonnet",
328
  "model_sha": "https://www.anthropic.com/claude/sonnet",
329
  "model_dtype": "torch.float16"
330
  },
 
413
  },
414
  "gemini-1.5-flash": {
415
  "config": {
416
+ "model_name": "Gemini-1.5-Flash",
417
  "model_sha": "https://deepmind.google/technologies/gemini/flash",
418
  "model_dtype": "torch.float16"
419
  },
 
502
  },
503
  "gemini-1.5-pro": {
504
  "config": {
505
+ "model_name": "Gemini-1.5-Pro",
506
  "model_sha": "https://deepmind.google/technologies/gemini/pro",
507
  "model_dtype": "torch.float16"
508
  },
 
591
  },
592
  "gpt-4o": {
593
  "config": {
594
+ "model_name": "GPT-4o",
595
  "model_sha": "https://openai.com/index/hello-gpt-4o",
596
  "model_dtype": "torch.float16"
597
  },
 
680
  },
681
  "gpt-4o-mini": {
682
  "config": {
683
+ "model_name": "GPT-4o-mini",
684
  "model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
685
  "model_dtype": "torch.float16"
686
  },
data/tasks.json CHANGED
@@ -86,14 +86,14 @@
86
  "mmmu_multiple_choice": {
87
  "benchmark": "mmmu_multiple_choice",
88
  "metric": "accuracy",
89
- "display_name": "MMMU-Multiple-Choice",
90
  "type": "base",
91
  "source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
92
  },
93
  "mmmu_open": {
94
  "benchmark": "mmmu_open",
95
  "metric": "accuracy",
96
- "display_name": "MMMU-Open-Ended",
97
  "type": "base",
98
  "source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
99
  },
 
86
  "mmmu_multiple_choice": {
87
  "benchmark": "mmmu_multiple_choice",
88
  "metric": "accuracy",
89
+ "display_name": "MMMU-MC",
90
  "type": "base",
91
  "source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
92
  },
93
  "mmmu_open": {
94
  "benchmark": "mmmu_open",
95
  "metric": "accuracy",
96
+ "display_name": "MMMU-OE",
97
  "type": "base",
98
  "source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
99
  },