xeon27 committed on
Commit
e1d7bbb
·
1 Parent(s): ba14348

Update new log files

Browse files
Files changed (1) hide show
  1. inspect_log_file_names.json +89 -0
inspect_log_file_names.json CHANGED
@@ -30,6 +30,35 @@
30
  "mmlu": "2024-11-04T16-26-13-05-00_mmlu_QvfQ46qJen2bvxiktHu86H.json",
31
  "gdm_intercode_ctf": "2024-11-15T20-52-53-05-00_gdm-intercode-ctf_oLYr3H6bFtrcmgM6EABmNt.json"
32
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "c4ai-command-r-plus": {
34
  "ifeval": "2024-10-30T17-23-04-04-00_ifeval_RGucUMwdGmUnRpqyMTZTzW.json",
35
  "winogrande": "2024-10-30T14-42-18-04-00_winogrande_bY8yg7aRR5dCCK7NDCZEcc.json",
@@ -44,6 +73,66 @@
44
  "mmlu": "2024-10-30T21-55-26-04-00_mmlu_JUPPLTzfe3Kme6UuorPTqg.json",
45
  "arc_easy": "2024-10-29T17-10-40-04-00_arc-easy_UvprihBMLXPF8JENVLRkdx.json"
46
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  "Qwen2.5-72B-Instruct": {
48
  "arc_challenge": "2024-10-31T13-46-34-04-00_arc-challenge_FSybKYYwpXVLQag8VwpjKe.json",
49
  "mmlu_pro": "2024-11-01T20-31-04-04-00_mmlu-pro_2TfSPmsVmKatntHy2CnR7A.json",
 
30
  "mmlu": "2024-11-04T16-26-13-05-00_mmlu_QvfQ46qJen2bvxiktHu86H.json",
31
  "gdm_intercode_ctf": "2024-11-15T20-52-53-05-00_gdm-intercode-ctf_oLYr3H6bFtrcmgM6EABmNt.json"
32
  },
33
+ "o1": {
34
+ "winogrande": "2025-01-20T16-46-06-05-00_winogrande_YUtAdEsForRffqe4Sm3wtR.json",
35
+ "humaneval": "2025-01-17T14-59-12-05-00_humaneval_RRL8GMy9NakTxUHsDVWNng.json",
36
+ "mmmu_open": "2025-01-20T22-48-09-05-00_mmmu-open_oBzxJBYbvnktbbAwhoCrYK.json",
37
+ "mmlu_pro": "2025-01-20T14-02-37-05-00_mmlu-pro_EvDzvqaahQwhv6fJovN4BT.json",
38
+ "math": "2025-01-17T15-03-22-05-00_math_6BbvHFF8hLMsVYozyNLbyQ.json",
39
+ "arc_easy": "2025-01-17T11-29-26-05-00_arc-easy_DFbir4BdgQDbKd52r7tRKR.json",
40
+ "arc_challenge": "2025-01-17T11-44-42-05-00_arc-challenge_PsWXaBqrgv3EcTZC55gRzJ.json",
41
+ "gsm8k": "2025-01-17T12-56-38-05-00_gsm8k_iD8275qeyNTgX523pn45bF.json",
42
+ "gpqa_diamond": "2025-01-17T11-53-53-05-00_gpqa-diamond_EJV7ULFSQLRoFTEqsv3t6q.json",
43
+ "hellaswag": "2025-01-17T13-14-39-05-00_hellaswag_73sQJFnwpzWjTvEqKjUk4M.json",
44
+ "mmmu_multiple_choice": "2025-01-20T21-04-57-05-00_mmmu-multiple-choice_MctxjookaeTLCL8KpUeazT.json"
45
+ },
46
+ "claude-3-5-sonnet-20241022": {
47
+ "mmmu_multiple_choice": "2025-01-21T11-20-03-05-00_mmmu-multiple-choice_CWhKvGdoFo6pdHhDyi9GNm.json",
48
+ "mmlu_pro": "2025-01-16T19-01-05-05-00_mmlu-pro_3vi84or97gQupuj5sT6vgZ.json",
49
+ "hellaswag": "2025-01-15T15-09-33-05-00_hellaswag_QXqFxojvSToMu8ckHEMLkB.json",
50
+ "gpqa_diamond": "2025-01-15T13-56-36-05-00_gpqa-diamond_eg4gFaMRENjnnYvQNtSB59.json",
51
+ "gsm8k": "2025-01-15T14-23-25-05-00_gsm8k_nHB8Z4uZAwRAZFYpKmTptA.json",
52
+ "mmmu_open": "2025-01-21T11-24-21-05-00_mmmu-open_SSjv3Dq9gZkEEUnvJUd5xf.json",
53
+ "arc_easy": "2025-01-15T10-06-24-05-00_arc-easy_oBReQZQM5SAwMMD2jFshPb.json",
54
+ "arc_challenge": "2025-01-15T10-12-11-05-00_arc-challenge_X8i6caCzkcQo5AT5zXkXso.json",
55
+ "mmlu": "2025-01-16T15-16-51-05-00_mmlu_NFDs2kxmh3kQEbpbd8sz3w.json",
56
+ "math": "2025-01-16T12-29-54-05-00_math_NvNQU58M8r3fpiwPGnvq8h.json",
57
+ "ifeval": "2025-01-16T11-28-44-05-00_ifeval_fmWxch4ZjbmYCST6yUZsdV.json",
58
+ "humaneval": "2025-01-16T11-26-12-05-00_humaneval_kUASiaNd9uZfWvCwYHhdF5.json",
59
+ "winogrande": "2025-01-16T22-09-41-05-00_winogrande_mSWGAKg75E5RP79KWizvb9.json",
60
+ "drop": "2025-01-15T10-15-15-05-00_drop_Z9A2Y84HYponNxnzNT9TNq.json"
61
+ },
62
  "c4ai-command-r-plus": {
63
  "ifeval": "2024-10-30T17-23-04-04-00_ifeval_RGucUMwdGmUnRpqyMTZTzW.json",
64
  "winogrande": "2024-10-30T14-42-18-04-00_winogrande_bY8yg7aRR5dCCK7NDCZEcc.json",
 
73
  "mmlu": "2024-10-30T21-55-26-04-00_mmlu_JUPPLTzfe3Kme6UuorPTqg.json",
74
  "arc_easy": "2024-10-29T17-10-40-04-00_arc-easy_UvprihBMLXPF8JENVLRkdx.json"
75
  },
76
+ "gpt-4o-mini": {
77
+ "drop": "2024-10-30T17-36-25-04-00_drop_6TzJGqqEkpFUCxGD4QejV6.json",
78
+ "humaneval": "2024-10-30T21-14-41-04-00_humaneval_Z9aXdUERuwYxoTheZ5GANC.json",
79
+ "gpqa_diamond": "2024-10-30T19-31-26-04-00_gpqa-diamond_7aNe9wQiQKpNN96mfaWBPg.json",
80
+ "mmmu_open": "2025-01-20T23-13-27-05-00_mmmu-open_GWi6XNYUSLq99BdabtScGm.json",
81
+ "arc_challenge": "2024-10-30T17-34-51-04-00_arc-challenge_FbGgLswBZbRE4EhWiMyRt6.json",
82
+ "mmlu": "2024-10-31T10-49-43-04-00_mmlu_oGb9mspeGbYS2gfbkknskN.json",
83
+ "hellaswag": "2024-10-30T19-35-34-04-00_hellaswag_2SAz3cvMpDxFaApdHDR3s4.json",
84
+ "ifeval": "2024-10-30T21-15-06-04-00_ifeval_nYs9KujQMQjcpbpbLtVx8G.json",
85
+ "mmmu_multiple_choice": "2025-01-20T23-10-01-05-00_mmmu-multiple-choice_c5rLkrXkV83udX6DVJui5F.json",
86
+ "arc_easy": "2024-10-30T17-29-56-04-00_arc-easy_XcEzqqPqJsRV29NqYDfnNo.json",
87
+ "winogrande": "2024-10-31T00-59-07-04-00_winogrande_Ci55vHvbGGW38zVpMCwtWa.json",
88
+ "mmlu_pro": "2024-10-30T22-43-30-04-00_mmlu-pro_Dc2uu3EV7MJtjg6gg5Y9qH.json",
89
+ "math": "2024-10-30T21-15-49-04-00_math_YsWdRzpqMq2dqQ9SPKfack.json",
90
+ "gsm8k": "2024-10-30T19-32-39-04-00_gsm8k_nLSssETKDDWNktAFWnVwfv.json"
91
+ },
92
+ "Meta-Llama-3.1-70B-Instruct": {
93
+ "hellaswag": "2024-10-30T00-45-54-04-00_hellaswag_BKfQG9yGAr383MGnooMLBH.json",
94
+ "drop": "2024-10-29T21-01-02-04-00_drop_LzAWvLWkNrNKu5qf56wXRo.json",
95
+ "gpqa_diamond": "2024-10-29T23-41-39-04-00_gpqa-diamond_TdLdYmVM6GCVMAECcXkuhj.json",
96
+ "winogrande": "2024-10-30T09-20-56-04-00_winogrande_WnUgkSRhSMvh3zUjnuJWQZ.json",
97
+ "gsm8k": "2024-10-30T00-03-31-04-00_gsm8k_bKsUfCAfcmBCeryboNaLoX.json",
98
+ "math": "2024-10-30T02-34-50-04-00_math_2xiNcrGih26uzJdG4q88bM.json",
99
+ "ifeval": "2024-10-30T02-29-32-04-00_ifeval_Dwh3CF2ZYFrvw7UcTwrsvK.json",
100
+ "arc_challenge": "2024-10-29T20-58-56-04-00_arc-challenge_oFL5wFjT7KwNFhMFfe72JN.json",
101
+ "arc_easy": "2024-10-29T20-53-12-04-00_arc-easy_UXzR7cDeNteP39NoXUYnhm.json",
102
+ "mmlu_pro": "2024-10-30T06-11-16-04-00_mmlu-pro_oQiEBJdeKtEEt4cm9KL7uy.json",
103
+ "humaneval": "2024-10-30T02-28-25-04-00_humaneval_KcJV2rHuHJ2JLxijihEkcW.json",
104
+ "mmlu": "2024-10-30T03-51-50-04-00_mmlu_6SNjs2QmPRvqGnvbnNtaqb.json"
105
+ },
106
+ "gpt-4o": {
107
+ "gpqa_diamond": "2024-10-31T03-29-33-04-00_gpqa-diamond_nFmRv5MJiYjHjezmq4V6Va.json",
108
+ "arc_challenge": "2024-10-31T01-45-55-04-00_arc-challenge_nrsPPxh4DpzgLPQDFdcfVp.json",
109
+ "gsm8k": "2024-10-31T03-31-16-04-00_gsm8k_jVXeSvHowbietZCFsFYCwB.json",
110
+ "mmlu": "2024-10-31T10-49-43-04-00_mmlu_GarLpfQFSpM3C22nbbGp54.json",
111
+ "ifeval": "2024-10-31T05-00-11-04-00_ifeval_jxreUu8JqRdkrcHP4E3hLR.json",
112
+ "mmlu_pro": "2024-10-31T06-59-42-04-00_mmlu-pro_EuAKDwAWSfNVpqyyqrf2Ba.json",
113
+ "mmmu_open": "2025-01-20T23-07-46-05-00_mmmu-open_d3Q2HvuPZzEX6FAM4NBhnp.json",
114
+ "winogrande": "2024-10-31T09-02-03-04-00_winogrande_44kKF7M9mKoqVC7ixZVXuq.json",
115
+ "drop": "2024-10-31T01-47-20-04-00_drop_3gxDcn6vUoR3nvHX9BcSq4.json",
116
+ "arc_easy": "2024-10-31T01-41-34-04-00_arc-easy_nUavRHdiRVfrxo6dmCPadh.json",
117
+ "mmmu_multiple_choice": "2025-01-20T23-03-21-05-00_mmmu-multiple-choice_eoycAFLMirSqiURdXmBP2e.json",
118
+ "humaneval": "2024-10-31T04-59-42-04-00_humaneval_nmJcd84CcNKjWS8fBfMbZM.json",
119
+ "math": "2024-10-31T05-01-22-04-00_math_cDSpKPp3nLrFy8uYfYKEbM.json",
120
+ "hellaswag": "2024-10-31T03-33-47-04-00_hellaswag_JNnnPuz3dhZRpyXzizMUBF.json"
121
+ },
122
+ "Mistral-Large-Instruct-2407": {
123
+ "drop": "2024-10-31T01-56-12-04-00_drop_NtvuCoU2LoMbH8DztcCTen.json",
124
+ "ifeval": "2024-10-31T06-30-16-04-00_ifeval_TLkvCSFEWo4PLv6hAha7YB.json",
125
+ "mmlu": "2024-10-31T07-21-48-04-00_mmlu_YnUhmHoStr3WuJdchWmNPt.json",
126
+ "gpqa_diamond": "2024-10-31T04-22-52-04-00_gpqa-diamond_SuZUZxGdqS2ZecbLRNkKd4.json",
127
+ "gsm8k": "2024-10-31T04-28-49-04-00_gsm8k_5tQp9tbwUMj6NpjNKCAfVm.json",
128
+ "math": "2024-10-31T06-33-09-04-00_math_2CmjBedAfUxqvmcHRdBgyB.json",
129
+ "arc_easy": "2024-10-31T01-48-39-04-00_arc-easy_YbfuBT3usZXt2xgZkkR5dq.json",
130
+ "mmlu_pro": "2024-10-31T09-41-25-04-00_mmlu-pro_fyYT4aabPesfY5TpzFMPnd.json",
131
+ "humaneval": "2024-10-31T06-29-24-04-00_humaneval_nu8SUSGekKJWB8HLKDigYK.json",
132
+ "hellaswag": "2024-10-31T04-50-00-04-00_hellaswag_ZzQoZ6gkRQsTzMhQr7GYNn.json",
133
+ "arc_challenge": "2024-10-31T01-54-13-04-00_arc-challenge_WfQRhMkFcywefpU46isBVP.json",
134
+ "winogrande": "2024-10-31T11-57-58-04-00_winogrande_TP3UGwpp37Dyv6ks9Ty5Hk.json"
135
+ },
136
  "Qwen2.5-72B-Instruct": {
137
  "arc_challenge": "2024-10-31T13-46-34-04-00_arc-challenge_FSybKYYwpXVLQag8VwpjKe.json",
138
  "mmlu_pro": "2024-11-01T20-31-04-04-00_mmlu-pro_2TfSPmsVmKatntHy2CnR7A.json",