xeon27
committed on
Commit
·
e1d7bbb
1
Parent(s):
ba14348
Update new log files
Browse files
- inspect_log_file_names.json +89 -0
inspect_log_file_names.json
CHANGED
@@ -30,6 +30,35 @@
|
|
30 |
"mmlu": "2024-11-04T16-26-13-05-00_mmlu_QvfQ46qJen2bvxiktHu86H.json",
|
31 |
"gdm_intercode_ctf": "2024-11-15T20-52-53-05-00_gdm-intercode-ctf_oLYr3H6bFtrcmgM6EABmNt.json"
|
32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
"c4ai-command-r-plus": {
|
34 |
"ifeval": "2024-10-30T17-23-04-04-00_ifeval_RGucUMwdGmUnRpqyMTZTzW.json",
|
35 |
"winogrande": "2024-10-30T14-42-18-04-00_winogrande_bY8yg7aRR5dCCK7NDCZEcc.json",
|
@@ -44,6 +73,66 @@
|
|
44 |
"mmlu": "2024-10-30T21-55-26-04-00_mmlu_JUPPLTzfe3Kme6UuorPTqg.json",
|
45 |
"arc_easy": "2024-10-29T17-10-40-04-00_arc-easy_UvprihBMLXPF8JENVLRkdx.json"
|
46 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
"Qwen2.5-72B-Instruct": {
|
48 |
"arc_challenge": "2024-10-31T13-46-34-04-00_arc-challenge_FSybKYYwpXVLQag8VwpjKe.json",
|
49 |
"mmlu_pro": "2024-11-01T20-31-04-04-00_mmlu-pro_2TfSPmsVmKatntHy2CnR7A.json",
|
|
|
30 |
"mmlu": "2024-11-04T16-26-13-05-00_mmlu_QvfQ46qJen2bvxiktHu86H.json",
|
31 |
"gdm_intercode_ctf": "2024-11-15T20-52-53-05-00_gdm-intercode-ctf_oLYr3H6bFtrcmgM6EABmNt.json"
|
32 |
},
|
33 |
+
"o1": {
|
34 |
+
"winogrande": "2025-01-20T16-46-06-05-00_winogrande_YUtAdEsForRffqe4Sm3wtR.json",
|
35 |
+
"humaneval": "2025-01-17T14-59-12-05-00_humaneval_RRL8GMy9NakTxUHsDVWNng.json",
|
36 |
+
"mmmu_open": "2025-01-20T22-48-09-05-00_mmmu-open_oBzxJBYbvnktbbAwhoCrYK.json",
|
37 |
+
"mmlu_pro": "2025-01-20T14-02-37-05-00_mmlu-pro_EvDzvqaahQwhv6fJovN4BT.json",
|
38 |
+
"math": "2025-01-17T15-03-22-05-00_math_6BbvHFF8hLMsVYozyNLbyQ.json",
|
39 |
+
"arc_easy": "2025-01-17T11-29-26-05-00_arc-easy_DFbir4BdgQDbKd52r7tRKR.json",
|
40 |
+
"arc_challenge": "2025-01-17T11-44-42-05-00_arc-challenge_PsWXaBqrgv3EcTZC55gRzJ.json",
|
41 |
+
"gsm8k": "2025-01-17T12-56-38-05-00_gsm8k_iD8275qeyNTgX523pn45bF.json",
|
42 |
+
"gpqa_diamond": "2025-01-17T11-53-53-05-00_gpqa-diamond_EJV7ULFSQLRoFTEqsv3t6q.json",
|
43 |
+
"hellaswag": "2025-01-17T13-14-39-05-00_hellaswag_73sQJFnwpzWjTvEqKjUk4M.json",
|
44 |
+
"mmmu_multiple_choice": "2025-01-20T21-04-57-05-00_mmmu-multiple-choice_MctxjookaeTLCL8KpUeazT.json"
|
45 |
+
},
|
46 |
+
"claude-3-5-sonnet-20241022": {
|
47 |
+
"mmmu_multiple_choice": "2025-01-21T11-20-03-05-00_mmmu-multiple-choice_CWhKvGdoFo6pdHhDyi9GNm.json",
|
48 |
+
"mmlu_pro": "2025-01-16T19-01-05-05-00_mmlu-pro_3vi84or97gQupuj5sT6vgZ.json",
|
49 |
+
"hellaswag": "2025-01-15T15-09-33-05-00_hellaswag_QXqFxojvSToMu8ckHEMLkB.json",
|
50 |
+
"gpqa_diamond": "2025-01-15T13-56-36-05-00_gpqa-diamond_eg4gFaMRENjnnYvQNtSB59.json",
|
51 |
+
"gsm8k": "2025-01-15T14-23-25-05-00_gsm8k_nHB8Z4uZAwRAZFYpKmTptA.json",
|
52 |
+
"mmmu_open": "2025-01-21T11-24-21-05-00_mmmu-open_SSjv3Dq9gZkEEUnvJUd5xf.json",
|
53 |
+
"arc_easy": "2025-01-15T10-06-24-05-00_arc-easy_oBReQZQM5SAwMMD2jFshPb.json",
|
54 |
+
"arc_challenge": "2025-01-15T10-12-11-05-00_arc-challenge_X8i6caCzkcQo5AT5zXkXso.json",
|
55 |
+
"mmlu": "2025-01-16T15-16-51-05-00_mmlu_NFDs2kxmh3kQEbpbd8sz3w.json",
|
56 |
+
"math": "2025-01-16T12-29-54-05-00_math_NvNQU58M8r3fpiwPGnvq8h.json",
|
57 |
+
"ifeval": "2025-01-16T11-28-44-05-00_ifeval_fmWxch4ZjbmYCST6yUZsdV.json",
|
58 |
+
"humaneval": "2025-01-16T11-26-12-05-00_humaneval_kUASiaNd9uZfWvCwYHhdF5.json",
|
59 |
+
"winogrande": "2025-01-16T22-09-41-05-00_winogrande_mSWGAKg75E5RP79KWizvb9.json",
|
60 |
+
"drop": "2025-01-15T10-15-15-05-00_drop_Z9A2Y84HYponNxnzNT9TNq.json"
|
61 |
+
},
|
62 |
"c4ai-command-r-plus": {
|
63 |
"ifeval": "2024-10-30T17-23-04-04-00_ifeval_RGucUMwdGmUnRpqyMTZTzW.json",
|
64 |
"winogrande": "2024-10-30T14-42-18-04-00_winogrande_bY8yg7aRR5dCCK7NDCZEcc.json",
|
|
|
73 |
"mmlu": "2024-10-30T21-55-26-04-00_mmlu_JUPPLTzfe3Kme6UuorPTqg.json",
|
74 |
"arc_easy": "2024-10-29T17-10-40-04-00_arc-easy_UvprihBMLXPF8JENVLRkdx.json"
|
75 |
},
|
76 |
+
"gpt-4o-mini": {
|
77 |
+
"drop": "2024-10-30T17-36-25-04-00_drop_6TzJGqqEkpFUCxGD4QejV6.json",
|
78 |
+
"humaneval": "2024-10-30T21-14-41-04-00_humaneval_Z9aXdUERuwYxoTheZ5GANC.json",
|
79 |
+
"gpqa_diamond": "2024-10-30T19-31-26-04-00_gpqa-diamond_7aNe9wQiQKpNN96mfaWBPg.json",
|
80 |
+
"mmmu_open": "2025-01-20T23-13-27-05-00_mmmu-open_GWi6XNYUSLq99BdabtScGm.json",
|
81 |
+
"arc_challenge": "2024-10-30T17-34-51-04-00_arc-challenge_FbGgLswBZbRE4EhWiMyRt6.json",
|
82 |
+
"mmlu": "2024-10-31T10-49-43-04-00_mmlu_oGb9mspeGbYS2gfbkknskN.json",
|
83 |
+
"hellaswag": "2024-10-30T19-35-34-04-00_hellaswag_2SAz3cvMpDxFaApdHDR3s4.json",
|
84 |
+
"ifeval": "2024-10-30T21-15-06-04-00_ifeval_nYs9KujQMQjcpbpbLtVx8G.json",
|
85 |
+
"mmmu_multiple_choice": "2025-01-20T23-10-01-05-00_mmmu-multiple-choice_c5rLkrXkV83udX6DVJui5F.json",
|
86 |
+
"arc_easy": "2024-10-30T17-29-56-04-00_arc-easy_XcEzqqPqJsRV29NqYDfnNo.json",
|
87 |
+
"winogrande": "2024-10-31T00-59-07-04-00_winogrande_Ci55vHvbGGW38zVpMCwtWa.json",
|
88 |
+
"mmlu_pro": "2024-10-30T22-43-30-04-00_mmlu-pro_Dc2uu3EV7MJtjg6gg5Y9qH.json",
|
89 |
+
"math": "2024-10-30T21-15-49-04-00_math_YsWdRzpqMq2dqQ9SPKfack.json",
|
90 |
+
"gsm8k": "2024-10-30T19-32-39-04-00_gsm8k_nLSssETKDDWNktAFWnVwfv.json"
|
91 |
+
},
|
92 |
+
"Meta-Llama-3.1-70B-Instruct": {
|
93 |
+
"hellaswag": "2024-10-30T00-45-54-04-00_hellaswag_BKfQG9yGAr383MGnooMLBH.json",
|
94 |
+
"drop": "2024-10-29T21-01-02-04-00_drop_LzAWvLWkNrNKu5qf56wXRo.json",
|
95 |
+
"gpqa_diamond": "2024-10-29T23-41-39-04-00_gpqa-diamond_TdLdYmVM6GCVMAECcXkuhj.json",
|
96 |
+
"winogrande": "2024-10-30T09-20-56-04-00_winogrande_WnUgkSRhSMvh3zUjnuJWQZ.json",
|
97 |
+
"gsm8k": "2024-10-30T00-03-31-04-00_gsm8k_bKsUfCAfcmBCeryboNaLoX.json",
|
98 |
+
"math": "2024-10-30T02-34-50-04-00_math_2xiNcrGih26uzJdG4q88bM.json",
|
99 |
+
"ifeval": "2024-10-30T02-29-32-04-00_ifeval_Dwh3CF2ZYFrvw7UcTwrsvK.json",
|
100 |
+
"arc_challenge": "2024-10-29T20-58-56-04-00_arc-challenge_oFL5wFjT7KwNFhMFfe72JN.json",
|
101 |
+
"arc_easy": "2024-10-29T20-53-12-04-00_arc-easy_UXzR7cDeNteP39NoXUYnhm.json",
|
102 |
+
"mmlu_pro": "2024-10-30T06-11-16-04-00_mmlu-pro_oQiEBJdeKtEEt4cm9KL7uy.json",
|
103 |
+
"humaneval": "2024-10-30T02-28-25-04-00_humaneval_KcJV2rHuHJ2JLxijihEkcW.json",
|
104 |
+
"mmlu": "2024-10-30T03-51-50-04-00_mmlu_6SNjs2QmPRvqGnvbnNtaqb.json"
|
105 |
+
},
|
106 |
+
"gpt-4o": {
|
107 |
+
"gpqa_diamond": "2024-10-31T03-29-33-04-00_gpqa-diamond_nFmRv5MJiYjHjezmq4V6Va.json",
|
108 |
+
"arc_challenge": "2024-10-31T01-45-55-04-00_arc-challenge_nrsPPxh4DpzgLPQDFdcfVp.json",
|
109 |
+
"gsm8k": "2024-10-31T03-31-16-04-00_gsm8k_jVXeSvHowbietZCFsFYCwB.json",
|
110 |
+
"mmlu": "2024-10-31T10-49-43-04-00_mmlu_GarLpfQFSpM3C22nbbGp54.json",
|
111 |
+
"ifeval": "2024-10-31T05-00-11-04-00_ifeval_jxreUu8JqRdkrcHP4E3hLR.json",
|
112 |
+
"mmlu_pro": "2024-10-31T06-59-42-04-00_mmlu-pro_EuAKDwAWSfNVpqyyqrf2Ba.json",
|
113 |
+
"mmmu_open": "2025-01-20T23-07-46-05-00_mmmu-open_d3Q2HvuPZzEX6FAM4NBhnp.json",
|
114 |
+
"winogrande": "2024-10-31T09-02-03-04-00_winogrande_44kKF7M9mKoqVC7ixZVXuq.json",
|
115 |
+
"drop": "2024-10-31T01-47-20-04-00_drop_3gxDcn6vUoR3nvHX9BcSq4.json",
|
116 |
+
"arc_easy": "2024-10-31T01-41-34-04-00_arc-easy_nUavRHdiRVfrxo6dmCPadh.json",
|
117 |
+
"mmmu_multiple_choice": "2025-01-20T23-03-21-05-00_mmmu-multiple-choice_eoycAFLMirSqiURdXmBP2e.json",
|
118 |
+
"humaneval": "2024-10-31T04-59-42-04-00_humaneval_nmJcd84CcNKjWS8fBfMbZM.json",
|
119 |
+
"math": "2024-10-31T05-01-22-04-00_math_cDSpKPp3nLrFy8uYfYKEbM.json",
|
120 |
+
"hellaswag": "2024-10-31T03-33-47-04-00_hellaswag_JNnnPuz3dhZRpyXzizMUBF.json"
|
121 |
+
},
|
122 |
+
"Mistral-Large-Instruct-2407": {
|
123 |
+
"drop": "2024-10-31T01-56-12-04-00_drop_NtvuCoU2LoMbH8DztcCTen.json",
|
124 |
+
"ifeval": "2024-10-31T06-30-16-04-00_ifeval_TLkvCSFEWo4PLv6hAha7YB.json",
|
125 |
+
"mmlu": "2024-10-31T07-21-48-04-00_mmlu_YnUhmHoStr3WuJdchWmNPt.json",
|
126 |
+
"gpqa_diamond": "2024-10-31T04-22-52-04-00_gpqa-diamond_SuZUZxGdqS2ZecbLRNkKd4.json",
|
127 |
+
"gsm8k": "2024-10-31T04-28-49-04-00_gsm8k_5tQp9tbwUMj6NpjNKCAfVm.json",
|
128 |
+
"math": "2024-10-31T06-33-09-04-00_math_2CmjBedAfUxqvmcHRdBgyB.json",
|
129 |
+
"arc_easy": "2024-10-31T01-48-39-04-00_arc-easy_YbfuBT3usZXt2xgZkkR5dq.json",
|
130 |
+
"mmlu_pro": "2024-10-31T09-41-25-04-00_mmlu-pro_fyYT4aabPesfY5TpzFMPnd.json",
|
131 |
+
"humaneval": "2024-10-31T06-29-24-04-00_humaneval_nu8SUSGekKJWB8HLKDigYK.json",
|
132 |
+
"hellaswag": "2024-10-31T04-50-00-04-00_hellaswag_ZzQoZ6gkRQsTzMhQr7GYNn.json",
|
133 |
+
"arc_challenge": "2024-10-31T01-54-13-04-00_arc-challenge_WfQRhMkFcywefpU46isBVP.json",
|
134 |
+
"winogrande": "2024-10-31T11-57-58-04-00_winogrande_TP3UGwpp37Dyv6ks9Ty5Hk.json"
|
135 |
+
},
|
136 |
"Qwen2.5-72B-Instruct": {
|
137 |
"arc_challenge": "2024-10-31T13-46-34-04-00_arc-challenge_FSybKYYwpXVLQag8VwpjKe.json",
|
138 |
"mmlu_pro": "2024-11-01T20-31-04-04-00_mmlu-pro_2TfSPmsVmKatntHy2CnR7A.json",
|