lewtun HF Staff committed on
Commit
c4ff3c2
·
verified ·
1 Parent(s): 503f651

Delete eval_results/AI-MO/deepseek-math-7b-sft/aimo_v00.00/math/results_2024-04-24T13-57-53.176523.json

Browse files
eval_results/AI-MO/deepseek-math-7b-sft/aimo_v00.00/math/results_2024-04-24T13-57-53.176523.json DELETED
@@ -1,410 +0,0 @@
1
- {
2
- "config_general": {
3
- "lighteval_sha": "?",
4
- "num_fewshot_seeds": 1,
5
- "override_batch_size": 2,
6
- "max_samples": 10,
7
- "job_id": "",
8
- "start_time": 3652558.79848515,
9
- "end_time": 3652962.355642761,
10
- "total_evaluation_time_secondes": "403.55715761100873",
11
- "model_name": "AI-MO/deepseek-math-7b-sft",
12
- "model_sha": "748a5f5e457052be6c9476d87222e596fedefdb7",
13
- "model_dtype": "torch.bfloat16",
14
- "model_size": "12.93 GB",
15
- "config": null
16
- },
17
- "results": {
18
- "lighteval|math:algebra|0": {
19
- "qem": 0.7,
20
- "qem_stderr": 0.15275252316519466
21
- },
22
- "lighteval|math:counting_and_probability|0": {
23
- "qem": 0.5,
24
- "qem_stderr": 0.16666666666666666
25
- },
26
- "lighteval|math:geometry|0": {
27
- "qem": 0.3,
28
- "qem_stderr": 0.15275252316519466
29
- },
30
- "lighteval|math:intermediate_algebra|0": {
31
- "qem": 0.1,
32
- "qem_stderr": 0.09999999999999999
33
- },
34
- "lighteval|math:number_theory|0": {
35
- "qem": 0.2,
36
- "qem_stderr": 0.13333333333333333
37
- },
38
- "lighteval|math:prealgebra|0": {
39
- "qem": 0.6,
40
- "qem_stderr": 0.16329931618554522
41
- },
42
- "lighteval|math:precalculus|0": {
43
- "qem": 0.4,
44
- "qem_stderr": 0.1632993161855452
45
- },
46
- "lighteval|math:_average|0": {
47
- "qem": 0.39999999999999997,
48
- "qem_stderr": 0.14744338267163995
49
- },
50
- "all": {
51
- "qem": 0.39999999999999997,
52
- "qem_stderr": 0.14744338267163995
53
- }
54
- },
55
- "versions": {
56
- "lighteval|math:algebra|0": 0,
57
- "lighteval|math:counting_and_probability|0": 0,
58
- "lighteval|math:geometry|0": 0,
59
- "lighteval|math:intermediate_algebra|0": 0,
60
- "lighteval|math:number_theory|0": 0,
61
- "lighteval|math:prealgebra|0": 0,
62
- "lighteval|math:precalculus|0": 0
63
- },
64
- "config_tasks": {
65
- "lighteval|math:algebra": {
66
- "name": "math:algebra",
67
- "prompt_function": "math",
68
- "hf_repo": "lighteval/MATH",
69
- "hf_subset": "algebra",
70
- "metric": [
71
- "quasi_exact_match_math"
72
- ],
73
- "hf_avail_splits": [
74
- "train",
75
- "test",
76
- "validation"
77
- ],
78
- "evaluation_splits": [
79
- "test"
80
- ],
81
- "few_shots_split": null,
82
- "few_shots_select": null,
83
- "generation_size": 2048,
84
- "stop_sequence": [
85
- "\n"
86
- ],
87
- "output_regex": null,
88
- "frozen": false,
89
- "suite": [
90
- "lighteval",
91
- "math"
92
- ],
93
- "original_num_docs": 1187,
94
- "effective_num_docs": 10,
95
- "trust_dataset": true,
96
- "must_remove_duplicate_docs": null
97
- },
98
- "lighteval|math:counting_and_probability": {
99
- "name": "math:counting_and_probability",
100
- "prompt_function": "math",
101
- "hf_repo": "lighteval/MATH",
102
- "hf_subset": "counting_and_probability",
103
- "metric": [
104
- "quasi_exact_match_math"
105
- ],
106
- "hf_avail_splits": [
107
- "train",
108
- "test",
109
- "validation"
110
- ],
111
- "evaluation_splits": [
112
- "test"
113
- ],
114
- "few_shots_split": null,
115
- "few_shots_select": null,
116
- "generation_size": 2048,
117
- "stop_sequence": [
118
- "\n"
119
- ],
120
- "output_regex": null,
121
- "frozen": false,
122
- "suite": [
123
- "lighteval",
124
- "math"
125
- ],
126
- "original_num_docs": 474,
127
- "effective_num_docs": 10,
128
- "trust_dataset": true,
129
- "must_remove_duplicate_docs": null
130
- },
131
- "lighteval|math:geometry": {
132
- "name": "math:geometry",
133
- "prompt_function": "math",
134
- "hf_repo": "lighteval/MATH",
135
- "hf_subset": "geometry",
136
- "metric": [
137
- "quasi_exact_match_math"
138
- ],
139
- "hf_avail_splits": [
140
- "train",
141
- "test",
142
- "validation"
143
- ],
144
- "evaluation_splits": [
145
- "test"
146
- ],
147
- "few_shots_split": null,
148
- "few_shots_select": null,
149
- "generation_size": 2048,
150
- "stop_sequence": [
151
- "\n"
152
- ],
153
- "output_regex": null,
154
- "frozen": false,
155
- "suite": [
156
- "lighteval",
157
- "math"
158
- ],
159
- "original_num_docs": 479,
160
- "effective_num_docs": 10,
161
- "trust_dataset": true,
162
- "must_remove_duplicate_docs": null
163
- },
164
- "lighteval|math:intermediate_algebra": {
165
- "name": "math:intermediate_algebra",
166
- "prompt_function": "math",
167
- "hf_repo": "lighteval/MATH",
168
- "hf_subset": "intermediate_algebra",
169
- "metric": [
170
- "quasi_exact_match_math"
171
- ],
172
- "hf_avail_splits": [
173
- "train",
174
- "test",
175
- "validation"
176
- ],
177
- "evaluation_splits": [
178
- "test"
179
- ],
180
- "few_shots_split": null,
181
- "few_shots_select": null,
182
- "generation_size": 2048,
183
- "stop_sequence": [
184
- "\n"
185
- ],
186
- "output_regex": null,
187
- "frozen": false,
188
- "suite": [
189
- "lighteval",
190
- "math"
191
- ],
192
- "original_num_docs": 903,
193
- "effective_num_docs": 10,
194
- "trust_dataset": true,
195
- "must_remove_duplicate_docs": null
196
- },
197
- "lighteval|math:number_theory": {
198
- "name": "math:number_theory",
199
- "prompt_function": "math",
200
- "hf_repo": "lighteval/MATH",
201
- "hf_subset": "number_theory",
202
- "metric": [
203
- "quasi_exact_match_math"
204
- ],
205
- "hf_avail_splits": [
206
- "train",
207
- "test",
208
- "validation"
209
- ],
210
- "evaluation_splits": [
211
- "test"
212
- ],
213
- "few_shots_split": null,
214
- "few_shots_select": null,
215
- "generation_size": 2048,
216
- "stop_sequence": [
217
- "\n"
218
- ],
219
- "output_regex": null,
220
- "frozen": false,
221
- "suite": [
222
- "lighteval",
223
- "math"
224
- ],
225
- "original_num_docs": 540,
226
- "effective_num_docs": 10,
227
- "trust_dataset": true,
228
- "must_remove_duplicate_docs": null
229
- },
230
- "lighteval|math:prealgebra": {
231
- "name": "math:prealgebra",
232
- "prompt_function": "math",
233
- "hf_repo": "lighteval/MATH",
234
- "hf_subset": "prealgebra",
235
- "metric": [
236
- "quasi_exact_match_math"
237
- ],
238
- "hf_avail_splits": [
239
- "train",
240
- "test",
241
- "validation"
242
- ],
243
- "evaluation_splits": [
244
- "test"
245
- ],
246
- "few_shots_split": null,
247
- "few_shots_select": null,
248
- "generation_size": 2048,
249
- "stop_sequence": [
250
- "\n"
251
- ],
252
- "output_regex": null,
253
- "frozen": false,
254
- "suite": [
255
- "lighteval",
256
- "math"
257
- ],
258
- "original_num_docs": 871,
259
- "effective_num_docs": 10,
260
- "trust_dataset": true,
261
- "must_remove_duplicate_docs": null
262
- },
263
- "lighteval|math:precalculus": {
264
- "name": "math:precalculus",
265
- "prompt_function": "math",
266
- "hf_repo": "lighteval/MATH",
267
- "hf_subset": "precalculus",
268
- "metric": [
269
- "quasi_exact_match_math"
270
- ],
271
- "hf_avail_splits": [
272
- "train",
273
- "test",
274
- "validation"
275
- ],
276
- "evaluation_splits": [
277
- "test"
278
- ],
279
- "few_shots_split": null,
280
- "few_shots_select": null,
281
- "generation_size": 2048,
282
- "stop_sequence": [
283
- "\n"
284
- ],
285
- "output_regex": null,
286
- "frozen": false,
287
- "suite": [
288
- "lighteval",
289
- "math"
290
- ],
291
- "original_num_docs": 546,
292
- "effective_num_docs": 10,
293
- "trust_dataset": true,
294
- "must_remove_duplicate_docs": null
295
- }
296
- },
297
- "summary_tasks": {
298
- "lighteval|math:algebra|0": {
299
- "hashes": {
300
- "hash_examples": "a13d68854ca927ce",
301
- "hash_full_prompts": "7e0d2b25e14caad6",
302
- "hash_input_tokens": "58c8826560827dfc",
303
- "hash_cont_tokens": "310523472b3267fc"
304
- },
305
- "truncated": 10,
306
- "non_truncated": 0,
307
- "padded": 3,
308
- "non_padded": 7,
309
- "effective_few_shots": 0.0,
310
- "num_truncated_few_shots": 0
311
- },
312
- "lighteval|math:counting_and_probability|0": {
313
- "hashes": {
314
- "hash_examples": "a8004c36a2d9cb68",
315
- "hash_full_prompts": "2acaf205499ed79c",
316
- "hash_input_tokens": "d76018dbed1fcc48",
317
- "hash_cont_tokens": "733b9c4cba844ec2"
318
- },
319
- "truncated": 10,
320
- "non_truncated": 0,
321
- "padded": 1,
322
- "non_padded": 9,
323
- "effective_few_shots": 0.0,
324
- "num_truncated_few_shots": 0
325
- },
326
- "lighteval|math:geometry|0": {
327
- "hashes": {
328
- "hash_examples": "5e12e37f7378cc4c",
329
- "hash_full_prompts": "32e7c26bfc66828d",
330
- "hash_input_tokens": "288b95f1a6a9a3d5",
331
- "hash_cont_tokens": "8d9422af27507fe7"
332
- },
333
- "truncated": 10,
334
- "non_truncated": 0,
335
- "padded": 5,
336
- "non_padded": 5,
337
- "effective_few_shots": 0.0,
338
- "num_truncated_few_shots": 0
339
- },
340
- "lighteval|math:intermediate_algebra|0": {
341
- "hashes": {
342
- "hash_examples": "71738fc49d471d6d",
343
- "hash_full_prompts": "6779f5c079af81a6",
344
- "hash_input_tokens": "53a5702086e49106",
345
- "hash_cont_tokens": "6f8722f0a58ef37a"
346
- },
347
- "truncated": 10,
348
- "non_truncated": 0,
349
- "padded": 3,
350
- "non_padded": 7,
351
- "effective_few_shots": 0.0,
352
- "num_truncated_few_shots": 0
353
- },
354
- "lighteval|math:number_theory|0": {
355
- "hashes": {
356
- "hash_examples": "bdb66471a0eed93a",
357
- "hash_full_prompts": "9985b650f03b8f91",
358
- "hash_input_tokens": "8b240f273f300e85",
359
- "hash_cont_tokens": "ae8bd3b0e9f74ac5"
360
- },
361
- "truncated": 10,
362
- "non_truncated": 0,
363
- "padded": 5,
364
- "non_padded": 5,
365
- "effective_few_shots": 0.0,
366
- "num_truncated_few_shots": 0
367
- },
368
- "lighteval|math:prealgebra|0": {
369
- "hashes": {
370
- "hash_examples": "3c59373ec7e3a94a",
371
- "hash_full_prompts": "722cea4098cecd00",
372
- "hash_input_tokens": "6788548239da9c91",
373
- "hash_cont_tokens": "c2f2eaca08a1e171"
374
- },
375
- "truncated": 10,
376
- "non_truncated": 0,
377
- "padded": 1,
378
- "non_padded": 9,
379
- "effective_few_shots": 0.0,
380
- "num_truncated_few_shots": 0
381
- },
382
- "lighteval|math:precalculus|0": {
383
- "hashes": {
384
- "hash_examples": "8a97d7d7bd780ca3",
385
- "hash_full_prompts": "df2793e826f0dfcc",
386
- "hash_input_tokens": "90b2332eeca3284b",
387
- "hash_cont_tokens": "56aff6344490817d"
388
- },
389
- "truncated": 10,
390
- "non_truncated": 0,
391
- "padded": 4,
392
- "non_padded": 6,
393
- "effective_few_shots": 0.0,
394
- "num_truncated_few_shots": 0
395
- }
396
- },
397
- "summary_general": {
398
- "hashes": {
399
- "hash_examples": "e05a305d27ed0540",
400
- "hash_full_prompts": "7847980860d96b71",
401
- "hash_input_tokens": "a2515743ef01f5cf",
402
- "hash_cont_tokens": "734c8abceb12e43b"
403
- },
404
- "truncated": 70,
405
- "non_truncated": 0,
406
- "padded": 22,
407
- "non_padded": 48,
408
- "num_truncated_few_shots": 0
409
- }
410
- }