lewtun HF staff commited on
Commit
3c7a7bf
·
verified ·
1 Parent(s): db0af34

Upload eval_results/Qwen/Qwen1.5-0.5B-Chat/main/math/results_2024-04-21T21-08-03.974672.json with huggingface_hub

Browse files
eval_results/Qwen/Qwen1.5-0.5B-Chat/main/math/results_2024-04-21T21-08-03.974672.json ADDED
@@ -0,0 +1,410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config_general": {
3
+ "lighteval_sha": "?",
4
+ "num_fewshot_seeds": 1,
5
+ "override_batch_size": 4,
6
+ "max_samples": null,
7
+ "job_id": "",
8
+ "start_time": 867482.247466206,
9
+ "end_time": 873665.762882805,
10
+ "total_evaluation_time_secondes": "6183.515416598995",
11
+ "model_name": "Qwen/Qwen1.5-0.5B-Chat",
12
+ "model_sha": "f82bd3692de0283f4a4b31e06d164dd8467fb52e",
13
+ "model_dtype": "torch.bfloat16",
14
+ "model_size": "1.05 GB",
15
+ "config": null
16
+ },
17
+ "results": {
18
+ "lighteval|math:algebra|0": {
19
+ "qem": 0.0,
20
+ "qem_stderr": 0.0
21
+ },
22
+ "lighteval|math:counting_and_probability|0": {
23
+ "qem": 0.0,
24
+ "qem_stderr": 0.0
25
+ },
26
+ "lighteval|math:geometry|0": {
27
+ "qem": 0.0,
28
+ "qem_stderr": 0.0
29
+ },
30
+ "lighteval|math:intermediate_algebra|0": {
31
+ "qem": 0.0,
32
+ "qem_stderr": 0.0
33
+ },
34
+ "lighteval|math:number_theory|0": {
35
+ "qem": 0.0,
36
+ "qem_stderr": 0.0
37
+ },
38
+ "lighteval|math:prealgebra|0": {
39
+ "qem": 0.001148105625717566,
40
+ "qem_stderr": 0.0011481056257175708
41
+ },
42
+ "lighteval|math:precalculus|0": {
43
+ "qem": 0.0,
44
+ "qem_stderr": 0.0
45
+ },
46
+ "lighteval|math:_average|0": {
47
+ "qem": 0.00016401508938822373,
48
+ "qem_stderr": 0.0001640150893882244
49
+ },
50
+ "all": {
51
+ "qem": 0.00016401508938822373,
52
+ "qem_stderr": 0.0001640150893882244
53
+ }
54
+ },
55
+ "versions": {
56
+ "lighteval|math:algebra|0": 0,
57
+ "lighteval|math:counting_and_probability|0": 0,
58
+ "lighteval|math:geometry|0": 0,
59
+ "lighteval|math:intermediate_algebra|0": 0,
60
+ "lighteval|math:number_theory|0": 0,
61
+ "lighteval|math:prealgebra|0": 0,
62
+ "lighteval|math:precalculus|0": 0
63
+ },
64
+ "config_tasks": {
65
+ "lighteval|math:algebra": {
66
+ "name": "math:algebra",
67
+ "prompt_function": "math",
68
+ "hf_repo": "lighteval/MATH",
69
+ "hf_subset": "algebra",
70
+ "metric": [
71
+ "quasi_exact_match_math"
72
+ ],
73
+ "hf_avail_splits": [
74
+ "train",
75
+ "test",
76
+ "validation"
77
+ ],
78
+ "evaluation_splits": [
79
+ "test"
80
+ ],
81
+ "few_shots_split": null,
82
+ "few_shots_select": null,
83
+ "generation_size": 2048,
84
+ "stop_sequence": [
85
+ "\n"
86
+ ],
87
+ "output_regex": null,
88
+ "frozen": false,
89
+ "suite": [
90
+ "lighteval",
91
+ "math"
92
+ ],
93
+ "original_num_docs": 1187,
94
+ "effective_num_docs": 1187,
95
+ "trust_dataset": true,
96
+ "must_remove_duplicate_docs": null
97
+ },
98
+ "lighteval|math:counting_and_probability": {
99
+ "name": "math:counting_and_probability",
100
+ "prompt_function": "math",
101
+ "hf_repo": "lighteval/MATH",
102
+ "hf_subset": "counting_and_probability",
103
+ "metric": [
104
+ "quasi_exact_match_math"
105
+ ],
106
+ "hf_avail_splits": [
107
+ "train",
108
+ "test",
109
+ "validation"
110
+ ],
111
+ "evaluation_splits": [
112
+ "test"
113
+ ],
114
+ "few_shots_split": null,
115
+ "few_shots_select": null,
116
+ "generation_size": 2048,
117
+ "stop_sequence": [
118
+ "\n"
119
+ ],
120
+ "output_regex": null,
121
+ "frozen": false,
122
+ "suite": [
123
+ "lighteval",
124
+ "math"
125
+ ],
126
+ "original_num_docs": 474,
127
+ "effective_num_docs": 474,
128
+ "trust_dataset": true,
129
+ "must_remove_duplicate_docs": null
130
+ },
131
+ "lighteval|math:geometry": {
132
+ "name": "math:geometry",
133
+ "prompt_function": "math",
134
+ "hf_repo": "lighteval/MATH",
135
+ "hf_subset": "geometry",
136
+ "metric": [
137
+ "quasi_exact_match_math"
138
+ ],
139
+ "hf_avail_splits": [
140
+ "train",
141
+ "test",
142
+ "validation"
143
+ ],
144
+ "evaluation_splits": [
145
+ "test"
146
+ ],
147
+ "few_shots_split": null,
148
+ "few_shots_select": null,
149
+ "generation_size": 2048,
150
+ "stop_sequence": [
151
+ "\n"
152
+ ],
153
+ "output_regex": null,
154
+ "frozen": false,
155
+ "suite": [
156
+ "lighteval",
157
+ "math"
158
+ ],
159
+ "original_num_docs": 479,
160
+ "effective_num_docs": 479,
161
+ "trust_dataset": true,
162
+ "must_remove_duplicate_docs": null
163
+ },
164
+ "lighteval|math:intermediate_algebra": {
165
+ "name": "math:intermediate_algebra",
166
+ "prompt_function": "math",
167
+ "hf_repo": "lighteval/MATH",
168
+ "hf_subset": "intermediate_algebra",
169
+ "metric": [
170
+ "quasi_exact_match_math"
171
+ ],
172
+ "hf_avail_splits": [
173
+ "train",
174
+ "test",
175
+ "validation"
176
+ ],
177
+ "evaluation_splits": [
178
+ "test"
179
+ ],
180
+ "few_shots_split": null,
181
+ "few_shots_select": null,
182
+ "generation_size": 2048,
183
+ "stop_sequence": [
184
+ "\n"
185
+ ],
186
+ "output_regex": null,
187
+ "frozen": false,
188
+ "suite": [
189
+ "lighteval",
190
+ "math"
191
+ ],
192
+ "original_num_docs": 903,
193
+ "effective_num_docs": 903,
194
+ "trust_dataset": true,
195
+ "must_remove_duplicate_docs": null
196
+ },
197
+ "lighteval|math:number_theory": {
198
+ "name": "math:number_theory",
199
+ "prompt_function": "math",
200
+ "hf_repo": "lighteval/MATH",
201
+ "hf_subset": "number_theory",
202
+ "metric": [
203
+ "quasi_exact_match_math"
204
+ ],
205
+ "hf_avail_splits": [
206
+ "train",
207
+ "test",
208
+ "validation"
209
+ ],
210
+ "evaluation_splits": [
211
+ "test"
212
+ ],
213
+ "few_shots_split": null,
214
+ "few_shots_select": null,
215
+ "generation_size": 2048,
216
+ "stop_sequence": [
217
+ "\n"
218
+ ],
219
+ "output_regex": null,
220
+ "frozen": false,
221
+ "suite": [
222
+ "lighteval",
223
+ "math"
224
+ ],
225
+ "original_num_docs": 540,
226
+ "effective_num_docs": 540,
227
+ "trust_dataset": true,
228
+ "must_remove_duplicate_docs": null
229
+ },
230
+ "lighteval|math:prealgebra": {
231
+ "name": "math:prealgebra",
232
+ "prompt_function": "math",
233
+ "hf_repo": "lighteval/MATH",
234
+ "hf_subset": "prealgebra",
235
+ "metric": [
236
+ "quasi_exact_match_math"
237
+ ],
238
+ "hf_avail_splits": [
239
+ "train",
240
+ "test",
241
+ "validation"
242
+ ],
243
+ "evaluation_splits": [
244
+ "test"
245
+ ],
246
+ "few_shots_split": null,
247
+ "few_shots_select": null,
248
+ "generation_size": 2048,
249
+ "stop_sequence": [
250
+ "\n"
251
+ ],
252
+ "output_regex": null,
253
+ "frozen": false,
254
+ "suite": [
255
+ "lighteval",
256
+ "math"
257
+ ],
258
+ "original_num_docs": 871,
259
+ "effective_num_docs": 871,
260
+ "trust_dataset": true,
261
+ "must_remove_duplicate_docs": null
262
+ },
263
+ "lighteval|math:precalculus": {
264
+ "name": "math:precalculus",
265
+ "prompt_function": "math",
266
+ "hf_repo": "lighteval/MATH",
267
+ "hf_subset": "precalculus",
268
+ "metric": [
269
+ "quasi_exact_match_math"
270
+ ],
271
+ "hf_avail_splits": [
272
+ "train",
273
+ "test",
274
+ "validation"
275
+ ],
276
+ "evaluation_splits": [
277
+ "test"
278
+ ],
279
+ "few_shots_split": null,
280
+ "few_shots_select": null,
281
+ "generation_size": 2048,
282
+ "stop_sequence": [
283
+ "\n"
284
+ ],
285
+ "output_regex": null,
286
+ "frozen": false,
287
+ "suite": [
288
+ "lighteval",
289
+ "math"
290
+ ],
291
+ "original_num_docs": 546,
292
+ "effective_num_docs": 546,
293
+ "trust_dataset": true,
294
+ "must_remove_duplicate_docs": null
295
+ }
296
+ },
297
+ "summary_tasks": {
298
+ "lighteval|math:algebra|0": {
299
+ "hashes": {
300
+ "hash_examples": "37a2fd2f076d2e49",
301
+ "hash_full_prompts": "3cd2a9fbe38ca7d1",
302
+ "hash_input_tokens": "af20703b79058645",
303
+ "hash_cont_tokens": "fe52ffbbb871419b"
304
+ },
305
+ "truncated": 1187,
306
+ "non_truncated": 0,
307
+ "padded": 162,
308
+ "non_padded": 1025,
309
+ "effective_few_shots": 0.0,
310
+ "num_truncated_few_shots": 0
311
+ },
312
+ "lighteval|math:counting_and_probability|0": {
313
+ "hashes": {
314
+ "hash_examples": "97b4892e28bc078b",
315
+ "hash_full_prompts": "23c0b10a3f2357ea",
316
+ "hash_input_tokens": "b4c45e9b83b3e1c5",
317
+ "hash_cont_tokens": "42d9b4d881832736"
318
+ },
319
+ "truncated": 474,
320
+ "non_truncated": 0,
321
+ "padded": 37,
322
+ "non_padded": 437,
323
+ "effective_few_shots": 0.0,
324
+ "num_truncated_few_shots": 0
325
+ },
326
+ "lighteval|math:geometry|0": {
327
+ "hashes": {
328
+ "hash_examples": "9e9f0228b8b3d093",
329
+ "hash_full_prompts": "25ae739a5bd7cf61",
330
+ "hash_input_tokens": "5841f4204130c263",
331
+ "hash_cont_tokens": "00ac23b316de2151"
332
+ },
333
+ "truncated": 478,
334
+ "non_truncated": 1,
335
+ "padded": 116,
336
+ "non_padded": 363,
337
+ "effective_few_shots": 0.0,
338
+ "num_truncated_few_shots": 0
339
+ },
340
+ "lighteval|math:intermediate_algebra|0": {
341
+ "hashes": {
342
+ "hash_examples": "cfe73a8e28ae94de",
343
+ "hash_full_prompts": "c8813c4dcc73d3b8",
344
+ "hash_input_tokens": "b7735888ef889dec",
345
+ "hash_cont_tokens": "39fac2a1c17a2e94"
346
+ },
347
+ "truncated": 901,
348
+ "non_truncated": 2,
349
+ "padded": 54,
350
+ "non_padded": 849,
351
+ "effective_few_shots": 0.0,
352
+ "num_truncated_few_shots": 0
353
+ },
354
+ "lighteval|math:number_theory|0": {
355
+ "hashes": {
356
+ "hash_examples": "4ee5237cf144afac",
357
+ "hash_full_prompts": "603a451eb11974d5",
358
+ "hash_input_tokens": "93e5d92f4261e2c8",
359
+ "hash_cont_tokens": "af9ceab4afe70f1c"
360
+ },
361
+ "truncated": 540,
362
+ "non_truncated": 0,
363
+ "padded": 4,
364
+ "non_padded": 536,
365
+ "effective_few_shots": 0.0,
366
+ "num_truncated_few_shots": 0
367
+ },
368
+ "lighteval|math:prealgebra|0": {
369
+ "hashes": {
370
+ "hash_examples": "3fb3afeb885f73d8",
371
+ "hash_full_prompts": "e5e12c93d4a8be8a",
372
+ "hash_input_tokens": "35fa5bc74f072f40",
373
+ "hash_cont_tokens": "c997620dc5abc01e"
374
+ },
375
+ "truncated": 871,
376
+ "non_truncated": 0,
377
+ "padded": 46,
378
+ "non_padded": 825,
379
+ "effective_few_shots": 0.0,
380
+ "num_truncated_few_shots": 0
381
+ },
382
+ "lighteval|math:precalculus|0": {
383
+ "hashes": {
384
+ "hash_examples": "753e25ab9ec4b46c",
385
+ "hash_full_prompts": "eabf79561a9582e5",
386
+ "hash_input_tokens": "51ed0786cadde815",
387
+ "hash_cont_tokens": "ede52acd76fdf266"
388
+ },
389
+ "truncated": 546,
390
+ "non_truncated": 0,
391
+ "padded": 23,
392
+ "non_padded": 523,
393
+ "effective_few_shots": 0.0,
394
+ "num_truncated_few_shots": 0
395
+ }
396
+ },
397
+ "summary_general": {
398
+ "hashes": {
399
+ "hash_examples": "b76099aa9092a203",
400
+ "hash_full_prompts": "5a707e66f34d9ab5",
401
+ "hash_input_tokens": "59f68ba3e73a437f",
402
+ "hash_cont_tokens": "ab3aa0922abddcb3"
403
+ },
404
+ "truncated": 4997,
405
+ "non_truncated": 3,
406
+ "padded": 442,
407
+ "non_padded": 4558,
408
+ "num_truncated_few_shots": 0
409
+ }
410
+ }