Brendan King commited on
Commit
dbdd793
·
1 Parent(s): fd7b61c

Meta-training: t5 with 5 random in-domain demonstrations

Browse files
checkpoint-70000/added_tokens.json DELETED
@@ -1 +0,0 @@
1
- {"<_PAD_>": 32100, "<go_r>": 32101, "<go_b>": 32102, "<go_a>": 32103, "<eos_u>": 32104, "<eos_r>": 32105, "<eos_b>": 32106, "<eos_a>": 32107, "<go_d>": 32108, "<eos_d>": 32109, "<sos_u>": 32110, "<sos_r>": 32111, "<sos_b>": 32112, "<sos_a>": 32113, "<sos_d>": 32114, "<sos_db>": 32115, "<eos_db>": 32116, "<sos_context>": 32117, "<eos_context>": 32118}
 
 
checkpoint-70000/config.json DELETED
@@ -1,58 +0,0 @@
1
- {
2
- "_name_or_path": "t5-small",
3
- "architectures": [
4
- "T5ForConditionalGeneration"
5
- ],
6
- "d_ff": 2048,
7
- "d_kv": 64,
8
- "d_model": 512,
9
- "decoder_start_token_id": 0,
10
- "dropout_rate": 0.1,
11
- "eos_token_id": 1,
12
- "feed_forward_proj": "relu",
13
- "initializer_factor": 1.0,
14
- "is_encoder_decoder": true,
15
- "layer_norm_epsilon": 1e-06,
16
- "model_type": "t5",
17
- "n_positions": 512,
18
- "num_decoder_layers": 6,
19
- "num_heads": 8,
20
- "num_layers": 6,
21
- "output_past": true,
22
- "pad_token_id": 0,
23
- "relative_attention_max_distance": 128,
24
- "relative_attention_num_buckets": 32,
25
- "task_specific_params": {
26
- "summarization": {
27
- "early_stopping": true,
28
- "length_penalty": 2.0,
29
- "max_length": 200,
30
- "min_length": 30,
31
- "no_repeat_ngram_size": 3,
32
- "num_beams": 4,
33
- "prefix": "summarize: "
34
- },
35
- "translation_en_to_de": {
36
- "early_stopping": true,
37
- "max_length": 300,
38
- "num_beams": 4,
39
- "prefix": "translate English to German: "
40
- },
41
- "translation_en_to_fr": {
42
- "early_stopping": true,
43
- "max_length": 300,
44
- "num_beams": 4,
45
- "prefix": "translate English to French: "
46
- },
47
- "translation_en_to_ro": {
48
- "early_stopping": true,
49
- "max_length": 300,
50
- "num_beams": 4,
51
- "prefix": "translate English to Romanian: "
52
- }
53
- },
54
- "torch_dtype": "float32",
55
- "transformers_version": "4.18.0",
56
- "use_cache": true,
57
- "vocab_size": 32119
58
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-70000/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5ddab4a47a64bdf913fdf05a4b0518c0ca8a6216e8459f7d5b9669fe16148f0
3
- size 838063
 
 
 
 
checkpoint-70000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d561604dc3db8f6045efc5cab0290cb3168ae7a826babf46d8693de9075b673a
3
- size 242051835
 
 
 
 
checkpoint-70000/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f36f4a179464fd06752d890a2c75c4c72abe9fd790c70d83b2f4056c58f2774a
3
- size 14503
 
 
 
 
checkpoint-70000/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8df1363e34e2930396e1289cdd659a821ec2b116c9f56797854a4818e7c27aa
3
- size 623
 
 
 
 
checkpoint-70000/special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"]}
 
 
checkpoint-70000/spiece.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
- size 791656
 
 
 
 
checkpoint-70000/tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- {"eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "extra_ids": 100, "additional_special_tokens": ["<extra_id_0>", "<extra_id_1>", "<extra_id_2>", "<extra_id_3>", "<extra_id_4>", "<extra_id_5>", "<extra_id_6>", "<extra_id_7>", "<extra_id_8>", "<extra_id_9>", "<extra_id_10>", "<extra_id_11>", "<extra_id_12>", "<extra_id_13>", "<extra_id_14>", "<extra_id_15>", "<extra_id_16>", "<extra_id_17>", "<extra_id_18>", "<extra_id_19>", "<extra_id_20>", "<extra_id_21>", "<extra_id_22>", "<extra_id_23>", "<extra_id_24>", "<extra_id_25>", "<extra_id_26>", "<extra_id_27>", "<extra_id_28>", "<extra_id_29>", "<extra_id_30>", "<extra_id_31>", "<extra_id_32>", "<extra_id_33>", "<extra_id_34>", "<extra_id_35>", "<extra_id_36>", "<extra_id_37>", "<extra_id_38>", "<extra_id_39>", "<extra_id_40>", "<extra_id_41>", "<extra_id_42>", "<extra_id_43>", "<extra_id_44>", "<extra_id_45>", "<extra_id_46>", "<extra_id_47>", "<extra_id_48>", "<extra_id_49>", "<extra_id_50>", "<extra_id_51>", "<extra_id_52>", "<extra_id_53>", "<extra_id_54>", "<extra_id_55>", "<extra_id_56>", "<extra_id_57>", "<extra_id_58>", "<extra_id_59>", "<extra_id_60>", "<extra_id_61>", "<extra_id_62>", "<extra_id_63>", "<extra_id_64>", "<extra_id_65>", "<extra_id_66>", "<extra_id_67>", "<extra_id_68>", "<extra_id_69>", "<extra_id_70>", "<extra_id_71>", "<extra_id_72>", "<extra_id_73>", "<extra_id_74>", "<extra_id_75>", "<extra_id_76>", "<extra_id_77>", "<extra_id_78>", "<extra_id_79>", "<extra_id_80>", "<extra_id_81>", "<extra_id_82>", "<extra_id_83>", "<extra_id_84>", "<extra_id_85>", "<extra_id_86>", "<extra_id_87>", "<extra_id_88>", "<extra_id_89>", "<extra_id_90>", "<extra_id_91>", "<extra_id_92>", "<extra_id_93>", "<extra_id_94>", "<extra_id_95>", "<extra_id_96>", "<extra_id_97>", "<extra_id_98>", "<extra_id_99>"], "sp_model_kwargs": {}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "t5-small", "tokenizer_class": "T5Tokenizer"}
 
 
checkpoint-70000/trainer_state.json DELETED
@@ -1,2172 +0,0 @@
1
- {
2
- "best_metric": 0.9819265007972717,
3
- "best_model_checkpoint": "./checkpoints/random-in-domain-5-demos-t5-small/checkpoint-70000",
4
- "epoch": 8.970877518982475,
5
- "global_step": 70000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 0.001,
13
- "loss": 2.2565,
14
- "step": 200
15
- },
16
- {
17
- "epoch": 0.05,
18
- "learning_rate": 0.001,
19
- "loss": 1.5897,
20
- "step": 400
21
- },
22
- {
23
- "epoch": 0.08,
24
- "learning_rate": 0.001,
25
- "loss": 1.406,
26
- "step": 600
27
- },
28
- {
29
- "epoch": 0.1,
30
- "learning_rate": 0.001,
31
- "loss": 1.3023,
32
- "step": 800
33
- },
34
- {
35
- "epoch": 0.13,
36
- "learning_rate": 0.001,
37
- "loss": 1.2344,
38
- "step": 1000
39
- },
40
- {
41
- "epoch": 0.15,
42
- "learning_rate": 0.001,
43
- "loss": 1.1836,
44
- "step": 1200
45
- },
46
- {
47
- "epoch": 0.18,
48
- "learning_rate": 0.001,
49
- "loss": 1.1485,
50
- "step": 1400
51
- },
52
- {
53
- "epoch": 0.21,
54
- "learning_rate": 0.001,
55
- "loss": 1.1179,
56
- "step": 1600
57
- },
58
- {
59
- "epoch": 0.23,
60
- "learning_rate": 0.001,
61
- "loss": 1.0795,
62
- "step": 1800
63
- },
64
- {
65
- "epoch": 0.26,
66
- "learning_rate": 0.001,
67
- "loss": 1.0487,
68
- "step": 2000
69
- },
70
- {
71
- "epoch": 0.28,
72
- "learning_rate": 0.001,
73
- "loss": 1.0367,
74
- "step": 2200
75
- },
76
- {
77
- "epoch": 0.31,
78
- "learning_rate": 0.001,
79
- "loss": 1.0215,
80
- "step": 2400
81
- },
82
- {
83
- "epoch": 0.33,
84
- "learning_rate": 0.001,
85
- "loss": 1.0043,
86
- "step": 2600
87
- },
88
- {
89
- "epoch": 0.36,
90
- "learning_rate": 0.001,
91
- "loss": 0.979,
92
- "step": 2800
93
- },
94
- {
95
- "epoch": 0.38,
96
- "learning_rate": 0.001,
97
- "loss": 0.9705,
98
- "step": 3000
99
- },
100
- {
101
- "epoch": 0.41,
102
- "learning_rate": 0.001,
103
- "loss": 0.9737,
104
- "step": 3200
105
- },
106
- {
107
- "epoch": 0.44,
108
- "learning_rate": 0.001,
109
- "loss": 0.9429,
110
- "step": 3400
111
- },
112
- {
113
- "epoch": 0.46,
114
- "learning_rate": 0.001,
115
- "loss": 0.9413,
116
- "step": 3600
117
- },
118
- {
119
- "epoch": 0.49,
120
- "learning_rate": 0.001,
121
- "loss": 0.9282,
122
- "step": 3800
123
- },
124
- {
125
- "epoch": 0.51,
126
- "learning_rate": 0.001,
127
- "loss": 0.918,
128
- "step": 4000
129
- },
130
- {
131
- "epoch": 0.54,
132
- "learning_rate": 0.001,
133
- "loss": 0.9178,
134
- "step": 4200
135
- },
136
- {
137
- "epoch": 0.56,
138
- "learning_rate": 0.001,
139
- "loss": 0.903,
140
- "step": 4400
141
- },
142
- {
143
- "epoch": 0.59,
144
- "learning_rate": 0.001,
145
- "loss": 0.9024,
146
- "step": 4600
147
- },
148
- {
149
- "epoch": 0.62,
150
- "learning_rate": 0.001,
151
- "loss": 0.886,
152
- "step": 4800
153
- },
154
- {
155
- "epoch": 0.64,
156
- "learning_rate": 0.001,
157
- "loss": 0.8795,
158
- "step": 5000
159
- },
160
- {
161
- "epoch": 0.67,
162
- "learning_rate": 0.001,
163
- "loss": 0.8883,
164
- "step": 5200
165
- },
166
- {
167
- "epoch": 0.69,
168
- "learning_rate": 0.001,
169
- "loss": 0.8601,
170
- "step": 5400
171
- },
172
- {
173
- "epoch": 0.72,
174
- "learning_rate": 0.001,
175
- "loss": 0.8643,
176
- "step": 5600
177
- },
178
- {
179
- "epoch": 0.74,
180
- "learning_rate": 0.001,
181
- "loss": 0.8641,
182
- "step": 5800
183
- },
184
- {
185
- "epoch": 0.77,
186
- "learning_rate": 0.001,
187
- "loss": 0.8625,
188
- "step": 6000
189
- },
190
- {
191
- "epoch": 0.79,
192
- "learning_rate": 0.001,
193
- "loss": 0.8655,
194
- "step": 6200
195
- },
196
- {
197
- "epoch": 0.82,
198
- "learning_rate": 0.001,
199
- "loss": 0.8538,
200
- "step": 6400
201
- },
202
- {
203
- "epoch": 0.85,
204
- "learning_rate": 0.001,
205
- "loss": 0.8575,
206
- "step": 6600
207
- },
208
- {
209
- "epoch": 0.87,
210
- "learning_rate": 0.001,
211
- "loss": 0.8427,
212
- "step": 6800
213
- },
214
- {
215
- "epoch": 0.9,
216
- "learning_rate": 0.001,
217
- "loss": 0.8332,
218
- "step": 7000
219
- },
220
- {
221
- "epoch": 0.92,
222
- "learning_rate": 0.001,
223
- "loss": 0.8373,
224
- "step": 7200
225
- },
226
- {
227
- "epoch": 0.95,
228
- "learning_rate": 0.001,
229
- "loss": 0.845,
230
- "step": 7400
231
- },
232
- {
233
- "epoch": 0.97,
234
- "learning_rate": 0.001,
235
- "loss": 0.8271,
236
- "step": 7600
237
- },
238
- {
239
- "epoch": 1.0,
240
- "learning_rate": 0.001,
241
- "loss": 0.8159,
242
- "step": 7800
243
- },
244
- {
245
- "epoch": 1.03,
246
- "learning_rate": 0.001,
247
- "loss": 0.8027,
248
- "step": 8000
249
- },
250
- {
251
- "epoch": 1.05,
252
- "learning_rate": 0.001,
253
- "loss": 0.7981,
254
- "step": 8200
255
- },
256
- {
257
- "epoch": 1.08,
258
- "learning_rate": 0.001,
259
- "loss": 0.8025,
260
- "step": 8400
261
- },
262
- {
263
- "epoch": 1.1,
264
- "learning_rate": 0.001,
265
- "loss": 0.8008,
266
- "step": 8600
267
- },
268
- {
269
- "epoch": 1.13,
270
- "learning_rate": 0.001,
271
- "loss": 0.7951,
272
- "step": 8800
273
- },
274
- {
275
- "epoch": 1.15,
276
- "learning_rate": 0.001,
277
- "loss": 0.7945,
278
- "step": 9000
279
- },
280
- {
281
- "epoch": 1.18,
282
- "learning_rate": 0.001,
283
- "loss": 0.7824,
284
- "step": 9200
285
- },
286
- {
287
- "epoch": 1.2,
288
- "learning_rate": 0.001,
289
- "loss": 0.7945,
290
- "step": 9400
291
- },
292
- {
293
- "epoch": 1.23,
294
- "learning_rate": 0.001,
295
- "loss": 0.7939,
296
- "step": 9600
297
- },
298
- {
299
- "epoch": 1.26,
300
- "learning_rate": 0.001,
301
- "loss": 0.7971,
302
- "step": 9800
303
- },
304
- {
305
- "epoch": 1.28,
306
- "learning_rate": 0.001,
307
- "loss": 0.791,
308
- "step": 10000
309
- },
310
- {
311
- "epoch": 1.28,
312
- "eval_loss": 1.0492665767669678,
313
- "eval_runtime": 236.4668,
314
- "eval_samples_per_second": 516.859,
315
- "eval_steps_per_second": 2.021,
316
- "step": 10000
317
- },
318
- {
319
- "epoch": 1.31,
320
- "learning_rate": 0.001,
321
- "loss": 0.8019,
322
- "step": 10200
323
- },
324
- {
325
- "epoch": 1.33,
326
- "learning_rate": 0.001,
327
- "loss": 0.7752,
328
- "step": 10400
329
- },
330
- {
331
- "epoch": 1.36,
332
- "learning_rate": 0.001,
333
- "loss": 0.7862,
334
- "step": 10600
335
- },
336
- {
337
- "epoch": 1.38,
338
- "learning_rate": 0.001,
339
- "loss": 0.7835,
340
- "step": 10800
341
- },
342
- {
343
- "epoch": 1.41,
344
- "learning_rate": 0.001,
345
- "loss": 0.7751,
346
- "step": 11000
347
- },
348
- {
349
- "epoch": 1.44,
350
- "learning_rate": 0.001,
351
- "loss": 0.7747,
352
- "step": 11200
353
- },
354
- {
355
- "epoch": 1.46,
356
- "learning_rate": 0.001,
357
- "loss": 0.7623,
358
- "step": 11400
359
- },
360
- {
361
- "epoch": 1.49,
362
- "learning_rate": 0.001,
363
- "loss": 0.7898,
364
- "step": 11600
365
- },
366
- {
367
- "epoch": 1.51,
368
- "learning_rate": 0.001,
369
- "loss": 0.7629,
370
- "step": 11800
371
- },
372
- {
373
- "epoch": 1.54,
374
- "learning_rate": 0.001,
375
- "loss": 0.7601,
376
- "step": 12000
377
- },
378
- {
379
- "epoch": 1.56,
380
- "learning_rate": 0.001,
381
- "loss": 0.7698,
382
- "step": 12200
383
- },
384
- {
385
- "epoch": 1.59,
386
- "learning_rate": 0.001,
387
- "loss": 0.7664,
388
- "step": 12400
389
- },
390
- {
391
- "epoch": 1.61,
392
- "learning_rate": 0.001,
393
- "loss": 0.7616,
394
- "step": 12600
395
- },
396
- {
397
- "epoch": 1.64,
398
- "learning_rate": 0.001,
399
- "loss": 0.764,
400
- "step": 12800
401
- },
402
- {
403
- "epoch": 1.67,
404
- "learning_rate": 0.001,
405
- "loss": 0.774,
406
- "step": 13000
407
- },
408
- {
409
- "epoch": 1.69,
410
- "learning_rate": 0.001,
411
- "loss": 0.7524,
412
- "step": 13200
413
- },
414
- {
415
- "epoch": 1.72,
416
- "learning_rate": 0.001,
417
- "loss": 0.7637,
418
- "step": 13400
419
- },
420
- {
421
- "epoch": 1.74,
422
- "learning_rate": 0.001,
423
- "loss": 0.7615,
424
- "step": 13600
425
- },
426
- {
427
- "epoch": 1.77,
428
- "learning_rate": 0.001,
429
- "loss": 0.7702,
430
- "step": 13800
431
- },
432
- {
433
- "epoch": 1.79,
434
- "learning_rate": 0.001,
435
- "loss": 0.7453,
436
- "step": 14000
437
- },
438
- {
439
- "epoch": 1.82,
440
- "learning_rate": 0.001,
441
- "loss": 0.7369,
442
- "step": 14200
443
- },
444
- {
445
- "epoch": 1.85,
446
- "learning_rate": 0.001,
447
- "loss": 0.7499,
448
- "step": 14400
449
- },
450
- {
451
- "epoch": 1.87,
452
- "learning_rate": 0.001,
453
- "loss": 0.7491,
454
- "step": 14600
455
- },
456
- {
457
- "epoch": 1.9,
458
- "learning_rate": 0.001,
459
- "loss": 0.7607,
460
- "step": 14800
461
- },
462
- {
463
- "epoch": 1.92,
464
- "learning_rate": 0.001,
465
- "loss": 0.7417,
466
- "step": 15000
467
- },
468
- {
469
- "epoch": 1.95,
470
- "learning_rate": 0.001,
471
- "loss": 0.7312,
472
- "step": 15200
473
- },
474
- {
475
- "epoch": 1.97,
476
- "learning_rate": 0.001,
477
- "loss": 0.7464,
478
- "step": 15400
479
- },
480
- {
481
- "epoch": 2.0,
482
- "learning_rate": 0.001,
483
- "loss": 0.7555,
484
- "step": 15600
485
- },
486
- {
487
- "epoch": 2.02,
488
- "learning_rate": 0.001,
489
- "loss": 0.7236,
490
- "step": 15800
491
- },
492
- {
493
- "epoch": 2.05,
494
- "learning_rate": 0.001,
495
- "loss": 0.716,
496
- "step": 16000
497
- },
498
- {
499
- "epoch": 2.08,
500
- "learning_rate": 0.001,
501
- "loss": 0.7199,
502
- "step": 16200
503
- },
504
- {
505
- "epoch": 2.1,
506
- "learning_rate": 0.001,
507
- "loss": 0.7237,
508
- "step": 16400
509
- },
510
- {
511
- "epoch": 2.13,
512
- "learning_rate": 0.001,
513
- "loss": 0.7293,
514
- "step": 16600
515
- },
516
- {
517
- "epoch": 2.15,
518
- "learning_rate": 0.001,
519
- "loss": 0.725,
520
- "step": 16800
521
- },
522
- {
523
- "epoch": 2.18,
524
- "learning_rate": 0.001,
525
- "loss": 0.7262,
526
- "step": 17000
527
- },
528
- {
529
- "epoch": 2.2,
530
- "learning_rate": 0.001,
531
- "loss": 0.7129,
532
- "step": 17200
533
- },
534
- {
535
- "epoch": 2.23,
536
- "learning_rate": 0.001,
537
- "loss": 0.7205,
538
- "step": 17400
539
- },
540
- {
541
- "epoch": 2.26,
542
- "learning_rate": 0.001,
543
- "loss": 0.7145,
544
- "step": 17600
545
- },
546
- {
547
- "epoch": 2.28,
548
- "learning_rate": 0.001,
549
- "loss": 0.727,
550
- "step": 17800
551
- },
552
- {
553
- "epoch": 2.31,
554
- "learning_rate": 0.001,
555
- "loss": 0.7261,
556
- "step": 18000
557
- },
558
- {
559
- "epoch": 2.33,
560
- "learning_rate": 0.001,
561
- "loss": 0.7226,
562
- "step": 18200
563
- },
564
- {
565
- "epoch": 2.36,
566
- "learning_rate": 0.001,
567
- "loss": 0.7262,
568
- "step": 18400
569
- },
570
- {
571
- "epoch": 2.38,
572
- "learning_rate": 0.001,
573
- "loss": 0.7224,
574
- "step": 18600
575
- },
576
- {
577
- "epoch": 2.41,
578
- "learning_rate": 0.001,
579
- "loss": 0.7123,
580
- "step": 18800
581
- },
582
- {
583
- "epoch": 2.43,
584
- "learning_rate": 0.001,
585
- "loss": 0.7151,
586
- "step": 19000
587
- },
588
- {
589
- "epoch": 2.46,
590
- "learning_rate": 0.001,
591
- "loss": 0.7059,
592
- "step": 19200
593
- },
594
- {
595
- "epoch": 2.49,
596
- "learning_rate": 0.001,
597
- "loss": 0.7125,
598
- "step": 19400
599
- },
600
- {
601
- "epoch": 2.51,
602
- "learning_rate": 0.001,
603
- "loss": 0.7113,
604
- "step": 19600
605
- },
606
- {
607
- "epoch": 2.54,
608
- "learning_rate": 0.001,
609
- "loss": 0.7083,
610
- "step": 19800
611
- },
612
- {
613
- "epoch": 2.56,
614
- "learning_rate": 0.001,
615
- "loss": 0.7054,
616
- "step": 20000
617
- },
618
- {
619
- "epoch": 2.56,
620
- "eval_loss": 1.007745623588562,
621
- "eval_runtime": 236.2858,
622
- "eval_samples_per_second": 517.255,
623
- "eval_steps_per_second": 2.023,
624
- "step": 20000
625
- },
626
- {
627
- "epoch": 2.59,
628
- "learning_rate": 0.001,
629
- "loss": 0.6979,
630
- "step": 20200
631
- },
632
- {
633
- "epoch": 2.61,
634
- "learning_rate": 0.001,
635
- "loss": 0.7047,
636
- "step": 20400
637
- },
638
- {
639
- "epoch": 2.64,
640
- "learning_rate": 0.001,
641
- "loss": 0.7163,
642
- "step": 20600
643
- },
644
- {
645
- "epoch": 2.67,
646
- "learning_rate": 0.001,
647
- "loss": 0.7064,
648
- "step": 20800
649
- },
650
- {
651
- "epoch": 2.69,
652
- "learning_rate": 0.001,
653
- "loss": 0.7087,
654
- "step": 21000
655
- },
656
- {
657
- "epoch": 2.72,
658
- "learning_rate": 0.001,
659
- "loss": 0.7112,
660
- "step": 21200
661
- },
662
- {
663
- "epoch": 2.74,
664
- "learning_rate": 0.001,
665
- "loss": 0.7174,
666
- "step": 21400
667
- },
668
- {
669
- "epoch": 2.77,
670
- "learning_rate": 0.001,
671
- "loss": 0.7056,
672
- "step": 21600
673
- },
674
- {
675
- "epoch": 2.79,
676
- "learning_rate": 0.001,
677
- "loss": 0.7008,
678
- "step": 21800
679
- },
680
- {
681
- "epoch": 2.82,
682
- "learning_rate": 0.001,
683
- "loss": 0.7001,
684
- "step": 22000
685
- },
686
- {
687
- "epoch": 2.85,
688
- "learning_rate": 0.001,
689
- "loss": 0.7014,
690
- "step": 22200
691
- },
692
- {
693
- "epoch": 2.87,
694
- "learning_rate": 0.001,
695
- "loss": 0.7007,
696
- "step": 22400
697
- },
698
- {
699
- "epoch": 2.9,
700
- "learning_rate": 0.001,
701
- "loss": 0.7051,
702
- "step": 22600
703
- },
704
- {
705
- "epoch": 2.92,
706
- "learning_rate": 0.001,
707
- "loss": 0.7069,
708
- "step": 22800
709
- },
710
- {
711
- "epoch": 2.95,
712
- "learning_rate": 0.001,
713
- "loss": 0.696,
714
- "step": 23000
715
- },
716
- {
717
- "epoch": 2.97,
718
- "learning_rate": 0.001,
719
- "loss": 0.6893,
720
- "step": 23200
721
- },
722
- {
723
- "epoch": 3.0,
724
- "learning_rate": 0.001,
725
- "loss": 0.6865,
726
- "step": 23400
727
- },
728
- {
729
- "epoch": 3.02,
730
- "learning_rate": 0.001,
731
- "loss": 0.6848,
732
- "step": 23600
733
- },
734
- {
735
- "epoch": 3.05,
736
- "learning_rate": 0.001,
737
- "loss": 0.6707,
738
- "step": 23800
739
- },
740
- {
741
- "epoch": 3.08,
742
- "learning_rate": 0.001,
743
- "loss": 0.6826,
744
- "step": 24000
745
- },
746
- {
747
- "epoch": 3.1,
748
- "learning_rate": 0.001,
749
- "loss": 0.6754,
750
- "step": 24200
751
- },
752
- {
753
- "epoch": 3.13,
754
- "learning_rate": 0.001,
755
- "loss": 0.6722,
756
- "step": 24400
757
- },
758
- {
759
- "epoch": 3.15,
760
- "learning_rate": 0.001,
761
- "loss": 0.6775,
762
- "step": 24600
763
- },
764
- {
765
- "epoch": 3.18,
766
- "learning_rate": 0.001,
767
- "loss": 0.6796,
768
- "step": 24800
769
- },
770
- {
771
- "epoch": 3.2,
772
- "learning_rate": 0.001,
773
- "loss": 0.6826,
774
- "step": 25000
775
- },
776
- {
777
- "epoch": 3.23,
778
- "learning_rate": 0.001,
779
- "loss": 0.6778,
780
- "step": 25200
781
- },
782
- {
783
- "epoch": 3.26,
784
- "learning_rate": 0.001,
785
- "loss": 0.6922,
786
- "step": 25400
787
- },
788
- {
789
- "epoch": 3.28,
790
- "learning_rate": 0.001,
791
- "loss": 0.6706,
792
- "step": 25600
793
- },
794
- {
795
- "epoch": 3.31,
796
- "learning_rate": 0.001,
797
- "loss": 0.6721,
798
- "step": 25800
799
- },
800
- {
801
- "epoch": 3.33,
802
- "learning_rate": 0.001,
803
- "loss": 0.6737,
804
- "step": 26000
805
- },
806
- {
807
- "epoch": 3.36,
808
- "learning_rate": 0.001,
809
- "loss": 0.7046,
810
- "step": 26200
811
- },
812
- {
813
- "epoch": 3.38,
814
- "learning_rate": 0.001,
815
- "loss": 0.6754,
816
- "step": 26400
817
- },
818
- {
819
- "epoch": 3.41,
820
- "learning_rate": 0.001,
821
- "loss": 0.6794,
822
- "step": 26600
823
- },
824
- {
825
- "epoch": 3.43,
826
- "learning_rate": 0.001,
827
- "loss": 0.684,
828
- "step": 26800
829
- },
830
- {
831
- "epoch": 3.46,
832
- "learning_rate": 0.001,
833
- "loss": 0.6776,
834
- "step": 27000
835
- },
836
- {
837
- "epoch": 3.49,
838
- "learning_rate": 0.001,
839
- "loss": 0.6826,
840
- "step": 27200
841
- },
842
- {
843
- "epoch": 3.51,
844
- "learning_rate": 0.001,
845
- "loss": 0.6877,
846
- "step": 27400
847
- },
848
- {
849
- "epoch": 3.54,
850
- "learning_rate": 0.001,
851
- "loss": 0.6773,
852
- "step": 27600
853
- },
854
- {
855
- "epoch": 3.56,
856
- "learning_rate": 0.001,
857
- "loss": 0.6731,
858
- "step": 27800
859
- },
860
- {
861
- "epoch": 3.59,
862
- "learning_rate": 0.001,
863
- "loss": 0.6704,
864
- "step": 28000
865
- },
866
- {
867
- "epoch": 3.61,
868
- "learning_rate": 0.001,
869
- "loss": 0.681,
870
- "step": 28200
871
- },
872
- {
873
- "epoch": 3.64,
874
- "learning_rate": 0.001,
875
- "loss": 0.6745,
876
- "step": 28400
877
- },
878
- {
879
- "epoch": 3.67,
880
- "learning_rate": 0.001,
881
- "loss": 0.6734,
882
- "step": 28600
883
- },
884
- {
885
- "epoch": 3.69,
886
- "learning_rate": 0.001,
887
- "loss": 0.6759,
888
- "step": 28800
889
- },
890
- {
891
- "epoch": 3.72,
892
- "learning_rate": 0.001,
893
- "loss": 0.6729,
894
- "step": 29000
895
- },
896
- {
897
- "epoch": 3.74,
898
- "learning_rate": 0.001,
899
- "loss": 0.6727,
900
- "step": 29200
901
- },
902
- {
903
- "epoch": 3.77,
904
- "learning_rate": 0.001,
905
- "loss": 0.6734,
906
- "step": 29400
907
- },
908
- {
909
- "epoch": 3.79,
910
- "learning_rate": 0.001,
911
- "loss": 0.6747,
912
- "step": 29600
913
- },
914
- {
915
- "epoch": 3.82,
916
- "learning_rate": 0.001,
917
- "loss": 0.6739,
918
- "step": 29800
919
- },
920
- {
921
- "epoch": 3.84,
922
- "learning_rate": 0.001,
923
- "loss": 0.6564,
924
- "step": 30000
925
- },
926
- {
927
- "epoch": 3.84,
928
- "eval_loss": 0.9836647510528564,
929
- "eval_runtime": 236.2546,
930
- "eval_samples_per_second": 517.323,
931
- "eval_steps_per_second": 2.023,
932
- "step": 30000
933
- },
934
- {
935
- "epoch": 3.87,
936
- "learning_rate": 0.001,
937
- "loss": 0.6748,
938
- "step": 30200
939
- },
940
- {
941
- "epoch": 3.9,
942
- "learning_rate": 0.001,
943
- "loss": 0.6582,
944
- "step": 30400
945
- },
946
- {
947
- "epoch": 3.92,
948
- "learning_rate": 0.001,
949
- "loss": 0.6526,
950
- "step": 30600
951
- },
952
- {
953
- "epoch": 3.95,
954
- "learning_rate": 0.001,
955
- "loss": 0.6587,
956
- "step": 30800
957
- },
958
- {
959
- "epoch": 3.97,
960
- "learning_rate": 0.001,
961
- "loss": 0.6621,
962
- "step": 31000
963
- },
964
- {
965
- "epoch": 4.0,
966
- "learning_rate": 0.001,
967
- "loss": 0.6829,
968
- "step": 31200
969
- },
970
- {
971
- "epoch": 4.02,
972
- "learning_rate": 0.001,
973
- "loss": 0.6448,
974
- "step": 31400
975
- },
976
- {
977
- "epoch": 4.05,
978
- "learning_rate": 0.001,
979
- "loss": 0.6391,
980
- "step": 31600
981
- },
982
- {
983
- "epoch": 4.08,
984
- "learning_rate": 0.001,
985
- "loss": 0.6596,
986
- "step": 31800
987
- },
988
- {
989
- "epoch": 4.1,
990
- "learning_rate": 0.001,
991
- "loss": 0.6503,
992
- "step": 32000
993
- },
994
- {
995
- "epoch": 4.13,
996
- "learning_rate": 0.001,
997
- "loss": 0.6484,
998
- "step": 32200
999
- },
1000
- {
1001
- "epoch": 4.15,
1002
- "learning_rate": 0.001,
1003
- "loss": 0.6564,
1004
- "step": 32400
1005
- },
1006
- {
1007
- "epoch": 4.18,
1008
- "learning_rate": 0.001,
1009
- "loss": 0.655,
1010
- "step": 32600
1011
- },
1012
- {
1013
- "epoch": 4.2,
1014
- "learning_rate": 0.001,
1015
- "loss": 0.6421,
1016
- "step": 32800
1017
- },
1018
- {
1019
- "epoch": 4.23,
1020
- "learning_rate": 0.001,
1021
- "loss": 0.6537,
1022
- "step": 33000
1023
- },
1024
- {
1025
- "epoch": 4.25,
1026
- "learning_rate": 0.001,
1027
- "loss": 0.6459,
1028
- "step": 33200
1029
- },
1030
- {
1031
- "epoch": 4.28,
1032
- "learning_rate": 0.001,
1033
- "loss": 0.6525,
1034
- "step": 33400
1035
- },
1036
- {
1037
- "epoch": 4.31,
1038
- "learning_rate": 0.001,
1039
- "loss": 0.6523,
1040
- "step": 33600
1041
- },
1042
- {
1043
- "epoch": 4.33,
1044
- "learning_rate": 0.001,
1045
- "loss": 0.6496,
1046
- "step": 33800
1047
- },
1048
- {
1049
- "epoch": 4.36,
1050
- "learning_rate": 0.001,
1051
- "loss": 0.6477,
1052
- "step": 34000
1053
- },
1054
- {
1055
- "epoch": 4.38,
1056
- "learning_rate": 0.001,
1057
- "loss": 0.6455,
1058
- "step": 34200
1059
- },
1060
- {
1061
- "epoch": 4.41,
1062
- "learning_rate": 0.001,
1063
- "loss": 0.6413,
1064
- "step": 34400
1065
- },
1066
- {
1067
- "epoch": 4.43,
1068
- "learning_rate": 0.001,
1069
- "loss": 0.6414,
1070
- "step": 34600
1071
- },
1072
- {
1073
- "epoch": 4.46,
1074
- "learning_rate": 0.001,
1075
- "loss": 0.6552,
1076
- "step": 34800
1077
- },
1078
- {
1079
- "epoch": 4.49,
1080
- "learning_rate": 0.001,
1081
- "loss": 0.6496,
1082
- "step": 35000
1083
- },
1084
- {
1085
- "epoch": 4.51,
1086
- "learning_rate": 0.001,
1087
- "loss": 0.6495,
1088
- "step": 35200
1089
- },
1090
- {
1091
- "epoch": 4.54,
1092
- "learning_rate": 0.001,
1093
- "loss": 0.6495,
1094
- "step": 35400
1095
- },
1096
- {
1097
- "epoch": 4.56,
1098
- "learning_rate": 0.001,
1099
- "loss": 0.6468,
1100
- "step": 35600
1101
- },
1102
- {
1103
- "epoch": 4.59,
1104
- "learning_rate": 0.001,
1105
- "loss": 0.6583,
1106
- "step": 35800
1107
- },
1108
- {
1109
- "epoch": 4.61,
1110
- "learning_rate": 0.001,
1111
- "loss": 0.66,
1112
- "step": 36000
1113
- },
1114
- {
1115
- "epoch": 4.64,
1116
- "learning_rate": 0.001,
1117
- "loss": 0.6555,
1118
- "step": 36200
1119
- },
1120
- {
1121
- "epoch": 4.66,
1122
- "learning_rate": 0.001,
1123
- "loss": 0.6489,
1124
- "step": 36400
1125
- },
1126
- {
1127
- "epoch": 4.69,
1128
- "learning_rate": 0.001,
1129
- "loss": 0.6361,
1130
- "step": 36600
1131
- },
1132
- {
1133
- "epoch": 4.72,
1134
- "learning_rate": 0.001,
1135
- "loss": 0.6493,
1136
- "step": 36800
1137
- },
1138
- {
1139
- "epoch": 4.74,
1140
- "learning_rate": 0.001,
1141
- "loss": 0.6402,
1142
- "step": 37000
1143
- },
1144
- {
1145
- "epoch": 4.77,
1146
- "learning_rate": 0.001,
1147
- "loss": 0.647,
1148
- "step": 37200
1149
- },
1150
- {
1151
- "epoch": 4.79,
1152
- "learning_rate": 0.001,
1153
- "loss": 0.6533,
1154
- "step": 37400
1155
- },
1156
- {
1157
- "epoch": 4.82,
1158
- "learning_rate": 0.001,
1159
- "loss": 0.6426,
1160
- "step": 37600
1161
- },
1162
- {
1163
- "epoch": 4.84,
1164
- "learning_rate": 0.001,
1165
- "loss": 0.6478,
1166
- "step": 37800
1167
- },
1168
- {
1169
- "epoch": 4.87,
1170
- "learning_rate": 0.001,
1171
- "loss": 0.6441,
1172
- "step": 38000
1173
- },
1174
- {
1175
- "epoch": 4.9,
1176
- "learning_rate": 0.001,
1177
- "loss": 0.6519,
1178
- "step": 38200
1179
- },
1180
- {
1181
- "epoch": 4.92,
1182
- "learning_rate": 0.001,
1183
- "loss": 0.6358,
1184
- "step": 38400
1185
- },
1186
- {
1187
- "epoch": 4.95,
1188
- "learning_rate": 0.001,
1189
- "loss": 0.6546,
1190
- "step": 38600
1191
- },
1192
- {
1193
- "epoch": 4.97,
1194
- "learning_rate": 0.001,
1195
- "loss": 0.6545,
1196
- "step": 38800
1197
- },
1198
- {
1199
- "epoch": 5.0,
1200
- "learning_rate": 0.001,
1201
- "loss": 0.6578,
1202
- "step": 39000
1203
- },
1204
- {
1205
- "epoch": 5.02,
1206
- "learning_rate": 0.001,
1207
- "loss": 0.6233,
1208
- "step": 39200
1209
- },
1210
- {
1211
- "epoch": 5.05,
1212
- "learning_rate": 0.001,
1213
- "loss": 0.6281,
1214
- "step": 39400
1215
- },
1216
- {
1217
- "epoch": 5.07,
1218
- "learning_rate": 0.001,
1219
- "loss": 0.6272,
1220
- "step": 39600
1221
- },
1222
- {
1223
- "epoch": 5.1,
1224
- "learning_rate": 0.001,
1225
- "loss": 0.6242,
1226
- "step": 39800
1227
- },
1228
- {
1229
- "epoch": 5.13,
1230
- "learning_rate": 0.001,
1231
- "loss": 0.6112,
1232
- "step": 40000
1233
- },
1234
- {
1235
- "epoch": 5.13,
1236
- "eval_loss": 0.9964196085929871,
1237
- "eval_runtime": 236.0055,
1238
- "eval_samples_per_second": 517.869,
1239
- "eval_steps_per_second": 2.025,
1240
- "step": 40000
1241
- },
1242
- {
1243
- "epoch": 5.15,
1244
- "learning_rate": 0.001,
1245
- "loss": 0.6222,
1246
- "step": 40200
1247
- },
1248
- {
1249
- "epoch": 5.18,
1250
- "learning_rate": 0.001,
1251
- "loss": 0.6332,
1252
- "step": 40400
1253
- },
1254
- {
1255
- "epoch": 5.2,
1256
- "learning_rate": 0.001,
1257
- "loss": 0.6206,
1258
- "step": 40600
1259
- },
1260
- {
1261
- "epoch": 5.23,
1262
- "learning_rate": 0.001,
1263
- "loss": 0.6279,
1264
- "step": 40800
1265
- },
1266
- {
1267
- "epoch": 5.25,
1268
- "learning_rate": 0.001,
1269
- "loss": 0.6191,
1270
- "step": 41000
1271
- },
1272
- {
1273
- "epoch": 5.28,
1274
- "learning_rate": 0.001,
1275
- "loss": 0.6262,
1276
- "step": 41200
1277
- },
1278
- {
1279
- "epoch": 5.31,
1280
- "learning_rate": 0.001,
1281
- "loss": 0.6386,
1282
- "step": 41400
1283
- },
1284
- {
1285
- "epoch": 5.33,
1286
- "learning_rate": 0.001,
1287
- "loss": 0.6321,
1288
- "step": 41600
1289
- },
1290
- {
1291
- "epoch": 5.36,
1292
- "learning_rate": 0.001,
1293
- "loss": 0.638,
1294
- "step": 41800
1295
- },
1296
- {
1297
- "epoch": 5.38,
1298
- "learning_rate": 0.001,
1299
- "loss": 0.626,
1300
- "step": 42000
1301
- },
1302
- {
1303
- "epoch": 5.41,
1304
- "learning_rate": 0.001,
1305
- "loss": 0.6298,
1306
- "step": 42200
1307
- },
1308
- {
1309
- "epoch": 5.43,
1310
- "learning_rate": 0.001,
1311
- "loss": 0.6148,
1312
- "step": 42400
1313
- },
1314
- {
1315
- "epoch": 5.46,
1316
- "learning_rate": 0.001,
1317
- "loss": 0.6225,
1318
- "step": 42600
1319
- },
1320
- {
1321
- "epoch": 5.49,
1322
- "learning_rate": 0.001,
1323
- "loss": 0.6331,
1324
- "step": 42800
1325
- },
1326
- {
1327
- "epoch": 5.51,
1328
- "learning_rate": 0.001,
1329
- "loss": 0.6347,
1330
- "step": 43000
1331
- },
1332
- {
1333
- "epoch": 5.54,
1334
- "learning_rate": 0.001,
1335
- "loss": 0.6331,
1336
- "step": 43200
1337
- },
1338
- {
1339
- "epoch": 5.56,
1340
- "learning_rate": 0.001,
1341
- "loss": 0.6274,
1342
- "step": 43400
1343
- },
1344
- {
1345
- "epoch": 5.59,
1346
- "learning_rate": 0.001,
1347
- "loss": 0.6388,
1348
- "step": 43600
1349
- },
1350
- {
1351
- "epoch": 5.61,
1352
- "learning_rate": 0.001,
1353
- "loss": 0.6262,
1354
- "step": 43800
1355
- },
1356
- {
1357
- "epoch": 5.64,
1358
- "learning_rate": 0.001,
1359
- "loss": 0.6245,
1360
- "step": 44000
1361
- },
1362
- {
1363
- "epoch": 5.66,
1364
- "learning_rate": 0.001,
1365
- "loss": 0.6317,
1366
- "step": 44200
1367
- },
1368
- {
1369
- "epoch": 5.69,
1370
- "learning_rate": 0.001,
1371
- "loss": 0.6323,
1372
- "step": 44400
1373
- },
1374
- {
1375
- "epoch": 5.72,
1376
- "learning_rate": 0.001,
1377
- "loss": 0.6275,
1378
- "step": 44600
1379
- },
1380
- {
1381
- "epoch": 5.74,
1382
- "learning_rate": 0.001,
1383
- "loss": 0.6284,
1384
- "step": 44800
1385
- },
1386
- {
1387
- "epoch": 5.77,
1388
- "learning_rate": 0.001,
1389
- "loss": 0.6301,
1390
- "step": 45000
1391
- },
1392
- {
1393
- "epoch": 5.79,
1394
- "learning_rate": 0.001,
1395
- "loss": 0.6453,
1396
- "step": 45200
1397
- },
1398
- {
1399
- "epoch": 5.82,
1400
- "learning_rate": 0.001,
1401
- "loss": 0.6244,
1402
- "step": 45400
1403
- },
1404
- {
1405
- "epoch": 5.84,
1406
- "learning_rate": 0.001,
1407
- "loss": 0.6271,
1408
- "step": 45600
1409
- },
1410
- {
1411
- "epoch": 5.87,
1412
- "learning_rate": 0.001,
1413
- "loss": 0.6275,
1414
- "step": 45800
1415
- },
1416
- {
1417
- "epoch": 5.9,
1418
- "learning_rate": 0.001,
1419
- "loss": 0.6483,
1420
- "step": 46000
1421
- },
1422
- {
1423
- "epoch": 5.92,
1424
- "learning_rate": 0.001,
1425
- "loss": 0.6143,
1426
- "step": 46200
1427
- },
1428
- {
1429
- "epoch": 5.95,
1430
- "learning_rate": 0.001,
1431
- "loss": 0.6272,
1432
- "step": 46400
1433
- },
1434
- {
1435
- "epoch": 5.97,
1436
- "learning_rate": 0.001,
1437
- "loss": 0.6425,
1438
- "step": 46600
1439
- },
1440
- {
1441
- "epoch": 6.0,
1442
- "learning_rate": 0.001,
1443
- "loss": 0.6243,
1444
- "step": 46800
1445
- },
1446
- {
1447
- "epoch": 6.02,
1448
- "learning_rate": 0.001,
1449
- "loss": 0.6014,
1450
- "step": 47000
1451
- },
1452
- {
1453
- "epoch": 6.05,
1454
- "learning_rate": 0.001,
1455
- "loss": 0.6004,
1456
- "step": 47200
1457
- },
1458
- {
1459
- "epoch": 6.07,
1460
- "learning_rate": 0.001,
1461
- "loss": 0.611,
1462
- "step": 47400
1463
- },
1464
- {
1465
- "epoch": 6.1,
1466
- "learning_rate": 0.001,
1467
- "loss": 0.6111,
1468
- "step": 47600
1469
- },
1470
- {
1471
- "epoch": 6.13,
1472
- "learning_rate": 0.001,
1473
- "loss": 0.6101,
1474
- "step": 47800
1475
- },
1476
- {
1477
- "epoch": 6.15,
1478
- "learning_rate": 0.001,
1479
- "loss": 0.6154,
1480
- "step": 48000
1481
- },
1482
- {
1483
- "epoch": 6.18,
1484
- "learning_rate": 0.001,
1485
- "loss": 0.6094,
1486
- "step": 48200
1487
- },
1488
- {
1489
- "epoch": 6.2,
1490
- "learning_rate": 0.001,
1491
- "loss": 0.6082,
1492
- "step": 48400
1493
- },
1494
- {
1495
- "epoch": 6.23,
1496
- "learning_rate": 0.001,
1497
- "loss": 0.6013,
1498
- "step": 48600
1499
- },
1500
- {
1501
- "epoch": 6.25,
1502
- "learning_rate": 0.001,
1503
- "loss": 0.6148,
1504
- "step": 48800
1505
- },
1506
- {
1507
- "epoch": 6.28,
1508
- "learning_rate": 0.001,
1509
- "loss": 0.6049,
1510
- "step": 49000
1511
- },
1512
- {
1513
- "epoch": 6.31,
1514
- "learning_rate": 0.001,
1515
- "loss": 0.6089,
1516
- "step": 49200
1517
- },
1518
- {
1519
- "epoch": 6.33,
1520
- "learning_rate": 0.001,
1521
- "loss": 0.61,
1522
- "step": 49400
1523
- },
1524
- {
1525
- "epoch": 6.36,
1526
- "learning_rate": 0.001,
1527
- "loss": 0.6056,
1528
- "step": 49600
1529
- },
1530
- {
1531
- "epoch": 6.38,
1532
- "learning_rate": 0.001,
1533
- "loss": 0.6138,
1534
- "step": 49800
1535
- },
1536
- {
1537
- "epoch": 6.41,
1538
- "learning_rate": 0.001,
1539
- "loss": 0.6212,
1540
- "step": 50000
1541
- },
1542
- {
1543
- "epoch": 6.41,
1544
- "eval_loss": 0.9890182614326477,
1545
- "eval_runtime": 236.1033,
1546
- "eval_samples_per_second": 517.655,
1547
- "eval_steps_per_second": 2.025,
1548
- "step": 50000
1549
- },
1550
- {
1551
- "epoch": 6.43,
1552
- "learning_rate": 0.001,
1553
- "loss": 0.612,
1554
- "step": 50200
1555
- },
1556
- {
1557
- "epoch": 6.46,
1558
- "learning_rate": 0.001,
1559
- "loss": 0.6111,
1560
- "step": 50400
1561
- },
1562
- {
1563
- "epoch": 6.48,
1564
- "learning_rate": 0.001,
1565
- "loss": 0.6146,
1566
- "step": 50600
1567
- },
1568
- {
1569
- "epoch": 6.51,
1570
- "learning_rate": 0.001,
1571
- "loss": 0.615,
1572
- "step": 50800
1573
- },
1574
- {
1575
- "epoch": 6.54,
1576
- "learning_rate": 0.001,
1577
- "loss": 0.6128,
1578
- "step": 51000
1579
- },
1580
- {
1581
- "epoch": 6.56,
1582
- "learning_rate": 0.001,
1583
- "loss": 0.6183,
1584
- "step": 51200
1585
- },
1586
- {
1587
- "epoch": 6.59,
1588
- "learning_rate": 0.001,
1589
- "loss": 0.6115,
1590
- "step": 51400
1591
- },
1592
- {
1593
- "epoch": 6.61,
1594
- "learning_rate": 0.001,
1595
- "loss": 0.6068,
1596
- "step": 51600
1597
- },
1598
- {
1599
- "epoch": 6.64,
1600
- "learning_rate": 0.001,
1601
- "loss": 0.6107,
1602
- "step": 51800
1603
- },
1604
- {
1605
- "epoch": 6.66,
1606
- "learning_rate": 0.001,
1607
- "loss": 0.6256,
1608
- "step": 52000
1609
- },
1610
- {
1611
- "epoch": 6.69,
1612
- "learning_rate": 0.001,
1613
- "loss": 0.6149,
1614
- "step": 52200
1615
- },
1616
- {
1617
- "epoch": 6.72,
1618
- "learning_rate": 0.001,
1619
- "loss": 0.606,
1620
- "step": 52400
1621
- },
1622
- {
1623
- "epoch": 6.74,
1624
- "learning_rate": 0.001,
1625
- "loss": 0.6203,
1626
- "step": 52600
1627
- },
1628
- {
1629
- "epoch": 6.77,
1630
- "learning_rate": 0.001,
1631
- "loss": 0.6117,
1632
- "step": 52800
1633
- },
1634
- {
1635
- "epoch": 6.79,
1636
- "learning_rate": 0.001,
1637
- "loss": 0.6079,
1638
- "step": 53000
1639
- },
1640
- {
1641
- "epoch": 6.82,
1642
- "learning_rate": 0.001,
1643
- "loss": 0.6164,
1644
- "step": 53200
1645
- },
1646
- {
1647
- "epoch": 6.84,
1648
- "learning_rate": 0.001,
1649
- "loss": 0.6162,
1650
- "step": 53400
1651
- },
1652
- {
1653
- "epoch": 6.87,
1654
- "learning_rate": 0.001,
1655
- "loss": 0.6094,
1656
- "step": 53600
1657
- },
1658
- {
1659
- "epoch": 6.89,
1660
- "learning_rate": 0.001,
1661
- "loss": 0.6134,
1662
- "step": 53800
1663
- },
1664
- {
1665
- "epoch": 6.92,
1666
- "learning_rate": 0.001,
1667
- "loss": 0.617,
1668
- "step": 54000
1669
- },
1670
- {
1671
- "epoch": 6.95,
1672
- "learning_rate": 0.001,
1673
- "loss": 0.6165,
1674
- "step": 54200
1675
- },
1676
- {
1677
- "epoch": 6.97,
1678
- "learning_rate": 0.001,
1679
- "loss": 0.6127,
1680
- "step": 54400
1681
- },
1682
- {
1683
- "epoch": 7.0,
1684
- "learning_rate": 0.001,
1685
- "loss": 0.6127,
1686
- "step": 54600
1687
- },
1688
- {
1689
- "epoch": 7.02,
1690
- "learning_rate": 0.001,
1691
- "loss": 0.593,
1692
- "step": 54800
1693
- },
1694
- {
1695
- "epoch": 7.05,
1696
- "learning_rate": 0.001,
1697
- "loss": 0.5891,
1698
- "step": 55000
1699
- },
1700
- {
1701
- "epoch": 7.07,
1702
- "learning_rate": 0.001,
1703
- "loss": 0.6004,
1704
- "step": 55200
1705
- },
1706
- {
1707
- "epoch": 7.1,
1708
- "learning_rate": 0.001,
1709
- "loss": 0.5824,
1710
- "step": 55400
1711
- },
1712
- {
1713
- "epoch": 7.13,
1714
- "learning_rate": 0.001,
1715
- "loss": 0.5949,
1716
- "step": 55600
1717
- },
1718
- {
1719
- "epoch": 7.15,
1720
- "learning_rate": 0.001,
1721
- "loss": 0.5931,
1722
- "step": 55800
1723
- },
1724
- {
1725
- "epoch": 7.18,
1726
- "learning_rate": 0.001,
1727
- "loss": 0.5861,
1728
- "step": 56000
1729
- },
1730
- {
1731
- "epoch": 7.2,
1732
- "learning_rate": 0.001,
1733
- "loss": 0.6035,
1734
- "step": 56200
1735
- },
1736
- {
1737
- "epoch": 7.23,
1738
- "learning_rate": 0.001,
1739
- "loss": 0.5859,
1740
- "step": 56400
1741
- },
1742
- {
1743
- "epoch": 7.25,
1744
- "learning_rate": 0.001,
1745
- "loss": 0.5953,
1746
- "step": 56600
1747
- },
1748
- {
1749
- "epoch": 7.28,
1750
- "learning_rate": 0.001,
1751
- "loss": 0.6032,
1752
- "step": 56800
1753
- },
1754
- {
1755
- "epoch": 7.3,
1756
- "learning_rate": 0.001,
1757
- "loss": 0.5945,
1758
- "step": 57000
1759
- },
1760
- {
1761
- "epoch": 7.33,
1762
- "learning_rate": 0.001,
1763
- "loss": 0.598,
1764
- "step": 57200
1765
- },
1766
- {
1767
- "epoch": 7.36,
1768
- "learning_rate": 0.001,
1769
- "loss": 0.5981,
1770
- "step": 57400
1771
- },
1772
- {
1773
- "epoch": 7.38,
1774
- "learning_rate": 0.001,
1775
- "loss": 0.6034,
1776
- "step": 57600
1777
- },
1778
- {
1779
- "epoch": 7.41,
1780
- "learning_rate": 0.001,
1781
- "loss": 0.6075,
1782
- "step": 57800
1783
- },
1784
- {
1785
- "epoch": 7.43,
1786
- "learning_rate": 0.001,
1787
- "loss": 0.6065,
1788
- "step": 58000
1789
- },
1790
- {
1791
- "epoch": 7.46,
1792
- "learning_rate": 0.001,
1793
- "loss": 0.5972,
1794
- "step": 58200
1795
- },
1796
- {
1797
- "epoch": 7.48,
1798
- "learning_rate": 0.001,
1799
- "loss": 0.6048,
1800
- "step": 58400
1801
- },
1802
- {
1803
- "epoch": 7.51,
1804
- "learning_rate": 0.001,
1805
- "loss": 0.5977,
1806
- "step": 58600
1807
- },
1808
- {
1809
- "epoch": 7.54,
1810
- "learning_rate": 0.001,
1811
- "loss": 0.5998,
1812
- "step": 58800
1813
- },
1814
- {
1815
- "epoch": 7.56,
1816
- "learning_rate": 0.001,
1817
- "loss": 0.5943,
1818
- "step": 59000
1819
- },
1820
- {
1821
- "epoch": 7.59,
1822
- "learning_rate": 0.001,
1823
- "loss": 0.5883,
1824
- "step": 59200
1825
- },
1826
- {
1827
- "epoch": 7.61,
1828
- "learning_rate": 0.001,
1829
- "loss": 0.5999,
1830
- "step": 59400
1831
- },
1832
- {
1833
- "epoch": 7.64,
1834
- "learning_rate": 0.001,
1835
- "loss": 0.5957,
1836
- "step": 59600
1837
- },
1838
- {
1839
- "epoch": 7.66,
1840
- "learning_rate": 0.001,
1841
- "loss": 0.6103,
1842
- "step": 59800
1843
- },
1844
- {
1845
- "epoch": 7.69,
1846
- "learning_rate": 0.001,
1847
- "loss": 0.6078,
1848
- "step": 60000
1849
- },
1850
- {
1851
- "epoch": 7.69,
1852
- "eval_loss": 0.9891012907028198,
1853
- "eval_runtime": 236.1606,
1854
- "eval_samples_per_second": 517.529,
1855
- "eval_steps_per_second": 2.024,
1856
- "step": 60000
1857
- },
1858
- {
1859
- "epoch": 7.71,
1860
- "learning_rate": 0.001,
1861
- "loss": 0.5903,
1862
- "step": 60200
1863
- },
1864
- {
1865
- "epoch": 7.74,
1866
- "learning_rate": 0.001,
1867
- "loss": 0.6007,
1868
- "step": 60400
1869
- },
1870
- {
1871
- "epoch": 7.77,
1872
- "learning_rate": 0.001,
1873
- "loss": 0.5973,
1874
- "step": 60600
1875
- },
1876
- {
1877
- "epoch": 7.79,
1878
- "learning_rate": 0.001,
1879
- "loss": 0.596,
1880
- "step": 60800
1881
- },
1882
- {
1883
- "epoch": 7.82,
1884
- "learning_rate": 0.001,
1885
- "loss": 0.597,
1886
- "step": 61000
1887
- },
1888
- {
1889
- "epoch": 7.84,
1890
- "learning_rate": 0.001,
1891
- "loss": 0.6018,
1892
- "step": 61200
1893
- },
1894
- {
1895
- "epoch": 7.87,
1896
- "learning_rate": 0.001,
1897
- "loss": 0.6024,
1898
- "step": 61400
1899
- },
1900
- {
1901
- "epoch": 7.89,
1902
- "learning_rate": 0.001,
1903
- "loss": 0.6011,
1904
- "step": 61600
1905
- },
1906
- {
1907
- "epoch": 7.92,
1908
- "learning_rate": 0.001,
1909
- "loss": 0.5883,
1910
- "step": 61800
1911
- },
1912
- {
1913
- "epoch": 7.95,
1914
- "learning_rate": 0.001,
1915
- "loss": 0.6006,
1916
- "step": 62000
1917
- },
1918
- {
1919
- "epoch": 7.97,
1920
- "learning_rate": 0.001,
1921
- "loss": 0.6004,
1922
- "step": 62200
1923
- },
1924
- {
1925
- "epoch": 8.0,
1926
- "learning_rate": 0.001,
1927
- "loss": 0.6083,
1928
- "step": 62400
1929
- },
1930
- {
1931
- "epoch": 8.02,
1932
- "learning_rate": 0.001,
1933
- "loss": 0.5844,
1934
- "step": 62600
1935
- },
1936
- {
1937
- "epoch": 8.05,
1938
- "learning_rate": 0.001,
1939
- "loss": 0.5751,
1940
- "step": 62800
1941
- },
1942
- {
1943
- "epoch": 8.07,
1944
- "learning_rate": 0.001,
1945
- "loss": 0.5716,
1946
- "step": 63000
1947
- },
1948
- {
1949
- "epoch": 8.1,
1950
- "learning_rate": 0.001,
1951
- "loss": 0.5748,
1952
- "step": 63200
1953
- },
1954
- {
1955
- "epoch": 8.13,
1956
- "learning_rate": 0.001,
1957
- "loss": 0.5752,
1958
- "step": 63400
1959
- },
1960
- {
1961
- "epoch": 8.15,
1962
- "learning_rate": 0.001,
1963
- "loss": 0.5697,
1964
- "step": 63600
1965
- },
1966
- {
1967
- "epoch": 8.18,
1968
- "learning_rate": 0.001,
1969
- "loss": 0.5751,
1970
- "step": 63800
1971
- },
1972
- {
1973
- "epoch": 8.2,
1974
- "learning_rate": 0.001,
1975
- "loss": 0.5821,
1976
- "step": 64000
1977
- },
1978
- {
1979
- "epoch": 8.23,
1980
- "learning_rate": 0.001,
1981
- "loss": 0.578,
1982
- "step": 64200
1983
- },
1984
- {
1985
- "epoch": 8.25,
1986
- "learning_rate": 0.001,
1987
- "loss": 0.5768,
1988
- "step": 64400
1989
- },
1990
- {
1991
- "epoch": 8.28,
1992
- "learning_rate": 0.001,
1993
- "loss": 0.5807,
1994
- "step": 64600
1995
- },
1996
- {
1997
- "epoch": 8.3,
1998
- "learning_rate": 0.001,
1999
- "loss": 0.5872,
2000
- "step": 64800
2001
- },
2002
- {
2003
- "epoch": 8.33,
2004
- "learning_rate": 0.001,
2005
- "loss": 0.5802,
2006
- "step": 65000
2007
- },
2008
- {
2009
- "epoch": 8.36,
2010
- "learning_rate": 0.001,
2011
- "loss": 0.5883,
2012
- "step": 65200
2013
- },
2014
- {
2015
- "epoch": 8.38,
2016
- "learning_rate": 0.001,
2017
- "loss": 0.5792,
2018
- "step": 65400
2019
- },
2020
- {
2021
- "epoch": 8.41,
2022
- "learning_rate": 0.001,
2023
- "loss": 0.5888,
2024
- "step": 65600
2025
- },
2026
- {
2027
- "epoch": 8.43,
2028
- "learning_rate": 0.001,
2029
- "loss": 0.6033,
2030
- "step": 65800
2031
- },
2032
- {
2033
- "epoch": 8.46,
2034
- "learning_rate": 0.001,
2035
- "loss": 0.5967,
2036
- "step": 66000
2037
- },
2038
- {
2039
- "epoch": 8.48,
2040
- "learning_rate": 0.001,
2041
- "loss": 0.5909,
2042
- "step": 66200
2043
- },
2044
- {
2045
- "epoch": 8.51,
2046
- "learning_rate": 0.001,
2047
- "loss": 0.5823,
2048
- "step": 66400
2049
- },
2050
- {
2051
- "epoch": 8.54,
2052
- "learning_rate": 0.001,
2053
- "loss": 0.5883,
2054
- "step": 66600
2055
- },
2056
- {
2057
- "epoch": 8.56,
2058
- "learning_rate": 0.001,
2059
- "loss": 0.5862,
2060
- "step": 66800
2061
- },
2062
- {
2063
- "epoch": 8.59,
2064
- "learning_rate": 0.001,
2065
- "loss": 0.5924,
2066
- "step": 67000
2067
- },
2068
- {
2069
- "epoch": 8.61,
2070
- "learning_rate": 0.001,
2071
- "loss": 0.5902,
2072
- "step": 67200
2073
- },
2074
- {
2075
- "epoch": 8.64,
2076
- "learning_rate": 0.001,
2077
- "loss": 0.5841,
2078
- "step": 67400
2079
- },
2080
- {
2081
- "epoch": 8.66,
2082
- "learning_rate": 0.001,
2083
- "loss": 0.5919,
2084
- "step": 67600
2085
- },
2086
- {
2087
- "epoch": 8.69,
2088
- "learning_rate": 0.001,
2089
- "loss": 0.5889,
2090
- "step": 67800
2091
- },
2092
- {
2093
- "epoch": 8.71,
2094
- "learning_rate": 0.001,
2095
- "loss": 0.5913,
2096
- "step": 68000
2097
- },
2098
- {
2099
- "epoch": 8.74,
2100
- "learning_rate": 0.001,
2101
- "loss": 0.585,
2102
- "step": 68200
2103
- },
2104
- {
2105
- "epoch": 8.77,
2106
- "learning_rate": 0.001,
2107
- "loss": 0.5887,
2108
- "step": 68400
2109
- },
2110
- {
2111
- "epoch": 8.79,
2112
- "learning_rate": 0.001,
2113
- "loss": 0.5931,
2114
- "step": 68600
2115
- },
2116
- {
2117
- "epoch": 8.82,
2118
- "learning_rate": 0.001,
2119
- "loss": 0.5799,
2120
- "step": 68800
2121
- },
2122
- {
2123
- "epoch": 8.84,
2124
- "learning_rate": 0.001,
2125
- "loss": 0.585,
2126
- "step": 69000
2127
- },
2128
- {
2129
- "epoch": 8.87,
2130
- "learning_rate": 0.001,
2131
- "loss": 0.5765,
2132
- "step": 69200
2133
- },
2134
- {
2135
- "epoch": 8.89,
2136
- "learning_rate": 0.001,
2137
- "loss": 0.5827,
2138
- "step": 69400
2139
- },
2140
- {
2141
- "epoch": 8.92,
2142
- "learning_rate": 0.001,
2143
- "loss": 0.5875,
2144
- "step": 69600
2145
- },
2146
- {
2147
- "epoch": 8.95,
2148
- "learning_rate": 0.001,
2149
- "loss": 0.5909,
2150
- "step": 69800
2151
- },
2152
- {
2153
- "epoch": 8.97,
2154
- "learning_rate": 0.001,
2155
- "loss": 0.6049,
2156
- "step": 70000
2157
- },
2158
- {
2159
- "epoch": 8.97,
2160
- "eval_loss": 0.9819265007972717,
2161
- "eval_runtime": 236.1037,
2162
- "eval_samples_per_second": 517.654,
2163
- "eval_steps_per_second": 2.025,
2164
- "step": 70000
2165
- }
2166
- ],
2167
- "max_steps": 78030,
2168
- "num_train_epochs": 10,
2169
- "total_flos": 1.0371070946901688e+18,
2170
- "trial_name": null,
2171
- "trial_params": null
2172
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-70000/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c54b65c82648d26883e671a714432a6b89db3e379b3bb499efde31bee2c112
3
- size 3631