Muhammad Khalifa commited on
Commit
7d22281
·
1 Parent(s): c5d9bce

remove unneeded folder

Browse files
with_input/decomp_code_with_intermediates/best_model/adapter_config.json DELETED
@@ -1,21 +0,0 @@
1
- {
2
- "base_model_name_or_path": "huggyllama/llama-7b",
3
- "bias": "none",
4
- "enable_lora": null,
5
- "fan_in_fan_out": false,
6
- "inference_mode": true,
7
- "init_lora_weights": true,
8
- "lora_alpha": 16,
9
- "lora_dropout": 0.05,
10
- "merge_weights": false,
11
- "modules_to_save": null,
12
- "peft_type": "LORA",
13
- "r": 16,
14
- "target_modules": [
15
- "q_proj",
16
- "k_proj",
17
- "v_proj",
18
- "o_proj"
19
- ],
20
- "task_type": "CAUSAL_LM"
21
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:76aa0870af36dca253bba8ee06ec331a5604fc78803f5181836c06702e84c742
3
- size 67201357
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:595206e644bd8c27afa495804726feb8f34923038c2531af6f7cfb9bd0aea415
3
- size 134433093
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:65c022292c1fb262c54e2784716b052f1a6d5fde0e8354f090107d64cdaffcd1
3
- size 14575
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbb310518f28dfe4320b99ab0c420cbd81f1e17ce9a42136205bc4d89821c91
3
- size 627
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/trainer_state.json DELETED
@@ -1,3987 +0,0 @@
1
- {
2
- "best_metric": 0.27837860584259033,
3
- "best_model_checkpoint": "checkpoints/instrucode/with_input/decomp_code_with_intermediates/checkpoint-6200",
4
- "epoch": 3.987138263665595,
5
- "eval_steps": 200,
6
- "global_step": 6200,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.01,
13
- "learning_rate": 2.9999999999999997e-05,
14
- "loss": 1.0679,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.01,
19
- "learning_rate": 5.6999999999999996e-05,
20
- "loss": 1.049,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.02,
25
- "learning_rate": 8.699999999999999e-05,
26
- "loss": 0.8986,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.03,
31
- "learning_rate": 0.000117,
32
- "loss": 0.6797,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.03,
37
- "learning_rate": 0.000147,
38
- "loss": 0.5126,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.04,
43
- "learning_rate": 0.00017699999999999997,
44
- "loss": 0.4582,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.05,
49
- "learning_rate": 0.00020699999999999996,
50
- "loss": 0.4072,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.05,
55
- "learning_rate": 0.000237,
56
- "loss": 0.3944,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.06,
61
- "learning_rate": 0.000267,
62
- "loss": 0.3726,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.06,
67
- "learning_rate": 0.00029699999999999996,
68
- "loss": 0.3735,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.07,
73
- "learning_rate": 0.00029964820846905535,
74
- "loss": 0.3596,
75
- "step": 110
76
- },
77
- {
78
- "epoch": 0.08,
79
- "learning_rate": 0.000299257328990228,
80
- "loss": 0.351,
81
- "step": 120
82
- },
83
- {
84
- "epoch": 0.08,
85
- "learning_rate": 0.00029886644951140066,
86
- "loss": 0.3575,
87
- "step": 130
88
- },
89
- {
90
- "epoch": 0.09,
91
- "learning_rate": 0.00029847557003257326,
92
- "loss": 0.3498,
93
- "step": 140
94
- },
95
- {
96
- "epoch": 0.1,
97
- "learning_rate": 0.0002980846905537459,
98
- "loss": 0.3383,
99
- "step": 150
100
- },
101
- {
102
- "epoch": 0.1,
103
- "learning_rate": 0.00029769381107491857,
104
- "loss": 0.3487,
105
- "step": 160
106
- },
107
- {
108
- "epoch": 0.11,
109
- "learning_rate": 0.00029730293159609117,
110
- "loss": 0.3397,
111
- "step": 170
112
- },
113
- {
114
- "epoch": 0.12,
115
- "learning_rate": 0.0002969120521172638,
116
- "loss": 0.3469,
117
- "step": 180
118
- },
119
- {
120
- "epoch": 0.12,
121
- "learning_rate": 0.0002965211726384364,
122
- "loss": 0.3317,
123
- "step": 190
124
- },
125
- {
126
- "epoch": 0.13,
127
- "learning_rate": 0.0002961302931596091,
128
- "loss": 0.3329,
129
- "step": 200
130
- },
131
- {
132
- "epoch": 0.13,
133
- "eval_loss": 0.331393301486969,
134
- "eval_runtime": 441.8972,
135
- "eval_samples_per_second": 4.526,
136
- "eval_steps_per_second": 0.566,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.14,
141
- "learning_rate": 0.00029573941368078174,
142
- "loss": 0.3368,
143
- "step": 210
144
- },
145
- {
146
- "epoch": 0.14,
147
- "learning_rate": 0.0002953485342019544,
148
- "loss": 0.3259,
149
- "step": 220
150
- },
151
- {
152
- "epoch": 0.15,
153
- "learning_rate": 0.00029495765472312705,
154
- "loss": 0.3223,
155
- "step": 230
156
- },
157
- {
158
- "epoch": 0.15,
159
- "learning_rate": 0.00029456677524429965,
160
- "loss": 0.3383,
161
- "step": 240
162
- },
163
- {
164
- "epoch": 0.16,
165
- "learning_rate": 0.0002941758957654723,
166
- "loss": 0.3239,
167
- "step": 250
168
- },
169
- {
170
- "epoch": 0.17,
171
- "learning_rate": 0.0002937850162866449,
172
- "loss": 0.341,
173
- "step": 260
174
- },
175
- {
176
- "epoch": 0.17,
177
- "learning_rate": 0.00029339413680781756,
178
- "loss": 0.3222,
179
- "step": 270
180
- },
181
- {
182
- "epoch": 0.18,
183
- "learning_rate": 0.0002930032573289902,
184
- "loss": 0.3235,
185
- "step": 280
186
- },
187
- {
188
- "epoch": 0.19,
189
- "learning_rate": 0.0002926123778501628,
190
- "loss": 0.3343,
191
- "step": 290
192
- },
193
- {
194
- "epoch": 0.19,
195
- "learning_rate": 0.00029222149837133547,
196
- "loss": 0.3332,
197
- "step": 300
198
- },
199
- {
200
- "epoch": 0.2,
201
- "learning_rate": 0.0002918306188925081,
202
- "loss": 0.3198,
203
- "step": 310
204
- },
205
- {
206
- "epoch": 0.21,
207
- "learning_rate": 0.0002914397394136808,
208
- "loss": 0.3269,
209
- "step": 320
210
- },
211
- {
212
- "epoch": 0.21,
213
- "learning_rate": 0.0002910488599348534,
214
- "loss": 0.3229,
215
- "step": 330
216
- },
217
- {
218
- "epoch": 0.22,
219
- "learning_rate": 0.00029065798045602604,
220
- "loss": 0.3284,
221
- "step": 340
222
- },
223
- {
224
- "epoch": 0.23,
225
- "learning_rate": 0.00029026710097719864,
226
- "loss": 0.3202,
227
- "step": 350
228
- },
229
- {
230
- "epoch": 0.23,
231
- "learning_rate": 0.0002898762214983713,
232
- "loss": 0.3214,
233
- "step": 360
234
- },
235
- {
236
- "epoch": 0.24,
237
- "learning_rate": 0.00028948534201954395,
238
- "loss": 0.3169,
239
- "step": 370
240
- },
241
- {
242
- "epoch": 0.24,
243
- "learning_rate": 0.0002890944625407166,
244
- "loss": 0.3177,
245
- "step": 380
246
- },
247
- {
248
- "epoch": 0.25,
249
- "learning_rate": 0.0002887035830618892,
250
- "loss": 0.3254,
251
- "step": 390
252
- },
253
- {
254
- "epoch": 0.26,
255
- "learning_rate": 0.00028831270358306186,
256
- "loss": 0.3235,
257
- "step": 400
258
- },
259
- {
260
- "epoch": 0.26,
261
- "eval_loss": 0.31607797741889954,
262
- "eval_runtime": 442.0436,
263
- "eval_samples_per_second": 4.524,
264
- "eval_steps_per_second": 0.566,
265
- "step": 400
266
- },
267
- {
268
- "epoch": 0.26,
269
- "learning_rate": 0.0002879218241042345,
270
- "loss": 0.323,
271
- "step": 410
272
- },
273
- {
274
- "epoch": 0.27,
275
- "learning_rate": 0.0002875309446254071,
276
- "loss": 0.3077,
277
- "step": 420
278
- },
279
- {
280
- "epoch": 0.28,
281
- "learning_rate": 0.0002871400651465798,
282
- "loss": 0.3188,
283
- "step": 430
284
- },
285
- {
286
- "epoch": 0.28,
287
- "learning_rate": 0.00028674918566775243,
288
- "loss": 0.3265,
289
- "step": 440
290
- },
291
- {
292
- "epoch": 0.29,
293
- "learning_rate": 0.00028635830618892503,
294
- "loss": 0.3087,
295
- "step": 450
296
- },
297
- {
298
- "epoch": 0.3,
299
- "learning_rate": 0.0002859674267100977,
300
- "loss": 0.3102,
301
- "step": 460
302
- },
303
- {
304
- "epoch": 0.3,
305
- "learning_rate": 0.00028557654723127034,
306
- "loss": 0.3135,
307
- "step": 470
308
- },
309
- {
310
- "epoch": 0.31,
311
- "learning_rate": 0.000285185667752443,
312
- "loss": 0.317,
313
- "step": 480
314
- },
315
- {
316
- "epoch": 0.32,
317
- "learning_rate": 0.00028479478827361565,
318
- "loss": 0.3141,
319
- "step": 490
320
- },
321
- {
322
- "epoch": 0.32,
323
- "learning_rate": 0.00028440390879478825,
324
- "loss": 0.3219,
325
- "step": 500
326
- },
327
- {
328
- "epoch": 0.33,
329
- "learning_rate": 0.0002840130293159609,
330
- "loss": 0.3121,
331
- "step": 510
332
- },
333
- {
334
- "epoch": 0.33,
335
- "learning_rate": 0.0002836221498371335,
336
- "loss": 0.3111,
337
- "step": 520
338
- },
339
- {
340
- "epoch": 0.34,
341
- "learning_rate": 0.00028323127035830616,
342
- "loss": 0.3212,
343
- "step": 530
344
- },
345
- {
346
- "epoch": 0.35,
347
- "learning_rate": 0.0002828403908794788,
348
- "loss": 0.3104,
349
- "step": 540
350
- },
351
- {
352
- "epoch": 0.35,
353
- "learning_rate": 0.0002824495114006514,
354
- "loss": 0.3152,
355
- "step": 550
356
- },
357
- {
358
- "epoch": 0.36,
359
- "learning_rate": 0.0002820586319218241,
360
- "loss": 0.3197,
361
- "step": 560
362
- },
363
- {
364
- "epoch": 0.37,
365
- "learning_rate": 0.00028166775244299673,
366
- "loss": 0.3199,
367
- "step": 570
368
- },
369
- {
370
- "epoch": 0.37,
371
- "learning_rate": 0.0002812768729641694,
372
- "loss": 0.3099,
373
- "step": 580
374
- },
375
- {
376
- "epoch": 0.38,
377
- "learning_rate": 0.000280885993485342,
378
- "loss": 0.31,
379
- "step": 590
380
- },
381
- {
382
- "epoch": 0.39,
383
- "learning_rate": 0.00028049511400651464,
384
- "loss": 0.3109,
385
- "step": 600
386
- },
387
- {
388
- "epoch": 0.39,
389
- "eval_loss": 0.3083530366420746,
390
- "eval_runtime": 441.735,
391
- "eval_samples_per_second": 4.528,
392
- "eval_steps_per_second": 0.566,
393
- "step": 600
394
- },
395
- {
396
- "epoch": 0.39,
397
- "learning_rate": 0.00028010423452768724,
398
- "loss": 0.3041,
399
- "step": 610
400
- },
401
- {
402
- "epoch": 0.4,
403
- "learning_rate": 0.0002797133550488599,
404
- "loss": 0.3056,
405
- "step": 620
406
- },
407
- {
408
- "epoch": 0.41,
409
- "learning_rate": 0.00027932247557003255,
410
- "loss": 0.3102,
411
- "step": 630
412
- },
413
- {
414
- "epoch": 0.41,
415
- "learning_rate": 0.0002789315960912052,
416
- "loss": 0.3156,
417
- "step": 640
418
- },
419
- {
420
- "epoch": 0.42,
421
- "learning_rate": 0.0002785407166123778,
422
- "loss": 0.3121,
423
- "step": 650
424
- },
425
- {
426
- "epoch": 0.42,
427
- "learning_rate": 0.00027814983713355046,
428
- "loss": 0.3068,
429
- "step": 660
430
- },
431
- {
432
- "epoch": 0.43,
433
- "learning_rate": 0.0002777589576547231,
434
- "loss": 0.3037,
435
- "step": 670
436
- },
437
- {
438
- "epoch": 0.44,
439
- "learning_rate": 0.0002773680781758957,
440
- "loss": 0.3103,
441
- "step": 680
442
- },
443
- {
444
- "epoch": 0.44,
445
- "learning_rate": 0.0002769771986970684,
446
- "loss": 0.308,
447
- "step": 690
448
- },
449
- {
450
- "epoch": 0.45,
451
- "learning_rate": 0.000276586319218241,
452
- "loss": 0.3005,
453
- "step": 700
454
- },
455
- {
456
- "epoch": 0.46,
457
- "learning_rate": 0.00027619543973941363,
458
- "loss": 0.3054,
459
- "step": 710
460
- },
461
- {
462
- "epoch": 0.46,
463
- "learning_rate": 0.0002758045602605863,
464
- "loss": 0.3087,
465
- "step": 720
466
- },
467
- {
468
- "epoch": 0.47,
469
- "learning_rate": 0.00027541368078175894,
470
- "loss": 0.3071,
471
- "step": 730
472
- },
473
- {
474
- "epoch": 0.48,
475
- "learning_rate": 0.0002750228013029316,
476
- "loss": 0.3156,
477
- "step": 740
478
- },
479
- {
480
- "epoch": 0.48,
481
- "learning_rate": 0.0002746319218241042,
482
- "loss": 0.3072,
483
- "step": 750
484
- },
485
- {
486
- "epoch": 0.49,
487
- "learning_rate": 0.00027424104234527685,
488
- "loss": 0.3174,
489
- "step": 760
490
- },
491
- {
492
- "epoch": 0.5,
493
- "learning_rate": 0.0002738501628664495,
494
- "loss": 0.3064,
495
- "step": 770
496
- },
497
- {
498
- "epoch": 0.5,
499
- "learning_rate": 0.0002734592833876221,
500
- "loss": 0.3049,
501
- "step": 780
502
- },
503
- {
504
- "epoch": 0.51,
505
- "learning_rate": 0.00027306840390879477,
506
- "loss": 0.3014,
507
- "step": 790
508
- },
509
- {
510
- "epoch": 0.51,
511
- "learning_rate": 0.00027267752442996737,
512
- "loss": 0.3047,
513
- "step": 800
514
- },
515
- {
516
- "epoch": 0.51,
517
- "eval_loss": 0.3024560511112213,
518
- "eval_runtime": 441.654,
519
- "eval_samples_per_second": 4.528,
520
- "eval_steps_per_second": 0.566,
521
- "step": 800
522
- },
523
- {
524
- "epoch": 0.52,
525
- "learning_rate": 0.00027228664495114,
526
- "loss": 0.3075,
527
- "step": 810
528
- },
529
- {
530
- "epoch": 0.53,
531
- "learning_rate": 0.0002718957654723127,
532
- "loss": 0.3149,
533
- "step": 820
534
- },
535
- {
536
- "epoch": 0.53,
537
- "learning_rate": 0.00027150488599348533,
538
- "loss": 0.3084,
539
- "step": 830
540
- },
541
- {
542
- "epoch": 0.54,
543
- "learning_rate": 0.000271114006514658,
544
- "loss": 0.3046,
545
- "step": 840
546
- },
547
- {
548
- "epoch": 0.55,
549
- "learning_rate": 0.0002707231270358306,
550
- "loss": 0.296,
551
- "step": 850
552
- },
553
- {
554
- "epoch": 0.55,
555
- "learning_rate": 0.00027033224755700324,
556
- "loss": 0.3089,
557
- "step": 860
558
- },
559
- {
560
- "epoch": 0.56,
561
- "learning_rate": 0.00026994136807817584,
562
- "loss": 0.3074,
563
- "step": 870
564
- },
565
- {
566
- "epoch": 0.57,
567
- "learning_rate": 0.0002695504885993485,
568
- "loss": 0.2972,
569
- "step": 880
570
- },
571
- {
572
- "epoch": 0.57,
573
- "learning_rate": 0.00026915960912052116,
574
- "loss": 0.2911,
575
- "step": 890
576
- },
577
- {
578
- "epoch": 0.58,
579
- "learning_rate": 0.0002687687296416938,
580
- "loss": 0.3147,
581
- "step": 900
582
- },
583
- {
584
- "epoch": 0.59,
585
- "learning_rate": 0.0002683778501628664,
586
- "loss": 0.2985,
587
- "step": 910
588
- },
589
- {
590
- "epoch": 0.59,
591
- "learning_rate": 0.00026798697068403907,
592
- "loss": 0.3073,
593
- "step": 920
594
- },
595
- {
596
- "epoch": 0.6,
597
- "learning_rate": 0.0002675960912052117,
598
- "loss": 0.304,
599
- "step": 930
600
- },
601
- {
602
- "epoch": 0.6,
603
- "learning_rate": 0.0002672052117263843,
604
- "loss": 0.2991,
605
- "step": 940
606
- },
607
- {
608
- "epoch": 0.61,
609
- "learning_rate": 0.000266814332247557,
610
- "loss": 0.2949,
611
- "step": 950
612
- },
613
- {
614
- "epoch": 0.62,
615
- "learning_rate": 0.0002664234527687296,
616
- "loss": 0.2962,
617
- "step": 960
618
- },
619
- {
620
- "epoch": 0.62,
621
- "learning_rate": 0.00026603257328990223,
622
- "loss": 0.3013,
623
- "step": 970
624
- },
625
- {
626
- "epoch": 0.63,
627
- "learning_rate": 0.0002656416938110749,
628
- "loss": 0.3004,
629
- "step": 980
630
- },
631
- {
632
- "epoch": 0.64,
633
- "learning_rate": 0.00026525081433224755,
634
- "loss": 0.2941,
635
- "step": 990
636
- },
637
- {
638
- "epoch": 0.64,
639
- "learning_rate": 0.0002648599348534202,
640
- "loss": 0.2914,
641
- "step": 1000
642
- },
643
- {
644
- "epoch": 0.64,
645
- "eval_loss": 0.29872822761535645,
646
- "eval_runtime": 442.1635,
647
- "eval_samples_per_second": 4.523,
648
- "eval_steps_per_second": 0.565,
649
- "step": 1000
650
- },
651
- {
652
- "epoch": 0.65,
653
- "learning_rate": 0.0002644690553745928,
654
- "loss": 0.2975,
655
- "step": 1010
656
- },
657
- {
658
- "epoch": 0.66,
659
- "learning_rate": 0.00026407817589576546,
660
- "loss": 0.2966,
661
- "step": 1020
662
- },
663
- {
664
- "epoch": 0.66,
665
- "learning_rate": 0.0002636872964169381,
666
- "loss": 0.2993,
667
- "step": 1030
668
- },
669
- {
670
- "epoch": 0.67,
671
- "learning_rate": 0.0002632964169381107,
672
- "loss": 0.3016,
673
- "step": 1040
674
- },
675
- {
676
- "epoch": 0.68,
677
- "learning_rate": 0.00026290553745928337,
678
- "loss": 0.303,
679
- "step": 1050
680
- },
681
- {
682
- "epoch": 0.68,
683
- "learning_rate": 0.00026251465798045597,
684
- "loss": 0.3021,
685
- "step": 1060
686
- },
687
- {
688
- "epoch": 0.69,
689
- "learning_rate": 0.0002621237785016286,
690
- "loss": 0.2993,
691
- "step": 1070
692
- },
693
- {
694
- "epoch": 0.69,
695
- "learning_rate": 0.0002617328990228013,
696
- "loss": 0.3008,
697
- "step": 1080
698
- },
699
- {
700
- "epoch": 0.7,
701
- "learning_rate": 0.00026134201954397393,
702
- "loss": 0.2929,
703
- "step": 1090
704
- },
705
- {
706
- "epoch": 0.71,
707
- "learning_rate": 0.0002609511400651466,
708
- "loss": 0.2946,
709
- "step": 1100
710
- },
711
- {
712
- "epoch": 0.71,
713
- "learning_rate": 0.0002605602605863192,
714
- "loss": 0.3,
715
- "step": 1110
716
- },
717
- {
718
- "epoch": 0.72,
719
- "learning_rate": 0.00026016938110749185,
720
- "loss": 0.3008,
721
- "step": 1120
722
- },
723
- {
724
- "epoch": 0.73,
725
- "learning_rate": 0.00025977850162866445,
726
- "loss": 0.3037,
727
- "step": 1130
728
- },
729
- {
730
- "epoch": 0.73,
731
- "learning_rate": 0.0002593876221498371,
732
- "loss": 0.3102,
733
- "step": 1140
734
- },
735
- {
736
- "epoch": 0.74,
737
- "learning_rate": 0.00025899674267100976,
738
- "loss": 0.3065,
739
- "step": 1150
740
- },
741
- {
742
- "epoch": 0.75,
743
- "learning_rate": 0.00025860586319218236,
744
- "loss": 0.2961,
745
- "step": 1160
746
- },
747
- {
748
- "epoch": 0.75,
749
- "learning_rate": 0.000258214983713355,
750
- "loss": 0.3014,
751
- "step": 1170
752
- },
753
- {
754
- "epoch": 0.76,
755
- "learning_rate": 0.00025782410423452767,
756
- "loss": 0.3041,
757
- "step": 1180
758
- },
759
- {
760
- "epoch": 0.77,
761
- "learning_rate": 0.0002574332247557003,
762
- "loss": 0.2905,
763
- "step": 1190
764
- },
765
- {
766
- "epoch": 0.77,
767
- "learning_rate": 0.0002570423452768729,
768
- "loss": 0.2957,
769
- "step": 1200
770
- },
771
- {
772
- "epoch": 0.77,
773
- "eval_loss": 0.2957862615585327,
774
- "eval_runtime": 442.3344,
775
- "eval_samples_per_second": 4.521,
776
- "eval_steps_per_second": 0.565,
777
- "step": 1200
778
- },
779
- {
780
- "epoch": 0.78,
781
- "learning_rate": 0.0002566514657980456,
782
- "loss": 0.2933,
783
- "step": 1210
784
- },
785
- {
786
- "epoch": 0.78,
787
- "learning_rate": 0.0002562605863192182,
788
- "loss": 0.3107,
789
- "step": 1220
790
- },
791
- {
792
- "epoch": 0.79,
793
- "learning_rate": 0.00025586970684039084,
794
- "loss": 0.2954,
795
- "step": 1230
796
- },
797
- {
798
- "epoch": 0.8,
799
- "learning_rate": 0.0002554788273615635,
800
- "loss": 0.308,
801
- "step": 1240
802
- },
803
- {
804
- "epoch": 0.8,
805
- "learning_rate": 0.00025508794788273615,
806
- "loss": 0.302,
807
- "step": 1250
808
- },
809
- {
810
- "epoch": 0.81,
811
- "learning_rate": 0.0002546970684039088,
812
- "loss": 0.3004,
813
- "step": 1260
814
- },
815
- {
816
- "epoch": 0.82,
817
- "learning_rate": 0.0002543061889250814,
818
- "loss": 0.3029,
819
- "step": 1270
820
- },
821
- {
822
- "epoch": 0.82,
823
- "learning_rate": 0.00025391530944625406,
824
- "loss": 0.2893,
825
- "step": 1280
826
- },
827
- {
828
- "epoch": 0.83,
829
- "learning_rate": 0.0002535244299674267,
830
- "loss": 0.3027,
831
- "step": 1290
832
- },
833
- {
834
- "epoch": 0.84,
835
- "learning_rate": 0.0002531335504885993,
836
- "loss": 0.2948,
837
- "step": 1300
838
- },
839
- {
840
- "epoch": 0.84,
841
- "learning_rate": 0.00025274267100977197,
842
- "loss": 0.2896,
843
- "step": 1310
844
- },
845
- {
846
- "epoch": 0.85,
847
- "learning_rate": 0.00025235179153094457,
848
- "loss": 0.303,
849
- "step": 1320
850
- },
851
- {
852
- "epoch": 0.86,
853
- "learning_rate": 0.00025196091205211723,
854
- "loss": 0.3001,
855
- "step": 1330
856
- },
857
- {
858
- "epoch": 0.86,
859
- "learning_rate": 0.0002515700325732899,
860
- "loss": 0.2944,
861
- "step": 1340
862
- },
863
- {
864
- "epoch": 0.87,
865
- "learning_rate": 0.00025117915309446254,
866
- "loss": 0.3023,
867
- "step": 1350
868
- },
869
- {
870
- "epoch": 0.87,
871
- "learning_rate": 0.0002507882736156352,
872
- "loss": 0.3007,
873
- "step": 1360
874
- },
875
- {
876
- "epoch": 0.88,
877
- "learning_rate": 0.0002503973941368078,
878
- "loss": 0.3024,
879
- "step": 1370
880
- },
881
- {
882
- "epoch": 0.89,
883
- "learning_rate": 0.00025000651465798045,
884
- "loss": 0.3012,
885
- "step": 1380
886
- },
887
- {
888
- "epoch": 0.89,
889
- "learning_rate": 0.00024961563517915305,
890
- "loss": 0.294,
891
- "step": 1390
892
- },
893
- {
894
- "epoch": 0.9,
895
- "learning_rate": 0.0002492247557003257,
896
- "loss": 0.2987,
897
- "step": 1400
898
- },
899
- {
900
- "epoch": 0.9,
901
- "eval_loss": 0.2930598855018616,
902
- "eval_runtime": 442.2601,
903
- "eval_samples_per_second": 4.522,
904
- "eval_steps_per_second": 0.565,
905
- "step": 1400
906
- },
907
- {
908
- "epoch": 0.91,
909
- "learning_rate": 0.00024883387622149836,
910
- "loss": 0.2973,
911
- "step": 1410
912
- },
913
- {
914
- "epoch": 0.91,
915
- "learning_rate": 0.00024844299674267096,
916
- "loss": 0.2954,
917
- "step": 1420
918
- },
919
- {
920
- "epoch": 0.92,
921
- "learning_rate": 0.0002480521172638436,
922
- "loss": 0.2907,
923
- "step": 1430
924
- },
925
- {
926
- "epoch": 0.93,
927
- "learning_rate": 0.00024766123778501627,
928
- "loss": 0.2973,
929
- "step": 1440
930
- },
931
- {
932
- "epoch": 0.93,
933
- "learning_rate": 0.00024727035830618893,
934
- "loss": 0.2991,
935
- "step": 1450
936
- },
937
- {
938
- "epoch": 0.94,
939
- "learning_rate": 0.00024687947882736153,
940
- "loss": 0.2918,
941
- "step": 1460
942
- },
943
- {
944
- "epoch": 0.95,
945
- "learning_rate": 0.0002464885993485342,
946
- "loss": 0.2892,
947
- "step": 1470
948
- },
949
- {
950
- "epoch": 0.95,
951
- "learning_rate": 0.0002460977198697068,
952
- "loss": 0.2965,
953
- "step": 1480
954
- },
955
- {
956
- "epoch": 0.96,
957
- "learning_rate": 0.00024570684039087944,
958
- "loss": 0.2965,
959
- "step": 1490
960
- },
961
- {
962
- "epoch": 0.96,
963
- "learning_rate": 0.0002453159609120521,
964
- "loss": 0.2925,
965
- "step": 1500
966
- },
967
- {
968
- "epoch": 0.97,
969
- "learning_rate": 0.00024492508143322475,
970
- "loss": 0.292,
971
- "step": 1510
972
- },
973
- {
974
- "epoch": 0.98,
975
- "learning_rate": 0.00024453420195439735,
976
- "loss": 0.2945,
977
- "step": 1520
978
- },
979
- {
980
- "epoch": 0.98,
981
- "learning_rate": 0.00024414332247557,
982
- "loss": 0.2955,
983
- "step": 1530
984
- },
985
- {
986
- "epoch": 0.99,
987
- "learning_rate": 0.00024375244299674263,
988
- "loss": 0.308,
989
- "step": 1540
990
- },
991
- {
992
- "epoch": 1.0,
993
- "learning_rate": 0.0002433615635179153,
994
- "loss": 0.294,
995
- "step": 1550
996
- },
997
- {
998
- "epoch": 1.0,
999
- "learning_rate": 0.00024297068403908795,
1000
- "loss": 0.2937,
1001
- "step": 1560
1002
- },
1003
- {
1004
- "epoch": 1.01,
1005
- "learning_rate": 0.00024257980456026055,
1006
- "loss": 0.2781,
1007
- "step": 1570
1008
- },
1009
- {
1010
- "epoch": 1.02,
1011
- "learning_rate": 0.0002421889250814332,
1012
- "loss": 0.2763,
1013
- "step": 1580
1014
- },
1015
- {
1016
- "epoch": 1.02,
1017
- "learning_rate": 0.00024179804560260583,
1018
- "loss": 0.2842,
1019
- "step": 1590
1020
- },
1021
- {
1022
- "epoch": 1.03,
1023
- "learning_rate": 0.00024140716612377849,
1024
- "loss": 0.2838,
1025
- "step": 1600
1026
- },
1027
- {
1028
- "epoch": 1.03,
1029
- "eval_loss": 0.2916465997695923,
1030
- "eval_runtime": 441.7075,
1031
- "eval_samples_per_second": 4.528,
1032
- "eval_steps_per_second": 0.566,
1033
- "step": 1600
1034
- },
1035
- {
1036
- "epoch": 1.04,
1037
- "learning_rate": 0.00024101628664495114,
1038
- "loss": 0.2763,
1039
- "step": 1610
1040
- },
1041
- {
1042
- "epoch": 1.04,
1043
- "learning_rate": 0.00024062540716612377,
1044
- "loss": 0.2812,
1045
- "step": 1620
1046
- },
1047
- {
1048
- "epoch": 1.05,
1049
- "learning_rate": 0.0002402345276872964,
1050
- "loss": 0.2889,
1051
- "step": 1630
1052
- },
1053
- {
1054
- "epoch": 1.05,
1055
- "learning_rate": 0.00023984364820846902,
1056
- "loss": 0.2901,
1057
- "step": 1640
1058
- },
1059
- {
1060
- "epoch": 1.06,
1061
- "learning_rate": 0.00023945276872964168,
1062
- "loss": 0.2688,
1063
- "step": 1650
1064
- },
1065
- {
1066
- "epoch": 1.07,
1067
- "learning_rate": 0.0002390618892508143,
1068
- "loss": 0.2856,
1069
- "step": 1660
1070
- },
1071
- {
1072
- "epoch": 1.07,
1073
- "learning_rate": 0.00023867100977198696,
1074
- "loss": 0.2901,
1075
- "step": 1670
1076
- },
1077
- {
1078
- "epoch": 1.08,
1079
- "learning_rate": 0.00023828013029315956,
1080
- "loss": 0.2892,
1081
- "step": 1680
1082
- },
1083
- {
1084
- "epoch": 1.09,
1085
- "learning_rate": 0.00023788925081433222,
1086
- "loss": 0.281,
1087
- "step": 1690
1088
- },
1089
- {
1090
- "epoch": 1.09,
1091
- "learning_rate": 0.00023749837133550487,
1092
- "loss": 0.284,
1093
- "step": 1700
1094
- },
1095
- {
1096
- "epoch": 1.1,
1097
- "learning_rate": 0.0002371074918566775,
1098
- "loss": 0.2834,
1099
- "step": 1710
1100
- },
1101
- {
1102
- "epoch": 1.11,
1103
- "learning_rate": 0.00023671661237785016,
1104
- "loss": 0.2832,
1105
- "step": 1720
1106
- },
1107
- {
1108
- "epoch": 1.11,
1109
- "learning_rate": 0.00023632573289902276,
1110
- "loss": 0.2803,
1111
- "step": 1730
1112
- },
1113
- {
1114
- "epoch": 1.12,
1115
- "learning_rate": 0.00023593485342019541,
1116
- "loss": 0.2868,
1117
- "step": 1740
1118
- },
1119
- {
1120
- "epoch": 1.13,
1121
- "learning_rate": 0.00023554397394136807,
1122
- "loss": 0.2883,
1123
- "step": 1750
1124
- },
1125
- {
1126
- "epoch": 1.13,
1127
- "learning_rate": 0.0002351530944625407,
1128
- "loss": 0.2859,
1129
- "step": 1760
1130
- },
1131
- {
1132
- "epoch": 1.14,
1133
- "learning_rate": 0.00023476221498371335,
1134
- "loss": 0.2888,
1135
- "step": 1770
1136
- },
1137
- {
1138
- "epoch": 1.14,
1139
- "learning_rate": 0.00023437133550488595,
1140
- "loss": 0.2983,
1141
- "step": 1780
1142
- },
1143
- {
1144
- "epoch": 1.15,
1145
- "learning_rate": 0.0002339804560260586,
1146
- "loss": 0.2809,
1147
- "step": 1790
1148
- },
1149
- {
1150
- "epoch": 1.16,
1151
- "learning_rate": 0.00023358957654723124,
1152
- "loss": 0.2982,
1153
- "step": 1800
1154
- },
1155
- {
1156
- "epoch": 1.16,
1157
- "eval_loss": 0.28945282101631165,
1158
- "eval_runtime": 441.6135,
1159
- "eval_samples_per_second": 4.529,
1160
- "eval_steps_per_second": 0.566,
1161
- "step": 1800
1162
- },
1163
- {
1164
- "epoch": 1.16,
1165
- "learning_rate": 0.0002331986970684039,
1166
- "loss": 0.2848,
1167
- "step": 1810
1168
- },
1169
- {
1170
- "epoch": 1.17,
1171
- "learning_rate": 0.00023280781758957655,
1172
- "loss": 0.2821,
1173
- "step": 1820
1174
- },
1175
- {
1176
- "epoch": 1.18,
1177
- "learning_rate": 0.00023241693811074915,
1178
- "loss": 0.2809,
1179
- "step": 1830
1180
- },
1181
- {
1182
- "epoch": 1.18,
1183
- "learning_rate": 0.0002320260586319218,
1184
- "loss": 0.2858,
1185
- "step": 1840
1186
- },
1187
- {
1188
- "epoch": 1.19,
1189
- "learning_rate": 0.00023163517915309443,
1190
- "loss": 0.2751,
1191
- "step": 1850
1192
- },
1193
- {
1194
- "epoch": 1.2,
1195
- "learning_rate": 0.0002312442996742671,
1196
- "loss": 0.2779,
1197
- "step": 1860
1198
- },
1199
- {
1200
- "epoch": 1.2,
1201
- "learning_rate": 0.00023085342019543974,
1202
- "loss": 0.2797,
1203
- "step": 1870
1204
- },
1205
- {
1206
- "epoch": 1.21,
1207
- "learning_rate": 0.00023046254071661234,
1208
- "loss": 0.2888,
1209
- "step": 1880
1210
- },
1211
- {
1212
- "epoch": 1.22,
1213
- "learning_rate": 0.000230071661237785,
1214
- "loss": 0.2822,
1215
- "step": 1890
1216
- },
1217
- {
1218
- "epoch": 1.22,
1219
- "learning_rate": 0.00022968078175895763,
1220
- "loss": 0.2872,
1221
- "step": 1900
1222
- },
1223
- {
1224
- "epoch": 1.23,
1225
- "learning_rate": 0.00022928990228013028,
1226
- "loss": 0.2921,
1227
- "step": 1910
1228
- },
1229
- {
1230
- "epoch": 1.23,
1231
- "learning_rate": 0.0002288990228013029,
1232
- "loss": 0.2914,
1233
- "step": 1920
1234
- },
1235
- {
1236
- "epoch": 1.24,
1237
- "learning_rate": 0.00022850814332247554,
1238
- "loss": 0.2935,
1239
- "step": 1930
1240
- },
1241
- {
1242
- "epoch": 1.25,
1243
- "learning_rate": 0.00022811726384364817,
1244
- "loss": 0.2845,
1245
- "step": 1940
1246
- },
1247
- {
1248
- "epoch": 1.25,
1249
- "learning_rate": 0.00022772638436482082,
1250
- "loss": 0.3053,
1251
- "step": 1950
1252
- },
1253
- {
1254
- "epoch": 1.26,
1255
- "learning_rate": 0.00022733550488599348,
1256
- "loss": 0.3502,
1257
- "step": 1960
1258
- },
1259
- {
1260
- "epoch": 1.27,
1261
- "learning_rate": 0.0002269446254071661,
1262
- "loss": 0.2942,
1263
- "step": 1970
1264
- },
1265
- {
1266
- "epoch": 1.27,
1267
- "learning_rate": 0.00022655374592833873,
1268
- "loss": 0.2895,
1269
- "step": 1980
1270
- },
1271
- {
1272
- "epoch": 1.28,
1273
- "learning_rate": 0.00022616286644951136,
1274
- "loss": 0.281,
1275
- "step": 1990
1276
- },
1277
- {
1278
- "epoch": 1.29,
1279
- "learning_rate": 0.00022577198697068402,
1280
- "loss": 0.2839,
1281
- "step": 2000
1282
- },
1283
- {
1284
- "epoch": 1.29,
1285
- "eval_loss": 0.28936389088630676,
1286
- "eval_runtime": 441.8381,
1287
- "eval_samples_per_second": 4.527,
1288
- "eval_steps_per_second": 0.566,
1289
- "step": 2000
1290
- },
1291
- {
1292
- "epoch": 1.29,
1293
- "learning_rate": 0.00022538110749185667,
1294
- "loss": 0.2832,
1295
- "step": 2010
1296
- },
1297
- {
1298
- "epoch": 1.3,
1299
- "learning_rate": 0.0002249902280130293,
1300
- "loss": 0.28,
1301
- "step": 2020
1302
- },
1303
- {
1304
- "epoch": 1.31,
1305
- "learning_rate": 0.00022459934853420196,
1306
- "loss": 0.2788,
1307
- "step": 2030
1308
- },
1309
- {
1310
- "epoch": 1.31,
1311
- "learning_rate": 0.00022420846905537456,
1312
- "loss": 0.298,
1313
- "step": 2040
1314
- },
1315
- {
1316
- "epoch": 1.32,
1317
- "learning_rate": 0.0002238175895765472,
1318
- "loss": 0.2815,
1319
- "step": 2050
1320
- },
1321
- {
1322
- "epoch": 1.32,
1323
- "learning_rate": 0.00022342671009771984,
1324
- "loss": 0.2866,
1325
- "step": 2060
1326
- },
1327
- {
1328
- "epoch": 1.33,
1329
- "learning_rate": 0.0002230358306188925,
1330
- "loss": 0.2839,
1331
- "step": 2070
1332
- },
1333
- {
1334
- "epoch": 1.34,
1335
- "learning_rate": 0.00022264495114006515,
1336
- "loss": 0.285,
1337
- "step": 2080
1338
- },
1339
- {
1340
- "epoch": 1.34,
1341
- "learning_rate": 0.00022225407166123775,
1342
- "loss": 0.2846,
1343
- "step": 2090
1344
- },
1345
- {
1346
- "epoch": 1.35,
1347
- "learning_rate": 0.0002218631921824104,
1348
- "loss": 0.2903,
1349
- "step": 2100
1350
- },
1351
- {
1352
- "epoch": 1.36,
1353
- "learning_rate": 0.00022147231270358304,
1354
- "loss": 0.2914,
1355
- "step": 2110
1356
- },
1357
- {
1358
- "epoch": 1.36,
1359
- "learning_rate": 0.0002210814332247557,
1360
- "loss": 0.2839,
1361
- "step": 2120
1362
- },
1363
- {
1364
- "epoch": 1.37,
1365
- "learning_rate": 0.00022069055374592835,
1366
- "loss": 0.2814,
1367
- "step": 2130
1368
- },
1369
- {
1370
- "epoch": 1.38,
1371
- "learning_rate": 0.00022029967426710095,
1372
- "loss": 0.2989,
1373
- "step": 2140
1374
- },
1375
- {
1376
- "epoch": 1.38,
1377
- "learning_rate": 0.0002199087947882736,
1378
- "loss": 0.286,
1379
- "step": 2150
1380
- },
1381
- {
1382
- "epoch": 1.39,
1383
- "learning_rate": 0.00021951791530944623,
1384
- "loss": 0.2875,
1385
- "step": 2160
1386
- },
1387
- {
1388
- "epoch": 1.4,
1389
- "learning_rate": 0.00021912703583061889,
1390
- "loss": 0.2771,
1391
- "step": 2170
1392
- },
1393
- {
1394
- "epoch": 1.4,
1395
- "learning_rate": 0.00021873615635179151,
1396
- "loss": 0.2793,
1397
- "step": 2180
1398
- },
1399
- {
1400
- "epoch": 1.41,
1401
- "learning_rate": 0.00021834527687296414,
1402
- "loss": 0.2899,
1403
- "step": 2190
1404
- },
1405
- {
1406
- "epoch": 1.41,
1407
- "learning_rate": 0.00021795439739413677,
1408
- "loss": 0.2871,
1409
- "step": 2200
1410
- },
1411
- {
1412
- "epoch": 1.41,
1413
- "eval_loss": 0.28674158453941345,
1414
- "eval_runtime": 441.6324,
1415
- "eval_samples_per_second": 4.529,
1416
- "eval_steps_per_second": 0.566,
1417
- "step": 2200
1418
- },
1419
- {
1420
- "epoch": 1.42,
1421
- "learning_rate": 0.00021756351791530943,
1422
- "loss": 0.2856,
1423
- "step": 2210
1424
- },
1425
- {
1426
- "epoch": 1.43,
1427
- "learning_rate": 0.00021717263843648208,
1428
- "loss": 0.2838,
1429
- "step": 2220
1430
- },
1431
- {
1432
- "epoch": 1.43,
1433
- "learning_rate": 0.0002167817589576547,
1434
- "loss": 0.2851,
1435
- "step": 2230
1436
- },
1437
- {
1438
- "epoch": 1.44,
1439
- "learning_rate": 0.00021639087947882734,
1440
- "loss": 0.2823,
1441
- "step": 2240
1442
- },
1443
- {
1444
- "epoch": 1.45,
1445
- "learning_rate": 0.00021599999999999996,
1446
- "loss": 0.2872,
1447
- "step": 2250
1448
- },
1449
- {
1450
- "epoch": 1.45,
1451
- "learning_rate": 0.00021560912052117262,
1452
- "loss": 0.2782,
1453
- "step": 2260
1454
- },
1455
- {
1456
- "epoch": 1.46,
1457
- "learning_rate": 0.00021521824104234528,
1458
- "loss": 0.2722,
1459
- "step": 2270
1460
- },
1461
- {
1462
- "epoch": 1.47,
1463
- "learning_rate": 0.0002148273615635179,
1464
- "loss": 0.2951,
1465
- "step": 2280
1466
- },
1467
- {
1468
- "epoch": 1.47,
1469
- "learning_rate": 0.00021443648208469053,
1470
- "loss": 0.2774,
1471
- "step": 2290
1472
- },
1473
- {
1474
- "epoch": 1.48,
1475
- "learning_rate": 0.00021404560260586316,
1476
- "loss": 0.2897,
1477
- "step": 2300
1478
- },
1479
- {
1480
- "epoch": 1.49,
1481
- "learning_rate": 0.00021365472312703581,
1482
- "loss": 0.2848,
1483
- "step": 2310
1484
- },
1485
- {
1486
- "epoch": 1.49,
1487
- "learning_rate": 0.00021326384364820844,
1488
- "loss": 0.2823,
1489
- "step": 2320
1490
- },
1491
- {
1492
- "epoch": 1.5,
1493
- "learning_rate": 0.0002128729641693811,
1494
- "loss": 0.2795,
1495
- "step": 2330
1496
- },
1497
- {
1498
- "epoch": 1.5,
1499
- "learning_rate": 0.0002124820846905537,
1500
- "loss": 0.275,
1501
- "step": 2340
1502
- },
1503
- {
1504
- "epoch": 1.51,
1505
- "learning_rate": 0.00021209120521172635,
1506
- "loss": 0.2789,
1507
- "step": 2350
1508
- },
1509
- {
1510
- "epoch": 1.52,
1511
- "learning_rate": 0.000211700325732899,
1512
- "loss": 0.2887,
1513
- "step": 2360
1514
- },
1515
- {
1516
- "epoch": 1.52,
1517
- "learning_rate": 0.00021130944625407164,
1518
- "loss": 0.2871,
1519
- "step": 2370
1520
- },
1521
- {
1522
- "epoch": 1.53,
1523
- "learning_rate": 0.0002109185667752443,
1524
- "loss": 0.2776,
1525
- "step": 2380
1526
- },
1527
- {
1528
- "epoch": 1.54,
1529
- "learning_rate": 0.00021052768729641695,
1530
- "loss": 0.2785,
1531
- "step": 2390
1532
- },
1533
- {
1534
- "epoch": 1.54,
1535
- "learning_rate": 0.00021013680781758955,
1536
- "loss": 0.2835,
1537
- "step": 2400
1538
- },
1539
- {
1540
- "epoch": 1.54,
1541
- "eval_loss": 0.2856335937976837,
1542
- "eval_runtime": 441.712,
1543
- "eval_samples_per_second": 4.528,
1544
- "eval_steps_per_second": 0.566,
1545
- "step": 2400
1546
- },
1547
- {
1548
- "epoch": 1.55,
1549
- "learning_rate": 0.0002097459283387622,
1550
- "loss": 0.2881,
1551
- "step": 2410
1552
- },
1553
- {
1554
- "epoch": 1.56,
1555
- "learning_rate": 0.00020935504885993483,
1556
- "loss": 0.276,
1557
- "step": 2420
1558
- },
1559
- {
1560
- "epoch": 1.56,
1561
- "learning_rate": 0.0002089641693811075,
1562
- "loss": 0.2883,
1563
- "step": 2430
1564
- },
1565
- {
1566
- "epoch": 1.57,
1567
- "learning_rate": 0.00020857328990228012,
1568
- "loss": 0.2801,
1569
- "step": 2440
1570
- },
1571
- {
1572
- "epoch": 1.58,
1573
- "learning_rate": 0.00020818241042345274,
1574
- "loss": 0.2815,
1575
- "step": 2450
1576
- },
1577
- {
1578
- "epoch": 1.58,
1579
- "learning_rate": 0.00020779153094462537,
1580
- "loss": 0.2925,
1581
- "step": 2460
1582
- },
1583
- {
1584
- "epoch": 1.59,
1585
- "learning_rate": 0.00020740065146579803,
1586
- "loss": 0.2694,
1587
- "step": 2470
1588
- },
1589
- {
1590
- "epoch": 1.59,
1591
- "learning_rate": 0.00020700977198697068,
1592
- "loss": 0.286,
1593
- "step": 2480
1594
- },
1595
- {
1596
- "epoch": 1.6,
1597
- "learning_rate": 0.0002066188925081433,
1598
- "loss": 0.29,
1599
- "step": 2490
1600
- },
1601
- {
1602
- "epoch": 1.61,
1603
- "learning_rate": 0.00020622801302931594,
1604
- "loss": 0.285,
1605
- "step": 2500
1606
- },
1607
- {
1608
- "epoch": 1.61,
1609
- "learning_rate": 0.00020583713355048857,
1610
- "loss": 0.2707,
1611
- "step": 2510
1612
- },
1613
- {
1614
- "epoch": 1.62,
1615
- "learning_rate": 0.00020544625407166122,
1616
- "loss": 0.2883,
1617
- "step": 2520
1618
- },
1619
- {
1620
- "epoch": 1.63,
1621
- "learning_rate": 0.00020505537459283388,
1622
- "loss": 0.2849,
1623
- "step": 2530
1624
- },
1625
- {
1626
- "epoch": 1.63,
1627
- "learning_rate": 0.0002046644951140065,
1628
- "loss": 0.2827,
1629
- "step": 2540
1630
- },
1631
- {
1632
- "epoch": 1.64,
1633
- "learning_rate": 0.00020427361563517913,
1634
- "loss": 0.27,
1635
- "step": 2550
1636
- },
1637
- {
1638
- "epoch": 1.65,
1639
- "learning_rate": 0.00020388273615635176,
1640
- "loss": 0.2803,
1641
- "step": 2560
1642
- },
1643
- {
1644
- "epoch": 1.65,
1645
- "learning_rate": 0.00020349185667752442,
1646
- "loss": 0.2828,
1647
- "step": 2570
1648
- },
1649
- {
1650
- "epoch": 1.66,
1651
- "learning_rate": 0.00020310097719869705,
1652
- "loss": 0.2821,
1653
- "step": 2580
1654
- },
1655
- {
1656
- "epoch": 1.67,
1657
- "learning_rate": 0.0002027100977198697,
1658
- "loss": 0.2732,
1659
- "step": 2590
1660
- },
1661
- {
1662
- "epoch": 1.67,
1663
- "learning_rate": 0.0002023192182410423,
1664
- "loss": 0.2758,
1665
- "step": 2600
1666
- },
1667
- {
1668
- "epoch": 1.67,
1669
- "eval_loss": 0.28436511754989624,
1670
- "eval_runtime": 441.3288,
1671
- "eval_samples_per_second": 4.532,
1672
- "eval_steps_per_second": 0.566,
1673
- "step": 2600
1674
- },
1675
- {
1676
- "epoch": 1.68,
1677
- "learning_rate": 0.00020192833876221496,
1678
- "loss": 0.2715,
1679
- "step": 2610
1680
- },
1681
- {
1682
- "epoch": 1.68,
1683
- "learning_rate": 0.0002015374592833876,
1684
- "loss": 0.2741,
1685
- "step": 2620
1686
- },
1687
- {
1688
- "epoch": 1.69,
1689
- "learning_rate": 0.00020114657980456024,
1690
- "loss": 0.2856,
1691
- "step": 2630
1692
- },
1693
- {
1694
- "epoch": 1.7,
1695
- "learning_rate": 0.0002007557003257329,
1696
- "loss": 0.2828,
1697
- "step": 2640
1698
- },
1699
- {
1700
- "epoch": 1.7,
1701
- "learning_rate": 0.0002003648208469055,
1702
- "loss": 0.2695,
1703
- "step": 2650
1704
- },
1705
- {
1706
- "epoch": 1.71,
1707
- "learning_rate": 0.00019997394136807815,
1708
- "loss": 0.2914,
1709
- "step": 2660
1710
- },
1711
- {
1712
- "epoch": 1.72,
1713
- "learning_rate": 0.0001995830618892508,
1714
- "loss": 0.2766,
1715
- "step": 2670
1716
- },
1717
- {
1718
- "epoch": 1.72,
1719
- "learning_rate": 0.00019919218241042344,
1720
- "loss": 0.2738,
1721
- "step": 2680
1722
- },
1723
- {
1724
- "epoch": 1.73,
1725
- "learning_rate": 0.0001988013029315961,
1726
- "loss": 0.2745,
1727
- "step": 2690
1728
- },
1729
- {
1730
- "epoch": 1.74,
1731
- "learning_rate": 0.0001984104234527687,
1732
- "loss": 0.2737,
1733
- "step": 2700
1734
- },
1735
- {
1736
- "epoch": 1.74,
1737
- "learning_rate": 0.00019801954397394135,
1738
- "loss": 0.2734,
1739
- "step": 2710
1740
- },
1741
- {
1742
- "epoch": 1.75,
1743
- "learning_rate": 0.00019762866449511398,
1744
- "loss": 0.2658,
1745
- "step": 2720
1746
- },
1747
- {
1748
- "epoch": 1.76,
1749
- "learning_rate": 0.00019723778501628663,
1750
- "loss": 0.2856,
1751
- "step": 2730
1752
- },
1753
- {
1754
- "epoch": 1.76,
1755
- "learning_rate": 0.00019684690553745929,
1756
- "loss": 0.2762,
1757
- "step": 2740
1758
- },
1759
- {
1760
- "epoch": 1.77,
1761
- "learning_rate": 0.00019645602605863191,
1762
- "loss": 0.2755,
1763
- "step": 2750
1764
- },
1765
- {
1766
- "epoch": 1.77,
1767
- "learning_rate": 0.00019606514657980454,
1768
- "loss": 0.2702,
1769
- "step": 2760
1770
- },
1771
- {
1772
- "epoch": 1.78,
1773
- "learning_rate": 0.00019567426710097717,
1774
- "loss": 0.2836,
1775
- "step": 2770
1776
- },
1777
- {
1778
- "epoch": 1.79,
1779
- "learning_rate": 0.00019528338762214983,
1780
- "loss": 0.2703,
1781
- "step": 2780
1782
- },
1783
- {
1784
- "epoch": 1.79,
1785
- "learning_rate": 0.00019489250814332248,
1786
- "loss": 0.282,
1787
- "step": 2790
1788
- },
1789
- {
1790
- "epoch": 1.8,
1791
- "learning_rate": 0.0001945016286644951,
1792
- "loss": 0.2797,
1793
- "step": 2800
1794
- },
1795
- {
1796
- "epoch": 1.8,
1797
- "eval_loss": 0.28348231315612793,
1798
- "eval_runtime": 441.491,
1799
- "eval_samples_per_second": 4.53,
1800
- "eval_steps_per_second": 0.566,
1801
- "step": 2800
1802
- },
1803
- {
1804
- "epoch": 1.81,
1805
- "learning_rate": 0.00019411074918566774,
1806
- "loss": 0.2853,
1807
- "step": 2810
1808
- },
1809
- {
1810
- "epoch": 1.81,
1811
- "learning_rate": 0.00019371986970684037,
1812
- "loss": 0.2854,
1813
- "step": 2820
1814
- },
1815
- {
1816
- "epoch": 1.82,
1817
- "learning_rate": 0.00019332899022801302,
1818
- "loss": 0.2772,
1819
- "step": 2830
1820
- },
1821
- {
1822
- "epoch": 1.83,
1823
- "learning_rate": 0.00019293811074918565,
1824
- "loss": 0.2756,
1825
- "step": 2840
1826
- },
1827
- {
1828
- "epoch": 1.83,
1829
- "learning_rate": 0.0001925472312703583,
1830
- "loss": 0.2798,
1831
- "step": 2850
1832
- },
1833
- {
1834
- "epoch": 1.84,
1835
- "learning_rate": 0.0001921563517915309,
1836
- "loss": 0.2765,
1837
- "step": 2860
1838
- },
1839
- {
1840
- "epoch": 1.85,
1841
- "learning_rate": 0.00019176547231270356,
1842
- "loss": 0.2835,
1843
- "step": 2870
1844
- },
1845
- {
1846
- "epoch": 1.85,
1847
- "learning_rate": 0.00019137459283387622,
1848
- "loss": 0.2756,
1849
- "step": 2880
1850
- },
1851
- {
1852
- "epoch": 1.86,
1853
- "learning_rate": 0.00019098371335504884,
1854
- "loss": 0.2777,
1855
- "step": 2890
1856
- },
1857
- {
1858
- "epoch": 1.86,
1859
- "learning_rate": 0.0001905928338762215,
1860
- "loss": 0.2777,
1861
- "step": 2900
1862
- },
1863
- {
1864
- "epoch": 1.87,
1865
- "learning_rate": 0.0001902019543973941,
1866
- "loss": 0.2737,
1867
- "step": 2910
1868
- },
1869
- {
1870
- "epoch": 1.88,
1871
- "learning_rate": 0.00018981107491856675,
1872
- "loss": 0.2804,
1873
- "step": 2920
1874
- },
1875
- {
1876
- "epoch": 1.88,
1877
- "learning_rate": 0.0001894201954397394,
1878
- "loss": 0.2867,
1879
- "step": 2930
1880
- },
1881
- {
1882
- "epoch": 1.89,
1883
- "learning_rate": 0.00018902931596091204,
1884
- "loss": 0.2807,
1885
- "step": 2940
1886
- },
1887
- {
1888
- "epoch": 1.9,
1889
- "learning_rate": 0.0001886384364820847,
1890
- "loss": 0.2782,
1891
- "step": 2950
1892
- },
1893
- {
1894
- "epoch": 1.9,
1895
- "learning_rate": 0.0001882475570032573,
1896
- "loss": 0.2829,
1897
- "step": 2960
1898
- },
1899
- {
1900
- "epoch": 1.91,
1901
- "learning_rate": 0.00018785667752442995,
1902
- "loss": 0.2882,
1903
- "step": 2970
1904
- },
1905
- {
1906
- "epoch": 1.92,
1907
- "learning_rate": 0.00018746579804560258,
1908
- "loss": 0.2682,
1909
- "step": 2980
1910
- },
1911
- {
1912
- "epoch": 1.92,
1913
- "learning_rate": 0.00018707491856677523,
1914
- "loss": 0.2862,
1915
- "step": 2990
1916
- },
1917
- {
1918
- "epoch": 1.93,
1919
- "learning_rate": 0.0001866840390879479,
1920
- "loss": 0.2813,
1921
- "step": 3000
1922
- },
1923
- {
1924
- "epoch": 1.93,
1925
- "eval_loss": 0.2826210558414459,
1926
- "eval_runtime": 441.4835,
1927
- "eval_samples_per_second": 4.53,
1928
- "eval_steps_per_second": 0.566,
1929
- "step": 3000
1930
- },
1931
- {
1932
- "epoch": 1.94,
1933
- "learning_rate": 0.0001862931596091205,
1934
- "loss": 0.2848,
1935
- "step": 3010
1936
- },
1937
- {
1938
- "epoch": 1.94,
1939
- "learning_rate": 0.00018590228013029314,
1940
- "loss": 0.2774,
1941
- "step": 3020
1942
- },
1943
- {
1944
- "epoch": 1.95,
1945
- "learning_rate": 0.00018551140065146577,
1946
- "loss": 0.2872,
1947
- "step": 3030
1948
- },
1949
- {
1950
- "epoch": 1.95,
1951
- "learning_rate": 0.00018512052117263843,
1952
- "loss": 0.2763,
1953
- "step": 3040
1954
- },
1955
- {
1956
- "epoch": 1.96,
1957
- "learning_rate": 0.00018472964169381108,
1958
- "loss": 0.2818,
1959
- "step": 3050
1960
- },
1961
- {
1962
- "epoch": 1.97,
1963
- "learning_rate": 0.00018433876221498368,
1964
- "loss": 0.2746,
1965
- "step": 3060
1966
- },
1967
- {
1968
- "epoch": 1.97,
1969
- "learning_rate": 0.00018394788273615634,
1970
- "loss": 0.2808,
1971
- "step": 3070
1972
- },
1973
- {
1974
- "epoch": 1.98,
1975
- "learning_rate": 0.00018355700325732897,
1976
- "loss": 0.2738,
1977
- "step": 3080
1978
- },
1979
- {
1980
- "epoch": 1.99,
1981
- "learning_rate": 0.00018316612377850162,
1982
- "loss": 0.2846,
1983
- "step": 3090
1984
- },
1985
- {
1986
- "epoch": 1.99,
1987
- "learning_rate": 0.00018277524429967425,
1988
- "loss": 0.2739,
1989
- "step": 3100
1990
- },
1991
- {
1992
- "epoch": 2.0,
1993
- "learning_rate": 0.00018238436482084688,
1994
- "loss": 0.2845,
1995
- "step": 3110
1996
- },
1997
- {
1998
- "epoch": 2.01,
1999
- "learning_rate": 0.0001819934853420195,
2000
- "loss": 0.2692,
2001
- "step": 3120
2002
- },
2003
- {
2004
- "epoch": 2.01,
2005
- "learning_rate": 0.00018160260586319216,
2006
- "loss": 0.2687,
2007
- "step": 3130
2008
- },
2009
- {
2010
- "epoch": 2.02,
2011
- "learning_rate": 0.00018121172638436482,
2012
- "loss": 0.2626,
2013
- "step": 3140
2014
- },
2015
- {
2016
- "epoch": 2.03,
2017
- "learning_rate": 0.00018082084690553745,
2018
- "loss": 0.2671,
2019
- "step": 3150
2020
- },
2021
- {
2022
- "epoch": 2.03,
2023
- "learning_rate": 0.0001804299674267101,
2024
- "loss": 0.2731,
2025
- "step": 3160
2026
- },
2027
- {
2028
- "epoch": 2.04,
2029
- "learning_rate": 0.0001800390879478827,
2030
- "loss": 0.2644,
2031
- "step": 3170
2032
- },
2033
- {
2034
- "epoch": 2.05,
2035
- "learning_rate": 0.00017964820846905536,
2036
- "loss": 0.2617,
2037
- "step": 3180
2038
- },
2039
- {
2040
- "epoch": 2.05,
2041
- "learning_rate": 0.000179257328990228,
2042
- "loss": 0.2554,
2043
- "step": 3190
2044
- },
2045
- {
2046
- "epoch": 2.06,
2047
- "learning_rate": 0.00017886644951140064,
2048
- "loss": 0.264,
2049
- "step": 3200
2050
- },
2051
- {
2052
- "epoch": 2.06,
2053
- "eval_loss": 0.28276166319847107,
2054
- "eval_runtime": 441.3938,
2055
- "eval_samples_per_second": 4.531,
2056
- "eval_steps_per_second": 0.566,
2057
- "step": 3200
2058
- },
2059
- {
2060
- "epoch": 2.06,
2061
- "learning_rate": 0.0001784755700325733,
2062
- "loss": 0.26,
2063
- "step": 3210
2064
- },
2065
- {
2066
- "epoch": 2.07,
2067
- "learning_rate": 0.0001780846905537459,
2068
- "loss": 0.2689,
2069
- "step": 3220
2070
- },
2071
- {
2072
- "epoch": 2.08,
2073
- "learning_rate": 0.00017769381107491855,
2074
- "loss": 0.271,
2075
- "step": 3230
2076
- },
2077
- {
2078
- "epoch": 2.08,
2079
- "learning_rate": 0.00017730293159609118,
2080
- "loss": 0.2762,
2081
- "step": 3240
2082
- },
2083
- {
2084
- "epoch": 2.09,
2085
- "learning_rate": 0.00017691205211726384,
2086
- "loss": 0.2787,
2087
- "step": 3250
2088
- },
2089
- {
2090
- "epoch": 2.1,
2091
- "learning_rate": 0.0001765211726384365,
2092
- "loss": 0.2698,
2093
- "step": 3260
2094
- },
2095
- {
2096
- "epoch": 2.1,
2097
- "learning_rate": 0.0001761302931596091,
2098
- "loss": 0.2666,
2099
- "step": 3270
2100
- },
2101
- {
2102
- "epoch": 2.11,
2103
- "learning_rate": 0.00017573941368078175,
2104
- "loss": 0.2732,
2105
- "step": 3280
2106
- },
2107
- {
2108
- "epoch": 2.12,
2109
- "learning_rate": 0.00017534853420195438,
2110
- "loss": 0.266,
2111
- "step": 3290
2112
- },
2113
- {
2114
- "epoch": 2.12,
2115
- "learning_rate": 0.00017495765472312703,
2116
- "loss": 0.2619,
2117
- "step": 3300
2118
- },
2119
- {
2120
- "epoch": 2.13,
2121
- "learning_rate": 0.00017456677524429966,
2122
- "loss": 0.28,
2123
- "step": 3310
2124
- },
2125
- {
2126
- "epoch": 2.14,
2127
- "learning_rate": 0.0001741758957654723,
2128
- "loss": 0.2725,
2129
- "step": 3320
2130
- },
2131
- {
2132
- "epoch": 2.14,
2133
- "learning_rate": 0.00017378501628664494,
2134
- "loss": 0.2668,
2135
- "step": 3330
2136
- },
2137
- {
2138
- "epoch": 2.15,
2139
- "learning_rate": 0.00017339413680781757,
2140
- "loss": 0.2721,
2141
- "step": 3340
2142
- },
2143
- {
2144
- "epoch": 2.15,
2145
- "learning_rate": 0.00017300325732899023,
2146
- "loss": 0.2514,
2147
- "step": 3350
2148
- },
2149
- {
2150
- "epoch": 2.16,
2151
- "learning_rate": 0.00017261237785016285,
2152
- "loss": 0.2623,
2153
- "step": 3360
2154
- },
2155
- {
2156
- "epoch": 2.17,
2157
- "learning_rate": 0.00017222149837133548,
2158
- "loss": 0.2569,
2159
- "step": 3370
2160
- },
2161
- {
2162
- "epoch": 2.17,
2163
- "learning_rate": 0.0001718306188925081,
2164
- "loss": 0.2654,
2165
- "step": 3380
2166
- },
2167
- {
2168
- "epoch": 2.18,
2169
- "learning_rate": 0.00017143973941368077,
2170
- "loss": 0.2678,
2171
- "step": 3390
2172
- },
2173
- {
2174
- "epoch": 2.19,
2175
- "learning_rate": 0.00017104885993485342,
2176
- "loss": 0.2651,
2177
- "step": 3400
2178
- },
2179
- {
2180
- "epoch": 2.19,
2181
- "eval_loss": 0.28253164887428284,
2182
- "eval_runtime": 441.4474,
2183
- "eval_samples_per_second": 4.531,
2184
- "eval_steps_per_second": 0.566,
2185
- "step": 3400
2186
- },
2187
- {
2188
- "epoch": 2.19,
2189
- "learning_rate": 0.00017065798045602605,
2190
- "loss": 0.2587,
2191
- "step": 3410
2192
- },
2193
- {
2194
- "epoch": 2.2,
2195
- "learning_rate": 0.00017026710097719868,
2196
- "loss": 0.2835,
2197
- "step": 3420
2198
- },
2199
- {
2200
- "epoch": 2.21,
2201
- "learning_rate": 0.0001698762214983713,
2202
- "loss": 0.272,
2203
- "step": 3430
2204
- },
2205
- {
2206
- "epoch": 2.21,
2207
- "learning_rate": 0.00016948534201954396,
2208
- "loss": 0.2707,
2209
- "step": 3440
2210
- },
2211
- {
2212
- "epoch": 2.22,
2213
- "learning_rate": 0.0001690944625407166,
2214
- "loss": 0.2663,
2215
- "step": 3450
2216
- },
2217
- {
2218
- "epoch": 2.23,
2219
- "learning_rate": 0.00016870358306188924,
2220
- "loss": 0.2767,
2221
- "step": 3460
2222
- },
2223
- {
2224
- "epoch": 2.23,
2225
- "learning_rate": 0.00016831270358306187,
2226
- "loss": 0.2696,
2227
- "step": 3470
2228
- },
2229
- {
2230
- "epoch": 2.24,
2231
- "learning_rate": 0.0001679218241042345,
2232
- "loss": 0.2735,
2233
- "step": 3480
2234
- },
2235
- {
2236
- "epoch": 2.24,
2237
- "learning_rate": 0.00016753094462540716,
2238
- "loss": 0.266,
2239
- "step": 3490
2240
- },
2241
- {
2242
- "epoch": 2.25,
2243
- "learning_rate": 0.00016714006514657978,
2244
- "loss": 0.2646,
2245
- "step": 3500
2246
- },
2247
- {
2248
- "epoch": 2.26,
2249
- "learning_rate": 0.00016674918566775244,
2250
- "loss": 0.2671,
2251
- "step": 3510
2252
- },
2253
- {
2254
- "epoch": 2.26,
2255
- "learning_rate": 0.0001663583061889251,
2256
- "loss": 0.2583,
2257
- "step": 3520
2258
- },
2259
- {
2260
- "epoch": 2.27,
2261
- "learning_rate": 0.0001659674267100977,
2262
- "loss": 0.2754,
2263
- "step": 3530
2264
- },
2265
- {
2266
- "epoch": 2.28,
2267
- "learning_rate": 0.00016557654723127035,
2268
- "loss": 0.2599,
2269
- "step": 3540
2270
- },
2271
- {
2272
- "epoch": 2.28,
2273
- "learning_rate": 0.00016518566775244298,
2274
- "loss": 0.2618,
2275
- "step": 3550
2276
- },
2277
- {
2278
- "epoch": 2.29,
2279
- "learning_rate": 0.00016479478827361563,
2280
- "loss": 0.2669,
2281
- "step": 3560
2282
- },
2283
- {
2284
- "epoch": 2.3,
2285
- "learning_rate": 0.00016440390879478826,
2286
- "loss": 0.2656,
2287
- "step": 3570
2288
- },
2289
- {
2290
- "epoch": 2.3,
2291
- "learning_rate": 0.0001640130293159609,
2292
- "loss": 0.2648,
2293
- "step": 3580
2294
- },
2295
- {
2296
- "epoch": 2.31,
2297
- "learning_rate": 0.00016362214983713352,
2298
- "loss": 0.2762,
2299
- "step": 3590
2300
- },
2301
- {
2302
- "epoch": 2.32,
2303
- "learning_rate": 0.00016323127035830617,
2304
- "loss": 0.2697,
2305
- "step": 3600
2306
- },
2307
- {
2308
- "epoch": 2.32,
2309
- "eval_loss": 0.2820605933666229,
2310
- "eval_runtime": 441.4527,
2311
- "eval_samples_per_second": 4.53,
2312
- "eval_steps_per_second": 0.566,
2313
- "step": 3600
2314
- },
2315
- {
2316
- "epoch": 2.32,
2317
- "learning_rate": 0.00016284039087947883,
2318
- "loss": 0.2711,
2319
- "step": 3610
2320
- },
2321
- {
2322
- "epoch": 2.33,
2323
- "learning_rate": 0.00016244951140065146,
2324
- "loss": 0.2655,
2325
- "step": 3620
2326
- },
2327
- {
2328
- "epoch": 2.33,
2329
- "learning_rate": 0.00016205863192182408,
2330
- "loss": 0.2705,
2331
- "step": 3630
2332
- },
2333
- {
2334
- "epoch": 2.34,
2335
- "learning_rate": 0.0001616677524429967,
2336
- "loss": 0.2598,
2337
- "step": 3640
2338
- },
2339
- {
2340
- "epoch": 2.35,
2341
- "learning_rate": 0.00016127687296416937,
2342
- "loss": 0.2608,
2343
- "step": 3650
2344
- },
2345
- {
2346
- "epoch": 2.35,
2347
- "learning_rate": 0.00016088599348534202,
2348
- "loss": 0.264,
2349
- "step": 3660
2350
- },
2351
- {
2352
- "epoch": 2.36,
2353
- "learning_rate": 0.00016049511400651465,
2354
- "loss": 0.2652,
2355
- "step": 3670
2356
- },
2357
- {
2358
- "epoch": 2.37,
2359
- "learning_rate": 0.00016010423452768728,
2360
- "loss": 0.2541,
2361
- "step": 3680
2362
- },
2363
- {
2364
- "epoch": 2.37,
2365
- "learning_rate": 0.0001597133550488599,
2366
- "loss": 0.2616,
2367
- "step": 3690
2368
- },
2369
- {
2370
- "epoch": 2.38,
2371
- "learning_rate": 0.00015932247557003256,
2372
- "loss": 0.2652,
2373
- "step": 3700
2374
- },
2375
- {
2376
- "epoch": 2.39,
2377
- "learning_rate": 0.0001589315960912052,
2378
- "loss": 0.2587,
2379
- "step": 3710
2380
- },
2381
- {
2382
- "epoch": 2.39,
2383
- "learning_rate": 0.00015854071661237785,
2384
- "loss": 0.2629,
2385
- "step": 3720
2386
- },
2387
- {
2388
- "epoch": 2.4,
2389
- "learning_rate": 0.00015814983713355045,
2390
- "loss": 0.2692,
2391
- "step": 3730
2392
- },
2393
- {
2394
- "epoch": 2.41,
2395
- "learning_rate": 0.0001577589576547231,
2396
- "loss": 0.2683,
2397
- "step": 3740
2398
- },
2399
- {
2400
- "epoch": 2.41,
2401
- "learning_rate": 0.00015736807817589576,
2402
- "loss": 0.2674,
2403
- "step": 3750
2404
- },
2405
- {
2406
- "epoch": 2.42,
2407
- "learning_rate": 0.00015697719869706839,
2408
- "loss": 0.2591,
2409
- "step": 3760
2410
- },
2411
- {
2412
- "epoch": 2.42,
2413
- "learning_rate": 0.00015658631921824104,
2414
- "loss": 0.2641,
2415
- "step": 3770
2416
- },
2417
- {
2418
- "epoch": 2.43,
2419
- "learning_rate": 0.00015619543973941364,
2420
- "loss": 0.2683,
2421
- "step": 3780
2422
- },
2423
- {
2424
- "epoch": 2.44,
2425
- "learning_rate": 0.0001558045602605863,
2426
- "loss": 0.2675,
2427
- "step": 3790
2428
- },
2429
- {
2430
- "epoch": 2.44,
2431
- "learning_rate": 0.00015541368078175895,
2432
- "loss": 0.2593,
2433
- "step": 3800
2434
- },
2435
- {
2436
- "epoch": 2.44,
2437
- "eval_loss": 0.28158313035964966,
2438
- "eval_runtime": 441.4901,
2439
- "eval_samples_per_second": 4.53,
2440
- "eval_steps_per_second": 0.566,
2441
- "step": 3800
2442
- },
2443
- {
2444
- "epoch": 2.45,
2445
- "learning_rate": 0.00015502280130293158,
2446
- "loss": 0.2751,
2447
- "step": 3810
2448
- },
2449
- {
2450
- "epoch": 2.46,
2451
- "learning_rate": 0.00015463192182410424,
2452
- "loss": 0.2716,
2453
- "step": 3820
2454
- },
2455
- {
2456
- "epoch": 2.46,
2457
- "learning_rate": 0.00015424104234527684,
2458
- "loss": 0.2696,
2459
- "step": 3830
2460
- },
2461
- {
2462
- "epoch": 2.47,
2463
- "learning_rate": 0.0001538501628664495,
2464
- "loss": 0.2717,
2465
- "step": 3840
2466
- },
2467
- {
2468
- "epoch": 2.48,
2469
- "learning_rate": 0.00015345928338762212,
2470
- "loss": 0.257,
2471
- "step": 3850
2472
- },
2473
- {
2474
- "epoch": 2.48,
2475
- "learning_rate": 0.00015306840390879478,
2476
- "loss": 0.2632,
2477
- "step": 3860
2478
- },
2479
- {
2480
- "epoch": 2.49,
2481
- "learning_rate": 0.00015267752442996743,
2482
- "loss": 0.2708,
2483
- "step": 3870
2484
- },
2485
- {
2486
- "epoch": 2.5,
2487
- "learning_rate": 0.00015228664495114003,
2488
- "loss": 0.2769,
2489
- "step": 3880
2490
- },
2491
- {
2492
- "epoch": 2.5,
2493
- "learning_rate": 0.0001518957654723127,
2494
- "loss": 0.2637,
2495
- "step": 3890
2496
- },
2497
- {
2498
- "epoch": 2.51,
2499
- "learning_rate": 0.00015150488599348532,
2500
- "loss": 0.2651,
2501
- "step": 3900
2502
- },
2503
- {
2504
- "epoch": 2.51,
2505
- "learning_rate": 0.00015111400651465797,
2506
- "loss": 0.2651,
2507
- "step": 3910
2508
- },
2509
- {
2510
- "epoch": 2.52,
2511
- "learning_rate": 0.00015072312703583063,
2512
- "loss": 0.2712,
2513
- "step": 3920
2514
- },
2515
- {
2516
- "epoch": 2.53,
2517
- "learning_rate": 0.00015033224755700325,
2518
- "loss": 0.2677,
2519
- "step": 3930
2520
- },
2521
- {
2522
- "epoch": 2.53,
2523
- "learning_rate": 0.00014994136807817588,
2524
- "loss": 0.2697,
2525
- "step": 3940
2526
- },
2527
- {
2528
- "epoch": 2.54,
2529
- "learning_rate": 0.0001495504885993485,
2530
- "loss": 0.263,
2531
- "step": 3950
2532
- },
2533
- {
2534
- "epoch": 2.55,
2535
- "learning_rate": 0.00014915960912052117,
2536
- "loss": 0.2662,
2537
- "step": 3960
2538
- },
2539
- {
2540
- "epoch": 2.55,
2541
- "learning_rate": 0.0001487687296416938,
2542
- "loss": 0.258,
2543
- "step": 3970
2544
- },
2545
- {
2546
- "epoch": 2.56,
2547
- "learning_rate": 0.00014837785016286642,
2548
- "loss": 0.2611,
2549
- "step": 3980
2550
- },
2551
- {
2552
- "epoch": 2.57,
2553
- "learning_rate": 0.00014798697068403908,
2554
- "loss": 0.259,
2555
- "step": 3990
2556
- },
2557
- {
2558
- "epoch": 2.57,
2559
- "learning_rate": 0.0001475960912052117,
2560
- "loss": 0.2618,
2561
- "step": 4000
2562
- },
2563
- {
2564
- "epoch": 2.57,
2565
- "eval_loss": 0.2809056341648102,
2566
- "eval_runtime": 442.2264,
2567
- "eval_samples_per_second": 4.523,
2568
- "eval_steps_per_second": 0.565,
2569
- "step": 4000
2570
- },
2571
- {
2572
- "epoch": 2.58,
2573
- "learning_rate": 0.00014720521172638436,
2574
- "loss": 0.2617,
2575
- "step": 4010
2576
- },
2577
- {
2578
- "epoch": 2.59,
2579
- "learning_rate": 0.000146814332247557,
2580
- "loss": 0.2612,
2581
- "step": 4020
2582
- },
2583
- {
2584
- "epoch": 2.59,
2585
- "learning_rate": 0.00014642345276872962,
2586
- "loss": 0.253,
2587
- "step": 4030
2588
- },
2589
- {
2590
- "epoch": 2.6,
2591
- "learning_rate": 0.00014603257328990227,
2592
- "loss": 0.2611,
2593
- "step": 4040
2594
- },
2595
- {
2596
- "epoch": 2.6,
2597
- "learning_rate": 0.0001456416938110749,
2598
- "loss": 0.2574,
2599
- "step": 4050
2600
- },
2601
- {
2602
- "epoch": 2.61,
2603
- "learning_rate": 0.00014525081433224756,
2604
- "loss": 0.2708,
2605
- "step": 4060
2606
- },
2607
- {
2608
- "epoch": 2.62,
2609
- "learning_rate": 0.00014485993485342018,
2610
- "loss": 0.2639,
2611
- "step": 4070
2612
- },
2613
- {
2614
- "epoch": 2.62,
2615
- "learning_rate": 0.0001444690553745928,
2616
- "loss": 0.2712,
2617
- "step": 4080
2618
- },
2619
- {
2620
- "epoch": 2.63,
2621
- "learning_rate": 0.00014407817589576547,
2622
- "loss": 0.2644,
2623
- "step": 4090
2624
- },
2625
- {
2626
- "epoch": 2.64,
2627
- "learning_rate": 0.0001436872964169381,
2628
- "loss": 0.2613,
2629
- "step": 4100
2630
- },
2631
- {
2632
- "epoch": 2.64,
2633
- "learning_rate": 0.00014329641693811072,
2634
- "loss": 0.2601,
2635
- "step": 4110
2636
- },
2637
- {
2638
- "epoch": 2.65,
2639
- "learning_rate": 0.00014290553745928338,
2640
- "loss": 0.2741,
2641
- "step": 4120
2642
- },
2643
- {
2644
- "epoch": 2.66,
2645
- "learning_rate": 0.000142514657980456,
2646
- "loss": 0.2568,
2647
- "step": 4130
2648
- },
2649
- {
2650
- "epoch": 2.66,
2651
- "learning_rate": 0.00014212377850162866,
2652
- "loss": 0.2589,
2653
- "step": 4140
2654
- },
2655
- {
2656
- "epoch": 2.67,
2657
- "learning_rate": 0.0001417328990228013,
2658
- "loss": 0.2781,
2659
- "step": 4150
2660
- },
2661
- {
2662
- "epoch": 2.68,
2663
- "learning_rate": 0.00014134201954397392,
2664
- "loss": 0.2699,
2665
- "step": 4160
2666
- },
2667
- {
2668
- "epoch": 2.68,
2669
- "learning_rate": 0.00014095114006514657,
2670
- "loss": 0.2783,
2671
- "step": 4170
2672
- },
2673
- {
2674
- "epoch": 2.69,
2675
- "learning_rate": 0.0001405602605863192,
2676
- "loss": 0.2638,
2677
- "step": 4180
2678
- },
2679
- {
2680
- "epoch": 2.69,
2681
- "learning_rate": 0.00014016938110749186,
2682
- "loss": 0.2602,
2683
- "step": 4190
2684
- },
2685
- {
2686
- "epoch": 2.7,
2687
- "learning_rate": 0.00013977850162866449,
2688
- "loss": 0.2725,
2689
- "step": 4200
2690
- },
2691
- {
2692
- "epoch": 2.7,
2693
- "eval_loss": 0.28008410334587097,
2694
- "eval_runtime": 441.5362,
2695
- "eval_samples_per_second": 4.53,
2696
- "eval_steps_per_second": 0.566,
2697
- "step": 4200
2698
- },
2699
- {
2700
- "epoch": 2.71,
2701
- "learning_rate": 0.0001393876221498371,
2702
- "loss": 0.2577,
2703
- "step": 4210
2704
- },
2705
- {
2706
- "epoch": 2.71,
2707
- "learning_rate": 0.00013899674267100977,
2708
- "loss": 0.2706,
2709
- "step": 4220
2710
- },
2711
- {
2712
- "epoch": 2.72,
2713
- "learning_rate": 0.0001386058631921824,
2714
- "loss": 0.2724,
2715
- "step": 4230
2716
- },
2717
- {
2718
- "epoch": 2.73,
2719
- "learning_rate": 0.00013821498371335502,
2720
- "loss": 0.2638,
2721
- "step": 4240
2722
- },
2723
- {
2724
- "epoch": 2.73,
2725
- "learning_rate": 0.00013782410423452768,
2726
- "loss": 0.2544,
2727
- "step": 4250
2728
- },
2729
- {
2730
- "epoch": 2.74,
2731
- "learning_rate": 0.0001374332247557003,
2732
- "loss": 0.2645,
2733
- "step": 4260
2734
- },
2735
- {
2736
- "epoch": 2.75,
2737
- "learning_rate": 0.00013704234527687296,
2738
- "loss": 0.2652,
2739
- "step": 4270
2740
- },
2741
- {
2742
- "epoch": 2.75,
2743
- "learning_rate": 0.0001366514657980456,
2744
- "loss": 0.2785,
2745
- "step": 4280
2746
- },
2747
- {
2748
- "epoch": 2.76,
2749
- "learning_rate": 0.00013626058631921822,
2750
- "loss": 0.2722,
2751
- "step": 4290
2752
- },
2753
- {
2754
- "epoch": 2.77,
2755
- "learning_rate": 0.00013586970684039087,
2756
- "loss": 0.27,
2757
- "step": 4300
2758
- },
2759
- {
2760
- "epoch": 2.77,
2761
- "learning_rate": 0.0001354788273615635,
2762
- "loss": 0.2576,
2763
- "step": 4310
2764
- },
2765
- {
2766
- "epoch": 2.78,
2767
- "learning_rate": 0.00013508794788273616,
2768
- "loss": 0.2617,
2769
- "step": 4320
2770
- },
2771
- {
2772
- "epoch": 2.78,
2773
- "learning_rate": 0.0001346970684039088,
2774
- "loss": 0.2658,
2775
- "step": 4330
2776
- },
2777
- {
2778
- "epoch": 2.79,
2779
- "learning_rate": 0.00013430618892508141,
2780
- "loss": 0.2616,
2781
- "step": 4340
2782
- },
2783
- {
2784
- "epoch": 2.8,
2785
- "learning_rate": 0.00013391530944625407,
2786
- "loss": 0.2638,
2787
- "step": 4350
2788
- },
2789
- {
2790
- "epoch": 2.8,
2791
- "learning_rate": 0.0001335244299674267,
2792
- "loss": 0.2737,
2793
- "step": 4360
2794
- },
2795
- {
2796
- "epoch": 2.81,
2797
- "learning_rate": 0.00013313355048859933,
2798
- "loss": 0.278,
2799
- "step": 4370
2800
- },
2801
- {
2802
- "epoch": 2.82,
2803
- "learning_rate": 0.00013274267100977198,
2804
- "loss": 0.2753,
2805
- "step": 4380
2806
- },
2807
- {
2808
- "epoch": 2.82,
2809
- "learning_rate": 0.0001323517915309446,
2810
- "loss": 0.2553,
2811
- "step": 4390
2812
- },
2813
- {
2814
- "epoch": 2.83,
2815
- "learning_rate": 0.00013196091205211726,
2816
- "loss": 0.2618,
2817
- "step": 4400
2818
- },
2819
- {
2820
- "epoch": 2.83,
2821
- "eval_loss": 0.27958813309669495,
2822
- "eval_runtime": 441.9512,
2823
- "eval_samples_per_second": 4.525,
2824
- "eval_steps_per_second": 0.566,
2825
- "step": 4400
2826
- },
2827
- {
2828
- "epoch": 2.84,
2829
- "learning_rate": 0.0001315700325732899,
2830
- "loss": 0.2588,
2831
- "step": 4410
2832
- },
2833
- {
2834
- "epoch": 2.84,
2835
- "learning_rate": 0.00013117915309446252,
2836
- "loss": 0.2558,
2837
- "step": 4420
2838
- },
2839
- {
2840
- "epoch": 2.85,
2841
- "learning_rate": 0.00013078827361563518,
2842
- "loss": 0.269,
2843
- "step": 4430
2844
- },
2845
- {
2846
- "epoch": 2.86,
2847
- "learning_rate": 0.0001303973941368078,
2848
- "loss": 0.2547,
2849
- "step": 4440
2850
- },
2851
- {
2852
- "epoch": 2.86,
2853
- "learning_rate": 0.00013000651465798046,
2854
- "loss": 0.2667,
2855
- "step": 4450
2856
- },
2857
- {
2858
- "epoch": 2.87,
2859
- "learning_rate": 0.0001296156351791531,
2860
- "loss": 0.2641,
2861
- "step": 4460
2862
- },
2863
- {
2864
- "epoch": 2.87,
2865
- "learning_rate": 0.00012922475570032572,
2866
- "loss": 0.2585,
2867
- "step": 4470
2868
- },
2869
- {
2870
- "epoch": 2.88,
2871
- "learning_rate": 0.00012883387622149837,
2872
- "loss": 0.2565,
2873
- "step": 4480
2874
- },
2875
- {
2876
- "epoch": 2.89,
2877
- "learning_rate": 0.000128442996742671,
2878
- "loss": 0.2728,
2879
- "step": 4490
2880
- },
2881
- {
2882
- "epoch": 2.89,
2883
- "learning_rate": 0.00012805211726384363,
2884
- "loss": 0.2573,
2885
- "step": 4500
2886
- },
2887
- {
2888
- "epoch": 2.9,
2889
- "learning_rate": 0.00012766123778501626,
2890
- "loss": 0.2633,
2891
- "step": 4510
2892
- },
2893
- {
2894
- "epoch": 2.91,
2895
- "learning_rate": 0.0001272703583061889,
2896
- "loss": 0.2582,
2897
- "step": 4520
2898
- },
2899
- {
2900
- "epoch": 2.91,
2901
- "learning_rate": 0.00012687947882736157,
2902
- "loss": 0.2625,
2903
- "step": 4530
2904
- },
2905
- {
2906
- "epoch": 2.92,
2907
- "learning_rate": 0.0001264885993485342,
2908
- "loss": 0.2672,
2909
- "step": 4540
2910
- },
2911
- {
2912
- "epoch": 2.93,
2913
- "learning_rate": 0.00012609771986970682,
2914
- "loss": 0.2667,
2915
- "step": 4550
2916
- },
2917
- {
2918
- "epoch": 2.93,
2919
- "learning_rate": 0.00012570684039087945,
2920
- "loss": 0.2624,
2921
- "step": 4560
2922
- },
2923
- {
2924
- "epoch": 2.94,
2925
- "learning_rate": 0.0001253159609120521,
2926
- "loss": 0.2618,
2927
- "step": 4570
2928
- },
2929
- {
2930
- "epoch": 2.95,
2931
- "learning_rate": 0.00012492508143322476,
2932
- "loss": 0.2639,
2933
- "step": 4580
2934
- },
2935
- {
2936
- "epoch": 2.95,
2937
- "learning_rate": 0.0001245342019543974,
2938
- "loss": 0.2755,
2939
- "step": 4590
2940
- },
2941
- {
2942
- "epoch": 2.96,
2943
- "learning_rate": 0.00012414332247557002,
2944
- "loss": 0.2753,
2945
- "step": 4600
2946
- },
2947
- {
2948
- "epoch": 2.96,
2949
- "eval_loss": 0.27895185351371765,
2950
- "eval_runtime": 441.8565,
2951
- "eval_samples_per_second": 4.526,
2952
- "eval_steps_per_second": 0.566,
2953
- "step": 4600
2954
- },
2955
- {
2956
- "epoch": 2.96,
2957
- "learning_rate": 0.00012375244299674267,
2958
- "loss": 0.267,
2959
- "step": 4610
2960
- },
2961
- {
2962
- "epoch": 2.97,
2963
- "learning_rate": 0.0001233615635179153,
2964
- "loss": 0.2726,
2965
- "step": 4620
2966
- },
2967
- {
2968
- "epoch": 2.98,
2969
- "learning_rate": 0.00012297068403908793,
2970
- "loss": 0.2629,
2971
- "step": 4630
2972
- },
2973
- {
2974
- "epoch": 2.98,
2975
- "learning_rate": 0.00012257980456026056,
2976
- "loss": 0.2636,
2977
- "step": 4640
2978
- },
2979
- {
2980
- "epoch": 2.99,
2981
- "learning_rate": 0.0001221889250814332,
2982
- "loss": 0.2676,
2983
- "step": 4650
2984
- },
2985
- {
2986
- "epoch": 3.0,
2987
- "learning_rate": 0.00012179804560260585,
2988
- "loss": 0.2575,
2989
- "step": 4660
2990
- },
2991
- {
2992
- "epoch": 3.0,
2993
- "learning_rate": 0.00012140716612377848,
2994
- "loss": 0.2478,
2995
- "step": 4670
2996
- },
2997
- {
2998
- "epoch": 3.01,
2999
- "learning_rate": 0.00012101628664495114,
3000
- "loss": 0.2558,
3001
- "step": 4680
3002
- },
3003
- {
3004
- "epoch": 3.02,
3005
- "learning_rate": 0.00012062540716612377,
3006
- "loss": 0.2479,
3007
- "step": 4690
3008
- },
3009
- {
3010
- "epoch": 3.02,
3011
- "learning_rate": 0.00012023452768729641,
3012
- "loss": 0.2592,
3013
- "step": 4700
3014
- },
3015
- {
3016
- "epoch": 3.03,
3017
- "learning_rate": 0.00011984364820846905,
3018
- "loss": 0.2509,
3019
- "step": 4710
3020
- },
3021
- {
3022
- "epoch": 3.04,
3023
- "learning_rate": 0.00011945276872964168,
3024
- "loss": 0.2567,
3025
- "step": 4720
3026
- },
3027
- {
3028
- "epoch": 3.04,
3029
- "learning_rate": 0.00011906188925081432,
3030
- "loss": 0.2404,
3031
- "step": 4730
3032
- },
3033
- {
3034
- "epoch": 3.05,
3035
- "learning_rate": 0.00011867100977198695,
3036
- "loss": 0.2438,
3037
- "step": 4740
3038
- },
3039
- {
3040
- "epoch": 3.05,
3041
- "learning_rate": 0.0001182801302931596,
3042
- "loss": 0.2502,
3043
- "step": 4750
3044
- },
3045
- {
3046
- "epoch": 3.06,
3047
- "learning_rate": 0.00011788925081433224,
3048
- "loss": 0.2543,
3049
- "step": 4760
3050
- },
3051
- {
3052
- "epoch": 3.07,
3053
- "learning_rate": 0.00011749837133550487,
3054
- "loss": 0.252,
3055
- "step": 4770
3056
- },
3057
- {
3058
- "epoch": 3.07,
3059
- "learning_rate": 0.00011710749185667751,
3060
- "loss": 0.2583,
3061
- "step": 4780
3062
- },
3063
- {
3064
- "epoch": 3.08,
3065
- "learning_rate": 0.00011671661237785016,
3066
- "loss": 0.2575,
3067
- "step": 4790
3068
- },
3069
- {
3070
- "epoch": 3.09,
3071
- "learning_rate": 0.00011632573289902278,
3072
- "loss": 0.2559,
3073
- "step": 4800
3074
- },
3075
- {
3076
- "epoch": 3.09,
3077
- "eval_loss": 0.28064459562301636,
3078
- "eval_runtime": 442.0127,
3079
- "eval_samples_per_second": 4.525,
3080
- "eval_steps_per_second": 0.566,
3081
- "step": 4800
3082
- },
3083
- {
3084
- "epoch": 3.09,
3085
- "learning_rate": 0.00011593485342019544,
3086
- "loss": 0.2543,
3087
- "step": 4810
3088
- },
3089
- {
3090
- "epoch": 3.1,
3091
- "learning_rate": 0.00011554397394136807,
3092
- "loss": 0.2478,
3093
- "step": 4820
3094
- },
3095
- {
3096
- "epoch": 3.11,
3097
- "learning_rate": 0.00011515309446254071,
3098
- "loss": 0.2502,
3099
- "step": 4830
3100
- },
3101
- {
3102
- "epoch": 3.11,
3103
- "learning_rate": 0.00011476221498371335,
3104
- "loss": 0.255,
3105
- "step": 4840
3106
- },
3107
- {
3108
- "epoch": 3.12,
3109
- "learning_rate": 0.00011437133550488598,
3110
- "loss": 0.2542,
3111
- "step": 4850
3112
- },
3113
- {
3114
- "epoch": 3.13,
3115
- "learning_rate": 0.00011398045602605862,
3116
- "loss": 0.2562,
3117
- "step": 4860
3118
- },
3119
- {
3120
- "epoch": 3.13,
3121
- "learning_rate": 0.00011358957654723125,
3122
- "loss": 0.2463,
3123
- "step": 4870
3124
- },
3125
- {
3126
- "epoch": 3.14,
3127
- "learning_rate": 0.0001131986970684039,
3128
- "loss": 0.2592,
3129
- "step": 4880
3130
- },
3131
- {
3132
- "epoch": 3.14,
3133
- "learning_rate": 0.00011280781758957655,
3134
- "loss": 0.2601,
3135
- "step": 4890
3136
- },
3137
- {
3138
- "epoch": 3.15,
3139
- "learning_rate": 0.00011241693811074917,
3140
- "loss": 0.2548,
3141
- "step": 4900
3142
- },
3143
- {
3144
- "epoch": 3.16,
3145
- "learning_rate": 0.00011202605863192181,
3146
- "loss": 0.2569,
3147
- "step": 4910
3148
- },
3149
- {
3150
- "epoch": 3.16,
3151
- "learning_rate": 0.00011163517915309444,
3152
- "loss": 0.2543,
3153
- "step": 4920
3154
- },
3155
- {
3156
- "epoch": 3.17,
3157
- "learning_rate": 0.00011124429967426708,
3158
- "loss": 0.2532,
3159
- "step": 4930
3160
- },
3161
- {
3162
- "epoch": 3.18,
3163
- "learning_rate": 0.00011085342019543974,
3164
- "loss": 0.2493,
3165
- "step": 4940
3166
- },
3167
- {
3168
- "epoch": 3.18,
3169
- "learning_rate": 0.00011046254071661237,
3170
- "loss": 0.2533,
3171
- "step": 4950
3172
- },
3173
- {
3174
- "epoch": 3.19,
3175
- "learning_rate": 0.00011007166123778501,
3176
- "loss": 0.2632,
3177
- "step": 4960
3178
- },
3179
- {
3180
- "epoch": 3.2,
3181
- "learning_rate": 0.00010968078175895765,
3182
- "loss": 0.2457,
3183
- "step": 4970
3184
- },
3185
- {
3186
- "epoch": 3.2,
3187
- "learning_rate": 0.00010928990228013028,
3188
- "loss": 0.2538,
3189
- "step": 4980
3190
- },
3191
- {
3192
- "epoch": 3.21,
3193
- "learning_rate": 0.00010889902280130292,
3194
- "loss": 0.2536,
3195
- "step": 4990
3196
- },
3197
- {
3198
- "epoch": 3.22,
3199
- "learning_rate": 0.00010850814332247555,
3200
- "loss": 0.2555,
3201
- "step": 5000
3202
- },
3203
- {
3204
- "epoch": 3.22,
3205
- "eval_loss": 0.2805168330669403,
3206
- "eval_runtime": 442.1047,
3207
- "eval_samples_per_second": 4.524,
3208
- "eval_steps_per_second": 0.565,
3209
- "step": 5000
3210
- },
3211
- {
3212
- "epoch": 3.22,
3213
- "learning_rate": 0.0001081172638436482,
3214
- "loss": 0.253,
3215
- "step": 5010
3216
- },
3217
- {
3218
- "epoch": 3.23,
3219
- "learning_rate": 0.00010772638436482085,
3220
- "loss": 0.255,
3221
- "step": 5020
3222
- },
3223
- {
3224
- "epoch": 3.23,
3225
- "learning_rate": 0.00010733550488599347,
3226
- "loss": 0.2529,
3227
- "step": 5030
3228
- },
3229
- {
3230
- "epoch": 3.24,
3231
- "learning_rate": 0.00010694462540716612,
3232
- "loss": 0.2541,
3233
- "step": 5040
3234
- },
3235
- {
3236
- "epoch": 3.25,
3237
- "learning_rate": 0.00010655374592833874,
3238
- "loss": 0.2576,
3239
- "step": 5050
3240
- },
3241
- {
3242
- "epoch": 3.25,
3243
- "learning_rate": 0.00010616286644951139,
3244
- "loss": 0.2408,
3245
- "step": 5060
3246
- },
3247
- {
3248
- "epoch": 3.26,
3249
- "learning_rate": 0.00010577198697068404,
3250
- "loss": 0.259,
3251
- "step": 5070
3252
- },
3253
- {
3254
- "epoch": 3.27,
3255
- "learning_rate": 0.00010538110749185667,
3256
- "loss": 0.2534,
3257
- "step": 5080
3258
- },
3259
- {
3260
- "epoch": 3.27,
3261
- "learning_rate": 0.00010499022801302931,
3262
- "loss": 0.2491,
3263
- "step": 5090
3264
- },
3265
- {
3266
- "epoch": 3.28,
3267
- "learning_rate": 0.00010459934853420194,
3268
- "loss": 0.2507,
3269
- "step": 5100
3270
- },
3271
- {
3272
- "epoch": 3.29,
3273
- "learning_rate": 0.00010420846905537458,
3274
- "loss": 0.2583,
3275
- "step": 5110
3276
- },
3277
- {
3278
- "epoch": 3.29,
3279
- "learning_rate": 0.00010381758957654722,
3280
- "loss": 0.2478,
3281
- "step": 5120
3282
- },
3283
- {
3284
- "epoch": 3.3,
3285
- "learning_rate": 0.00010342671009771985,
3286
- "loss": 0.2574,
3287
- "step": 5130
3288
- },
3289
- {
3290
- "epoch": 3.31,
3291
- "learning_rate": 0.0001030358306188925,
3292
- "loss": 0.2578,
3293
- "step": 5140
3294
- },
3295
- {
3296
- "epoch": 3.31,
3297
- "learning_rate": 0.00010264495114006515,
3298
- "loss": 0.2561,
3299
- "step": 5150
3300
- },
3301
- {
3302
- "epoch": 3.32,
3303
- "learning_rate": 0.00010225407166123778,
3304
- "loss": 0.257,
3305
- "step": 5160
3306
- },
3307
- {
3308
- "epoch": 3.32,
3309
- "learning_rate": 0.00010186319218241042,
3310
- "loss": 0.2458,
3311
- "step": 5170
3312
- },
3313
- {
3314
- "epoch": 3.33,
3315
- "learning_rate": 0.00010147231270358305,
3316
- "loss": 0.2484,
3317
- "step": 5180
3318
- },
3319
- {
3320
- "epoch": 3.34,
3321
- "learning_rate": 0.00010108143322475569,
3322
- "loss": 0.2503,
3323
- "step": 5190
3324
- },
3325
- {
3326
- "epoch": 3.34,
3327
- "learning_rate": 0.00010069055374592834,
3328
- "loss": 0.2661,
3329
- "step": 5200
3330
- },
3331
- {
3332
- "epoch": 3.34,
3333
- "eval_loss": 0.2801346182823181,
3334
- "eval_runtime": 442.4045,
3335
- "eval_samples_per_second": 4.521,
3336
- "eval_steps_per_second": 0.565,
3337
- "step": 5200
3338
- },
3339
- {
3340
- "epoch": 3.35,
3341
- "learning_rate": 0.00010029967426710097,
3342
- "loss": 0.2526,
3343
- "step": 5210
3344
- },
3345
- {
3346
- "epoch": 3.36,
3347
- "learning_rate": 9.990879478827361e-05,
3348
- "loss": 0.25,
3349
- "step": 5220
3350
- },
3351
- {
3352
- "epoch": 3.36,
3353
- "learning_rate": 9.951791530944624e-05,
3354
- "loss": 0.2536,
3355
- "step": 5230
3356
- },
3357
- {
3358
- "epoch": 3.37,
3359
- "learning_rate": 9.912703583061888e-05,
3360
- "loss": 0.2444,
3361
- "step": 5240
3362
- },
3363
- {
3364
- "epoch": 3.38,
3365
- "learning_rate": 9.873615635179152e-05,
3366
- "loss": 0.2491,
3367
- "step": 5250
3368
- },
3369
- {
3370
- "epoch": 3.38,
3371
- "learning_rate": 9.834527687296415e-05,
3372
- "loss": 0.2494,
3373
- "step": 5260
3374
- },
3375
- {
3376
- "epoch": 3.39,
3377
- "learning_rate": 9.795439739413681e-05,
3378
- "loss": 0.251,
3379
- "step": 5270
3380
- },
3381
- {
3382
- "epoch": 3.4,
3383
- "learning_rate": 9.756351791530944e-05,
3384
- "loss": 0.2453,
3385
- "step": 5280
3386
- },
3387
- {
3388
- "epoch": 3.4,
3389
- "learning_rate": 9.717263843648208e-05,
3390
- "loss": 0.2524,
3391
- "step": 5290
3392
- },
3393
- {
3394
- "epoch": 3.41,
3395
- "learning_rate": 9.678175895765472e-05,
3396
- "loss": 0.2535,
3397
- "step": 5300
3398
- },
3399
- {
3400
- "epoch": 3.41,
3401
- "learning_rate": 9.639087947882735e-05,
3402
- "loss": 0.2525,
3403
- "step": 5310
3404
- },
3405
- {
3406
- "epoch": 3.42,
3407
- "learning_rate": 9.599999999999999e-05,
3408
- "loss": 0.25,
3409
- "step": 5320
3410
- },
3411
- {
3412
- "epoch": 3.43,
3413
- "learning_rate": 9.560912052117262e-05,
3414
- "loss": 0.2542,
3415
- "step": 5330
3416
- },
3417
- {
3418
- "epoch": 3.43,
3419
- "learning_rate": 9.521824104234527e-05,
3420
- "loss": 0.2575,
3421
- "step": 5340
3422
- },
3423
- {
3424
- "epoch": 3.44,
3425
- "learning_rate": 9.482736156351791e-05,
3426
- "loss": 0.2526,
3427
- "step": 5350
3428
- },
3429
- {
3430
- "epoch": 3.45,
3431
- "learning_rate": 9.443648208469054e-05,
3432
- "loss": 0.2505,
3433
- "step": 5360
3434
- },
3435
- {
3436
- "epoch": 3.45,
3437
- "learning_rate": 9.404560260586318e-05,
3438
- "loss": 0.2537,
3439
- "step": 5370
3440
- },
3441
- {
3442
- "epoch": 3.46,
3443
- "learning_rate": 9.365472312703583e-05,
3444
- "loss": 0.2563,
3445
- "step": 5380
3446
- },
3447
- {
3448
- "epoch": 3.47,
3449
- "learning_rate": 9.326384364820845e-05,
3450
- "loss": 0.2506,
3451
- "step": 5390
3452
- },
3453
- {
3454
- "epoch": 3.47,
3455
- "learning_rate": 9.287296416938111e-05,
3456
- "loss": 0.2635,
3457
- "step": 5400
3458
- },
3459
- {
3460
- "epoch": 3.47,
3461
- "eval_loss": 0.27963170409202576,
3462
- "eval_runtime": 442.3292,
3463
- "eval_samples_per_second": 4.522,
3464
- "eval_steps_per_second": 0.565,
3465
- "step": 5400
3466
- },
3467
- {
3468
- "epoch": 3.48,
3469
- "learning_rate": 9.248208469055374e-05,
3470
- "loss": 0.243,
3471
- "step": 5410
3472
- },
3473
- {
3474
- "epoch": 3.49,
3475
- "learning_rate": 9.209120521172638e-05,
3476
- "loss": 0.2507,
3477
- "step": 5420
3478
- },
3479
- {
3480
- "epoch": 3.49,
3481
- "learning_rate": 9.170032573289902e-05,
3482
- "loss": 0.2567,
3483
- "step": 5430
3484
- },
3485
- {
3486
- "epoch": 3.5,
3487
- "learning_rate": 9.130944625407165e-05,
3488
- "loss": 0.2509,
3489
- "step": 5440
3490
- },
3491
- {
3492
- "epoch": 3.5,
3493
- "learning_rate": 9.091856677524429e-05,
3494
- "loss": 0.2593,
3495
- "step": 5450
3496
- },
3497
- {
3498
- "epoch": 3.51,
3499
- "learning_rate": 9.052768729641692e-05,
3500
- "loss": 0.2485,
3501
- "step": 5460
3502
- },
3503
- {
3504
- "epoch": 3.52,
3505
- "learning_rate": 9.013680781758957e-05,
3506
- "loss": 0.2534,
3507
- "step": 5470
3508
- },
3509
- {
3510
- "epoch": 3.52,
3511
- "learning_rate": 8.974592833876222e-05,
3512
- "loss": 0.2489,
3513
- "step": 5480
3514
- },
3515
- {
3516
- "epoch": 3.53,
3517
- "learning_rate": 8.935504885993484e-05,
3518
- "loss": 0.2488,
3519
- "step": 5490
3520
- },
3521
- {
3522
- "epoch": 3.54,
3523
- "learning_rate": 8.896416938110749e-05,
3524
- "loss": 0.2578,
3525
- "step": 5500
3526
- },
3527
- {
3528
- "epoch": 3.54,
3529
- "learning_rate": 8.857328990228011e-05,
3530
- "loss": 0.2408,
3531
- "step": 5510
3532
- },
3533
- {
3534
- "epoch": 3.55,
3535
- "learning_rate": 8.818241042345275e-05,
3536
- "loss": 0.2478,
3537
- "step": 5520
3538
- },
3539
- {
3540
- "epoch": 3.56,
3541
- "learning_rate": 8.779153094462541e-05,
3542
- "loss": 0.2476,
3543
- "step": 5530
3544
- },
3545
- {
3546
- "epoch": 3.56,
3547
- "learning_rate": 8.740065146579804e-05,
3548
- "loss": 0.2476,
3549
- "step": 5540
3550
- },
3551
- {
3552
- "epoch": 3.57,
3553
- "learning_rate": 8.700977198697068e-05,
3554
- "loss": 0.2516,
3555
- "step": 5550
3556
- },
3557
- {
3558
- "epoch": 3.58,
3559
- "learning_rate": 8.661889250814332e-05,
3560
- "loss": 0.2595,
3561
- "step": 5560
3562
- },
3563
- {
3564
- "epoch": 3.58,
3565
- "learning_rate": 8.622801302931595e-05,
3566
- "loss": 0.2523,
3567
- "step": 5570
3568
- },
3569
- {
3570
- "epoch": 3.59,
3571
- "learning_rate": 8.583713355048859e-05,
3572
- "loss": 0.2594,
3573
- "step": 5580
3574
- },
3575
- {
3576
- "epoch": 3.59,
3577
- "learning_rate": 8.544625407166122e-05,
3578
- "loss": 0.2558,
3579
- "step": 5590
3580
- },
3581
- {
3582
- "epoch": 3.6,
3583
- "learning_rate": 8.505537459283387e-05,
3584
- "loss": 0.2497,
3585
- "step": 5600
3586
- },
3587
- {
3588
- "epoch": 3.6,
3589
- "eval_loss": 0.27932220697402954,
3590
- "eval_runtime": 442.4202,
3591
- "eval_samples_per_second": 4.521,
3592
- "eval_steps_per_second": 0.565,
3593
- "step": 5600
3594
- },
3595
- {
3596
- "epoch": 3.61,
3597
- "learning_rate": 8.466449511400652e-05,
3598
- "loss": 0.2692,
3599
- "step": 5610
3600
- },
3601
- {
3602
- "epoch": 3.61,
3603
- "learning_rate": 8.427361563517914e-05,
3604
- "loss": 0.2538,
3605
- "step": 5620
3606
- },
3607
- {
3608
- "epoch": 3.62,
3609
- "learning_rate": 8.388273615635179e-05,
3610
- "loss": 0.2529,
3611
- "step": 5630
3612
- },
3613
- {
3614
- "epoch": 3.63,
3615
- "learning_rate": 8.349185667752441e-05,
3616
- "loss": 0.244,
3617
- "step": 5640
3618
- },
3619
- {
3620
- "epoch": 3.63,
3621
- "learning_rate": 8.310097719869706e-05,
3622
- "loss": 0.2619,
3623
- "step": 5650
3624
- },
3625
- {
3626
- "epoch": 3.64,
3627
- "learning_rate": 8.271009771986971e-05,
3628
- "loss": 0.2532,
3629
- "step": 5660
3630
- },
3631
- {
3632
- "epoch": 3.65,
3633
- "learning_rate": 8.231921824104234e-05,
3634
- "loss": 0.2441,
3635
- "step": 5670
3636
- },
3637
- {
3638
- "epoch": 3.65,
3639
- "learning_rate": 8.192833876221498e-05,
3640
- "loss": 0.2607,
3641
- "step": 5680
3642
- },
3643
- {
3644
- "epoch": 3.66,
3645
- "learning_rate": 8.153745928338761e-05,
3646
- "loss": 0.2571,
3647
- "step": 5690
3648
- },
3649
- {
3650
- "epoch": 3.67,
3651
- "learning_rate": 8.114657980456025e-05,
3652
- "loss": 0.2599,
3653
- "step": 5700
3654
- },
3655
- {
3656
- "epoch": 3.67,
3657
- "learning_rate": 8.075570032573289e-05,
3658
- "loss": 0.2415,
3659
- "step": 5710
3660
- },
3661
- {
3662
- "epoch": 3.68,
3663
- "learning_rate": 8.036482084690552e-05,
3664
- "loss": 0.247,
3665
- "step": 5720
3666
- },
3667
- {
3668
- "epoch": 3.68,
3669
- "learning_rate": 7.997394136807818e-05,
3670
- "loss": 0.2544,
3671
- "step": 5730
3672
- },
3673
- {
3674
- "epoch": 3.69,
3675
- "learning_rate": 7.958306188925082e-05,
3676
- "loss": 0.2498,
3677
- "step": 5740
3678
- },
3679
- {
3680
- "epoch": 3.7,
3681
- "learning_rate": 7.919218241042345e-05,
3682
- "loss": 0.2616,
3683
- "step": 5750
3684
- },
3685
- {
3686
- "epoch": 3.7,
3687
- "learning_rate": 7.880130293159609e-05,
3688
- "loss": 0.2575,
3689
- "step": 5760
3690
- },
3691
- {
3692
- "epoch": 3.71,
3693
- "learning_rate": 7.841042345276872e-05,
3694
- "loss": 0.2509,
3695
- "step": 5770
3696
- },
3697
- {
3698
- "epoch": 3.72,
3699
- "learning_rate": 7.801954397394136e-05,
3700
- "loss": 0.2573,
3701
- "step": 5780
3702
- },
3703
- {
3704
- "epoch": 3.72,
3705
- "learning_rate": 7.762866449511401e-05,
3706
- "loss": 0.2506,
3707
- "step": 5790
3708
- },
3709
- {
3710
- "epoch": 3.73,
3711
- "learning_rate": 7.723778501628664e-05,
3712
- "loss": 0.2638,
3713
- "step": 5800
3714
- },
3715
- {
3716
- "epoch": 3.73,
3717
- "eval_loss": 0.27924951910972595,
3718
- "eval_runtime": 442.3315,
3719
- "eval_samples_per_second": 4.521,
3720
- "eval_steps_per_second": 0.565,
3721
- "step": 5800
3722
- },
3723
- {
3724
- "epoch": 3.74,
3725
- "learning_rate": 7.684690553745928e-05,
3726
- "loss": 0.2502,
3727
- "step": 5810
3728
- },
3729
- {
3730
- "epoch": 3.74,
3731
- "learning_rate": 7.645602605863191e-05,
3732
- "loss": 0.2421,
3733
- "step": 5820
3734
- },
3735
- {
3736
- "epoch": 3.75,
3737
- "learning_rate": 7.606514657980455e-05,
3738
- "loss": 0.257,
3739
- "step": 5830
3740
- },
3741
- {
3742
- "epoch": 3.76,
3743
- "learning_rate": 7.56742671009772e-05,
3744
- "loss": 0.2429,
3745
- "step": 5840
3746
- },
3747
- {
3748
- "epoch": 3.76,
3749
- "learning_rate": 7.528338762214982e-05,
3750
- "loss": 0.2514,
3751
- "step": 5850
3752
- },
3753
- {
3754
- "epoch": 3.77,
3755
- "learning_rate": 7.489250814332248e-05,
3756
- "loss": 0.2513,
3757
- "step": 5860
3758
- },
3759
- {
3760
- "epoch": 3.77,
3761
- "learning_rate": 7.45016286644951e-05,
3762
- "loss": 0.2445,
3763
- "step": 5870
3764
- },
3765
- {
3766
- "epoch": 3.78,
3767
- "learning_rate": 7.411074918566775e-05,
3768
- "loss": 0.2577,
3769
- "step": 5880
3770
- },
3771
- {
3772
- "epoch": 3.79,
3773
- "learning_rate": 7.371986970684038e-05,
3774
- "loss": 0.2543,
3775
- "step": 5890
3776
- },
3777
- {
3778
- "epoch": 3.79,
3779
- "learning_rate": 7.332899022801303e-05,
3780
- "loss": 0.2548,
3781
- "step": 5900
3782
- },
3783
- {
3784
- "epoch": 3.8,
3785
- "learning_rate": 7.293811074918566e-05,
3786
- "loss": 0.2631,
3787
- "step": 5910
3788
- },
3789
- {
3790
- "epoch": 3.81,
3791
- "learning_rate": 7.25472312703583e-05,
3792
- "loss": 0.2521,
3793
- "step": 5920
3794
- },
3795
- {
3796
- "epoch": 3.81,
3797
- "learning_rate": 7.215635179153094e-05,
3798
- "loss": 0.255,
3799
- "step": 5930
3800
- },
3801
- {
3802
- "epoch": 3.82,
3803
- "learning_rate": 7.176547231270357e-05,
3804
- "loss": 0.2528,
3805
- "step": 5940
3806
- },
3807
- {
3808
- "epoch": 3.83,
3809
- "learning_rate": 7.137459283387621e-05,
3810
- "loss": 0.2469,
3811
- "step": 5950
3812
- },
3813
- {
3814
- "epoch": 3.83,
3815
- "learning_rate": 7.098371335504885e-05,
3816
- "loss": 0.2568,
3817
- "step": 5960
3818
- },
3819
- {
3820
- "epoch": 3.84,
3821
- "learning_rate": 7.05928338762215e-05,
3822
- "loss": 0.2428,
3823
- "step": 5970
3824
- },
3825
- {
3826
- "epoch": 3.85,
3827
- "learning_rate": 7.020195439739412e-05,
3828
- "loss": 0.2415,
3829
- "step": 5980
3830
- },
3831
- {
3832
- "epoch": 3.85,
3833
- "learning_rate": 6.981107491856678e-05,
3834
- "loss": 0.2537,
3835
- "step": 5990
3836
- },
3837
- {
3838
- "epoch": 3.86,
3839
- "learning_rate": 6.942019543973941e-05,
3840
- "loss": 0.255,
3841
- "step": 6000
3842
- },
3843
- {
3844
- "epoch": 3.86,
3845
- "eval_loss": 0.2787570357322693,
3846
- "eval_runtime": 442.4383,
3847
- "eval_samples_per_second": 4.52,
3848
- "eval_steps_per_second": 0.565,
3849
- "step": 6000
3850
- },
3851
- {
3852
- "epoch": 3.86,
3853
- "learning_rate": 6.902931596091205e-05,
3854
- "loss": 0.2502,
3855
- "step": 6010
3856
- },
3857
- {
3858
- "epoch": 3.87,
3859
- "learning_rate": 6.863843648208468e-05,
3860
- "loss": 0.2528,
3861
- "step": 6020
3862
- },
3863
- {
3864
- "epoch": 3.88,
3865
- "learning_rate": 6.824755700325732e-05,
3866
- "loss": 0.2561,
3867
- "step": 6030
3868
- },
3869
- {
3870
- "epoch": 3.88,
3871
- "learning_rate": 6.785667752442996e-05,
3872
- "loss": 0.2487,
3873
- "step": 6040
3874
- },
3875
- {
3876
- "epoch": 3.89,
3877
- "learning_rate": 6.74657980456026e-05,
3878
- "loss": 0.2548,
3879
- "step": 6050
3880
- },
3881
- {
3882
- "epoch": 3.9,
3883
- "learning_rate": 6.707491856677524e-05,
3884
- "loss": 0.2496,
3885
- "step": 6060
3886
- },
3887
- {
3888
- "epoch": 3.9,
3889
- "learning_rate": 6.668403908794787e-05,
3890
- "loss": 0.2564,
3891
- "step": 6070
3892
- },
3893
- {
3894
- "epoch": 3.91,
3895
- "learning_rate": 6.629315960912051e-05,
3896
- "loss": 0.2467,
3897
- "step": 6080
3898
- },
3899
- {
3900
- "epoch": 3.92,
3901
- "learning_rate": 6.590228013029316e-05,
3902
- "loss": 0.2614,
3903
- "step": 6090
3904
- },
3905
- {
3906
- "epoch": 3.92,
3907
- "learning_rate": 6.55114006514658e-05,
3908
- "loss": 0.2529,
3909
- "step": 6100
3910
- },
3911
- {
3912
- "epoch": 3.93,
3913
- "learning_rate": 6.512052117263843e-05,
3914
- "loss": 0.2508,
3915
- "step": 6110
3916
- },
3917
- {
3918
- "epoch": 3.94,
3919
- "learning_rate": 6.472964169381107e-05,
3920
- "loss": 0.2495,
3921
- "step": 6120
3922
- },
3923
- {
3924
- "epoch": 3.94,
3925
- "learning_rate": 6.433876221498371e-05,
3926
- "loss": 0.2556,
3927
- "step": 6130
3928
- },
3929
- {
3930
- "epoch": 3.95,
3931
- "learning_rate": 6.394788273615635e-05,
3932
- "loss": 0.2543,
3933
- "step": 6140
3934
- },
3935
- {
3936
- "epoch": 3.95,
3937
- "learning_rate": 6.355700325732898e-05,
3938
- "loss": 0.2528,
3939
- "step": 6150
3940
- },
3941
- {
3942
- "epoch": 3.96,
3943
- "learning_rate": 6.316612377850162e-05,
3944
- "loss": 0.2512,
3945
- "step": 6160
3946
- },
3947
- {
3948
- "epoch": 3.97,
3949
- "learning_rate": 6.277524429967426e-05,
3950
- "loss": 0.249,
3951
- "step": 6170
3952
- },
3953
- {
3954
- "epoch": 3.97,
3955
- "learning_rate": 6.23843648208469e-05,
3956
- "loss": 0.2591,
3957
- "step": 6180
3958
- },
3959
- {
3960
- "epoch": 3.98,
3961
- "learning_rate": 6.199348534201955e-05,
3962
- "loss": 0.2459,
3963
- "step": 6190
3964
- },
3965
- {
3966
- "epoch": 3.99,
3967
- "learning_rate": 6.160260586319217e-05,
3968
- "loss": 0.2633,
3969
- "step": 6200
3970
- },
3971
- {
3972
- "epoch": 3.99,
3973
- "eval_loss": 0.27837860584259033,
3974
- "eval_runtime": 442.2743,
3975
- "eval_samples_per_second": 4.522,
3976
- "eval_steps_per_second": 0.565,
3977
- "step": 6200
3978
- }
3979
- ],
3980
- "logging_steps": 10,
3981
- "max_steps": 7775,
3982
- "num_train_epochs": 5,
3983
- "save_steps": 200,
3984
- "total_flos": 6.311538956559385e+18,
3985
- "trial_name": null,
3986
- "trial_params": null
3987
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
with_input/decomp_code_with_intermediates/best_model/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:811709a41c93ebb0a7d5b7817a4a00f1ea401d394af55af88e1e3c43b8ada772
3
- size 4091