CocoRoF commited on
Commit
97832bd
·
verified ·
1 Parent(s): 8fd5a8f

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb062e6d7fe33f43428cc5462071c99d5b872995393da30804845a2a39068ec
3
  size 368988278
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b177f8e6e610416c9eaa2d19105d6cca9a831064b7726e5c8f341bd53944c5f8
3
  size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8521c588712e3e156735b65edb7baaa4fbddc5f3c5187317da040ac6818a425
3
  size 1107079290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bec3ea0eef4469085df15e53e18783f858a9600d04a856b6095374533482744
3
  size 1107079290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74386f26f36ed67f56395205881e5db2d0c28ffcbeed50dd95b28771d2dac588
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c461c9d337dfc684e9352ec72bfa344e2f5d377f7cfc4475de9acae294dca89
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c88f9de084200454883a13c3717941ea3fd433e2f8735507fc30611f9c5501
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae392ec6232cbf9da21d6ed12bc8247d0d24e7f3a3606acd23be00f3e8bbfc5
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:965b00d4cb4710ebab57c8787b9925bb3f77b8eeba94a186ec4bc1c2f326ef3f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf3e7ca9991a58b0b16574a3c653483c551c270aa05aba06c162ea593f7b0f2
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5dc374b8b9a4c45c950f9d136feab85a767081fa59f0c7d68ed3a62060c4949
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c695bebf6bcb75cbe26378bfe0ab7e2a33c49f713b9d6e4d10632b24322977e7
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c7c212fb779217f1edac0baf44f67b608eefc1e0e4e3f5a9dd7eb557032c1bc
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5ebb13c71265c5464c9aa9bb9b66f07764d73befe6cd63a2aaf8e781bf0a374
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86e1effd626ce1e95dd68a0c8089fe19218f2b24dfe9e45ed2cab1c0ebc10ba1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12cc6e245e189be568c8dfd43a4dd8f04bb3dbd9f17f41458107935d2c2a6a9d
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:799cc83f60dfc1c4243cfd6403592112414a2eec494e6832f10221c96ff62c20
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36086646e9a8f76fea69f8a227112e83bb63524964ccdfb82f4cdad88b90e5e4
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:586777c398770c3255d3a1f48c7fef44ea9d89117c627c9ea490e16bfd9a49ba
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b44153bacf860d0ca6ce4c6b9380a199feab8a72ca613e6745bfb671b02c4e4
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e873dd7321fd30d3e4e5e5f7f797439300a2628eaf93d05b3409de6ede53350
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79dbc195ec36394029af26ba3c5335808fe0aa70a0021f62a3ae19ac05477bba
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5424464334147003,
5
  "eval_steps": 2500,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3523,6 +3523,1764 @@
3523
  "eval_samples_per_second": 2695.142,
3524
  "eval_steps_per_second": 42.117,
3525
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3526
  }
3527
  ],
3528
  "logging_steps": 10,
@@ -3542,7 +5300,7 @@
3542
  "attributes": {}
3543
  }
3544
  },
3545
- "total_flos": 1.3805582888730624e+19,
3546
  "train_batch_size": 16,
3547
  "trial_name": null,
3548
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8136696501220505,
5
  "eval_steps": 2500,
6
+ "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3523
  "eval_samples_per_second": 2695.142,
3524
  "eval_steps_per_second": 42.117,
3525
  "step": 5000
3526
+ },
3527
+ {
3528
+ "epoch": 0.5435313262815297,
3529
+ "grad_norm": 15.25,
3530
+ "learning_rate": 9.957536759096964e-08,
3531
+ "loss": 25.5473,
3532
+ "step": 5010
3533
+ },
3534
+ {
3535
+ "epoch": 0.5446162191483591,
3536
+ "grad_norm": 16.15625,
3537
+ "learning_rate": 9.957452002129094e-08,
3538
+ "loss": 25.4106,
3539
+ "step": 5020
3540
+ },
3541
+ {
3542
+ "epoch": 0.5457011120151884,
3543
+ "grad_norm": 16.21875,
3544
+ "learning_rate": 9.957367245161224e-08,
3545
+ "loss": 25.8692,
3546
+ "step": 5030
3547
+ },
3548
+ {
3549
+ "epoch": 0.5467860048820179,
3550
+ "grad_norm": 17.0,
3551
+ "learning_rate": 9.957282488193353e-08,
3552
+ "loss": 25.8226,
3553
+ "step": 5040
3554
+ },
3555
+ {
3556
+ "epoch": 0.5478708977488473,
3557
+ "grad_norm": 16.046875,
3558
+ "learning_rate": 9.957197731225484e-08,
3559
+ "loss": 25.5776,
3560
+ "step": 5050
3561
+ },
3562
+ {
3563
+ "epoch": 0.5489557906156767,
3564
+ "grad_norm": 15.8046875,
3565
+ "learning_rate": 9.957112974257613e-08,
3566
+ "loss": 25.6557,
3567
+ "step": 5060
3568
+ },
3569
+ {
3570
+ "epoch": 0.5500406834825061,
3571
+ "grad_norm": 16.78125,
3572
+ "learning_rate": 9.957028217289743e-08,
3573
+ "loss": 25.3625,
3574
+ "step": 5070
3575
+ },
3576
+ {
3577
+ "epoch": 0.5511255763493355,
3578
+ "grad_norm": 15.46875,
3579
+ "learning_rate": 9.956943460321873e-08,
3580
+ "loss": 25.7849,
3581
+ "step": 5080
3582
+ },
3583
+ {
3584
+ "epoch": 0.5522104692161649,
3585
+ "grad_norm": 15.7421875,
3586
+ "learning_rate": 9.956858703354002e-08,
3587
+ "loss": 25.441,
3588
+ "step": 5090
3589
+ },
3590
+ {
3591
+ "epoch": 0.5532953620829943,
3592
+ "grad_norm": 16.625,
3593
+ "learning_rate": 9.956773946386132e-08,
3594
+ "loss": 25.295,
3595
+ "step": 5100
3596
+ },
3597
+ {
3598
+ "epoch": 0.5543802549498237,
3599
+ "grad_norm": 15.984375,
3600
+ "learning_rate": 9.956689189418261e-08,
3601
+ "loss": 25.7138,
3602
+ "step": 5110
3603
+ },
3604
+ {
3605
+ "epoch": 0.5554651478166531,
3606
+ "grad_norm": 15.5546875,
3607
+ "learning_rate": 9.956604432450391e-08,
3608
+ "loss": 25.5789,
3609
+ "step": 5120
3610
+ },
3611
+ {
3612
+ "epoch": 0.5565500406834825,
3613
+ "grad_norm": 15.859375,
3614
+ "learning_rate": 9.956519675482521e-08,
3615
+ "loss": 26.0074,
3616
+ "step": 5130
3617
+ },
3618
+ {
3619
+ "epoch": 0.5576349335503119,
3620
+ "grad_norm": 16.328125,
3621
+ "learning_rate": 9.95643491851465e-08,
3622
+ "loss": 25.6203,
3623
+ "step": 5140
3624
+ },
3625
+ {
3626
+ "epoch": 0.5587198264171414,
3627
+ "grad_norm": 16.015625,
3628
+ "learning_rate": 9.95635016154678e-08,
3629
+ "loss": 25.5567,
3630
+ "step": 5150
3631
+ },
3632
+ {
3633
+ "epoch": 0.5598047192839707,
3634
+ "grad_norm": 16.53125,
3635
+ "learning_rate": 9.95626540457891e-08,
3636
+ "loss": 25.5907,
3637
+ "step": 5160
3638
+ },
3639
+ {
3640
+ "epoch": 0.5608896121508001,
3641
+ "grad_norm": 15.859375,
3642
+ "learning_rate": 9.95618064761104e-08,
3643
+ "loss": 25.5578,
3644
+ "step": 5170
3645
+ },
3646
+ {
3647
+ "epoch": 0.5619745050176295,
3648
+ "grad_norm": 15.3515625,
3649
+ "learning_rate": 9.95609589064317e-08,
3650
+ "loss": 25.5828,
3651
+ "step": 5180
3652
+ },
3653
+ {
3654
+ "epoch": 0.5630593978844589,
3655
+ "grad_norm": 15.9375,
3656
+ "learning_rate": 9.956011133675299e-08,
3657
+ "loss": 25.5452,
3658
+ "step": 5190
3659
+ },
3660
+ {
3661
+ "epoch": 0.5641442907512884,
3662
+ "grad_norm": 16.484375,
3663
+ "learning_rate": 9.955926376707428e-08,
3664
+ "loss": 25.6447,
3665
+ "step": 5200
3666
+ },
3667
+ {
3668
+ "epoch": 0.5652291836181177,
3669
+ "grad_norm": 16.578125,
3670
+ "learning_rate": 9.955841619739557e-08,
3671
+ "loss": 25.5724,
3672
+ "step": 5210
3673
+ },
3674
+ {
3675
+ "epoch": 0.5663140764849471,
3676
+ "grad_norm": 16.6875,
3677
+ "learning_rate": 9.955756862771688e-08,
3678
+ "loss": 25.3112,
3679
+ "step": 5220
3680
+ },
3681
+ {
3682
+ "epoch": 0.5673989693517765,
3683
+ "grad_norm": 17.03125,
3684
+ "learning_rate": 9.955672105803817e-08,
3685
+ "loss": 25.9633,
3686
+ "step": 5230
3687
+ },
3688
+ {
3689
+ "epoch": 0.5684838622186059,
3690
+ "grad_norm": 15.171875,
3691
+ "learning_rate": 9.955587348835948e-08,
3692
+ "loss": 25.6802,
3693
+ "step": 5240
3694
+ },
3695
+ {
3696
+ "epoch": 0.5695687550854354,
3697
+ "grad_norm": 16.046875,
3698
+ "learning_rate": 9.955502591868077e-08,
3699
+ "loss": 25.806,
3700
+ "step": 5250
3701
+ },
3702
+ {
3703
+ "epoch": 0.5706536479522647,
3704
+ "grad_norm": 15.6640625,
3705
+ "learning_rate": 9.955417834900208e-08,
3706
+ "loss": 25.5945,
3707
+ "step": 5260
3708
+ },
3709
+ {
3710
+ "epoch": 0.5717385408190941,
3711
+ "grad_norm": 16.875,
3712
+ "learning_rate": 9.955333077932337e-08,
3713
+ "loss": 25.5887,
3714
+ "step": 5270
3715
+ },
3716
+ {
3717
+ "epoch": 0.5728234336859235,
3718
+ "grad_norm": 15.578125,
3719
+ "learning_rate": 9.955248320964466e-08,
3720
+ "loss": 25.6609,
3721
+ "step": 5280
3722
+ },
3723
+ {
3724
+ "epoch": 0.5739083265527529,
3725
+ "grad_norm": 15.765625,
3726
+ "learning_rate": 9.955163563996595e-08,
3727
+ "loss": 25.9165,
3728
+ "step": 5290
3729
+ },
3730
+ {
3731
+ "epoch": 0.5749932194195824,
3732
+ "grad_norm": 16.78125,
3733
+ "learning_rate": 9.955078807028725e-08,
3734
+ "loss": 25.7408,
3735
+ "step": 5300
3736
+ },
3737
+ {
3738
+ "epoch": 0.5760781122864117,
3739
+ "grad_norm": 16.125,
3740
+ "learning_rate": 9.954994050060855e-08,
3741
+ "loss": 25.3137,
3742
+ "step": 5310
3743
+ },
3744
+ {
3745
+ "epoch": 0.5771630051532411,
3746
+ "grad_norm": 16.875,
3747
+ "learning_rate": 9.954909293092984e-08,
3748
+ "loss": 25.5784,
3749
+ "step": 5320
3750
+ },
3751
+ {
3752
+ "epoch": 0.5782478980200705,
3753
+ "grad_norm": 16.140625,
3754
+ "learning_rate": 9.954824536125115e-08,
3755
+ "loss": 25.7218,
3756
+ "step": 5330
3757
+ },
3758
+ {
3759
+ "epoch": 0.5793327908868999,
3760
+ "grad_norm": 15.3828125,
3761
+ "learning_rate": 9.954739779157244e-08,
3762
+ "loss": 25.5828,
3763
+ "step": 5340
3764
+ },
3765
+ {
3766
+ "epoch": 0.5804176837537294,
3767
+ "grad_norm": 15.828125,
3768
+ "learning_rate": 9.954655022189373e-08,
3769
+ "loss": 25.8361,
3770
+ "step": 5350
3771
+ },
3772
+ {
3773
+ "epoch": 0.5815025766205587,
3774
+ "grad_norm": 16.875,
3775
+ "learning_rate": 9.954570265221504e-08,
3776
+ "loss": 25.5323,
3777
+ "step": 5360
3778
+ },
3779
+ {
3780
+ "epoch": 0.5825874694873882,
3781
+ "grad_norm": 16.796875,
3782
+ "learning_rate": 9.954485508253633e-08,
3783
+ "loss": 25.5234,
3784
+ "step": 5370
3785
+ },
3786
+ {
3787
+ "epoch": 0.5836723623542175,
3788
+ "grad_norm": 15.953125,
3789
+ "learning_rate": 9.954400751285762e-08,
3790
+ "loss": 25.3838,
3791
+ "step": 5380
3792
+ },
3793
+ {
3794
+ "epoch": 0.5847572552210469,
3795
+ "grad_norm": 16.53125,
3796
+ "learning_rate": 9.954315994317892e-08,
3797
+ "loss": 25.6446,
3798
+ "step": 5390
3799
+ },
3800
+ {
3801
+ "epoch": 0.5858421480878763,
3802
+ "grad_norm": 16.0625,
3803
+ "learning_rate": 9.954231237350022e-08,
3804
+ "loss": 25.5635,
3805
+ "step": 5400
3806
+ },
3807
+ {
3808
+ "epoch": 0.5869270409547057,
3809
+ "grad_norm": 16.59375,
3810
+ "learning_rate": 9.954146480382152e-08,
3811
+ "loss": 25.7279,
3812
+ "step": 5410
3813
+ },
3814
+ {
3815
+ "epoch": 0.5880119338215352,
3816
+ "grad_norm": 17.4375,
3817
+ "learning_rate": 9.954061723414281e-08,
3818
+ "loss": 25.78,
3819
+ "step": 5420
3820
+ },
3821
+ {
3822
+ "epoch": 0.5890968266883645,
3823
+ "grad_norm": 16.453125,
3824
+ "learning_rate": 9.953976966446411e-08,
3825
+ "loss": 26.013,
3826
+ "step": 5430
3827
+ },
3828
+ {
3829
+ "epoch": 0.5901817195551939,
3830
+ "grad_norm": 16.796875,
3831
+ "learning_rate": 9.95389220947854e-08,
3832
+ "loss": 25.1266,
3833
+ "step": 5440
3834
+ },
3835
+ {
3836
+ "epoch": 0.5912666124220233,
3837
+ "grad_norm": 15.0625,
3838
+ "learning_rate": 9.953807452510671e-08,
3839
+ "loss": 25.5315,
3840
+ "step": 5450
3841
+ },
3842
+ {
3843
+ "epoch": 0.5923515052888527,
3844
+ "grad_norm": 16.09375,
3845
+ "learning_rate": 9.9537226955428e-08,
3846
+ "loss": 25.8088,
3847
+ "step": 5460
3848
+ },
3849
+ {
3850
+ "epoch": 0.5934363981556822,
3851
+ "grad_norm": 17.921875,
3852
+ "learning_rate": 9.95363793857493e-08,
3853
+ "loss": 25.5707,
3854
+ "step": 5470
3855
+ },
3856
+ {
3857
+ "epoch": 0.5945212910225115,
3858
+ "grad_norm": 15.9453125,
3859
+ "learning_rate": 9.953553181607059e-08,
3860
+ "loss": 25.5832,
3861
+ "step": 5480
3862
+ },
3863
+ {
3864
+ "epoch": 0.595606183889341,
3865
+ "grad_norm": 16.25,
3866
+ "learning_rate": 9.953468424639188e-08,
3867
+ "loss": 25.4021,
3868
+ "step": 5490
3869
+ },
3870
+ {
3871
+ "epoch": 0.5966910767561703,
3872
+ "grad_norm": 16.859375,
3873
+ "learning_rate": 9.953383667671319e-08,
3874
+ "loss": 25.7499,
3875
+ "step": 5500
3876
+ },
3877
+ {
3878
+ "epoch": 0.5977759696229997,
3879
+ "grad_norm": 15.3125,
3880
+ "learning_rate": 9.953298910703448e-08,
3881
+ "loss": 25.458,
3882
+ "step": 5510
3883
+ },
3884
+ {
3885
+ "epoch": 0.5988608624898292,
3886
+ "grad_norm": 16.390625,
3887
+ "learning_rate": 9.953214153735579e-08,
3888
+ "loss": 25.9301,
3889
+ "step": 5520
3890
+ },
3891
+ {
3892
+ "epoch": 0.5999457553566585,
3893
+ "grad_norm": 16.25,
3894
+ "learning_rate": 9.953129396767708e-08,
3895
+ "loss": 25.3097,
3896
+ "step": 5530
3897
+ },
3898
+ {
3899
+ "epoch": 0.601030648223488,
3900
+ "grad_norm": 15.6171875,
3901
+ "learning_rate": 9.953044639799838e-08,
3902
+ "loss": 25.4981,
3903
+ "step": 5540
3904
+ },
3905
+ {
3906
+ "epoch": 0.6021155410903173,
3907
+ "grad_norm": 16.109375,
3908
+ "learning_rate": 9.952959882831968e-08,
3909
+ "loss": 25.3685,
3910
+ "step": 5550
3911
+ },
3912
+ {
3913
+ "epoch": 0.6032004339571467,
3914
+ "grad_norm": 16.296875,
3915
+ "learning_rate": 9.952875125864097e-08,
3916
+ "loss": 25.561,
3917
+ "step": 5560
3918
+ },
3919
+ {
3920
+ "epoch": 0.6042853268239762,
3921
+ "grad_norm": 16.625,
3922
+ "learning_rate": 9.952790368896226e-08,
3923
+ "loss": 25.661,
3924
+ "step": 5570
3925
+ },
3926
+ {
3927
+ "epoch": 0.6053702196908055,
3928
+ "grad_norm": 15.4609375,
3929
+ "learning_rate": 9.952705611928355e-08,
3930
+ "loss": 25.9727,
3931
+ "step": 5580
3932
+ },
3933
+ {
3934
+ "epoch": 0.606455112557635,
3935
+ "grad_norm": 16.46875,
3936
+ "learning_rate": 9.952620854960486e-08,
3937
+ "loss": 25.7807,
3938
+ "step": 5590
3939
+ },
3940
+ {
3941
+ "epoch": 0.6075400054244643,
3942
+ "grad_norm": 17.0625,
3943
+ "learning_rate": 9.952536097992615e-08,
3944
+ "loss": 25.785,
3945
+ "step": 5600
3946
+ },
3947
+ {
3948
+ "epoch": 0.6086248982912937,
3949
+ "grad_norm": 16.1875,
3950
+ "learning_rate": 9.952451341024746e-08,
3951
+ "loss": 25.3121,
3952
+ "step": 5610
3953
+ },
3954
+ {
3955
+ "epoch": 0.6097097911581232,
3956
+ "grad_norm": 16.859375,
3957
+ "learning_rate": 9.952366584056875e-08,
3958
+ "loss": 25.6218,
3959
+ "step": 5620
3960
+ },
3961
+ {
3962
+ "epoch": 0.6107946840249525,
3963
+ "grad_norm": 16.1875,
3964
+ "learning_rate": 9.952281827089004e-08,
3965
+ "loss": 25.5746,
3966
+ "step": 5630
3967
+ },
3968
+ {
3969
+ "epoch": 0.611879576891782,
3970
+ "grad_norm": 15.625,
3971
+ "learning_rate": 9.952197070121135e-08,
3972
+ "loss": 25.7354,
3973
+ "step": 5640
3974
+ },
3975
+ {
3976
+ "epoch": 0.6129644697586113,
3977
+ "grad_norm": 15.8203125,
3978
+ "learning_rate": 9.952112313153264e-08,
3979
+ "loss": 25.3926,
3980
+ "step": 5650
3981
+ },
3982
+ {
3983
+ "epoch": 0.6140493626254407,
3984
+ "grad_norm": 16.703125,
3985
+ "learning_rate": 9.952027556185393e-08,
3986
+ "loss": 25.4138,
3987
+ "step": 5660
3988
+ },
3989
+ {
3990
+ "epoch": 0.6151342554922702,
3991
+ "grad_norm": 15.90625,
3992
+ "learning_rate": 9.951942799217523e-08,
3993
+ "loss": 25.4505,
3994
+ "step": 5670
3995
+ },
3996
+ {
3997
+ "epoch": 0.6162191483590995,
3998
+ "grad_norm": 15.625,
3999
+ "learning_rate": 9.951858042249653e-08,
4000
+ "loss": 24.9731,
4001
+ "step": 5680
4002
+ },
4003
+ {
4004
+ "epoch": 0.617304041225929,
4005
+ "grad_norm": 15.984375,
4006
+ "learning_rate": 9.951773285281782e-08,
4007
+ "loss": 25.5634,
4008
+ "step": 5690
4009
+ },
4010
+ {
4011
+ "epoch": 0.6183889340927583,
4012
+ "grad_norm": 15.9765625,
4013
+ "learning_rate": 9.951688528313912e-08,
4014
+ "loss": 25.3609,
4015
+ "step": 5700
4016
+ },
4017
+ {
4018
+ "epoch": 0.6194738269595877,
4019
+ "grad_norm": 16.234375,
4020
+ "learning_rate": 9.951603771346042e-08,
4021
+ "loss": 25.2691,
4022
+ "step": 5710
4023
+ },
4024
+ {
4025
+ "epoch": 0.6205587198264172,
4026
+ "grad_norm": 15.6328125,
4027
+ "learning_rate": 9.951519014378171e-08,
4028
+ "loss": 25.3243,
4029
+ "step": 5720
4030
+ },
4031
+ {
4032
+ "epoch": 0.6216436126932465,
4033
+ "grad_norm": 16.328125,
4034
+ "learning_rate": 9.951434257410302e-08,
4035
+ "loss": 25.3393,
4036
+ "step": 5730
4037
+ },
4038
+ {
4039
+ "epoch": 0.622728505560076,
4040
+ "grad_norm": 15.7578125,
4041
+ "learning_rate": 9.951349500442431e-08,
4042
+ "loss": 25.53,
4043
+ "step": 5740
4044
+ },
4045
+ {
4046
+ "epoch": 0.6238133984269053,
4047
+ "grad_norm": 16.515625,
4048
+ "learning_rate": 9.95126474347456e-08,
4049
+ "loss": 25.6338,
4050
+ "step": 5750
4051
+ },
4052
+ {
4053
+ "epoch": 0.6248982912937348,
4054
+ "grad_norm": 16.328125,
4055
+ "learning_rate": 9.95117998650669e-08,
4056
+ "loss": 25.7997,
4057
+ "step": 5760
4058
+ },
4059
+ {
4060
+ "epoch": 0.6259831841605642,
4061
+ "grad_norm": 15.484375,
4062
+ "learning_rate": 9.951095229538819e-08,
4063
+ "loss": 25.5708,
4064
+ "step": 5770
4065
+ },
4066
+ {
4067
+ "epoch": 0.6270680770273935,
4068
+ "grad_norm": 16.21875,
4069
+ "learning_rate": 9.95101047257095e-08,
4070
+ "loss": 25.4739,
4071
+ "step": 5780
4072
+ },
4073
+ {
4074
+ "epoch": 0.628152969894223,
4075
+ "grad_norm": 15.59375,
4076
+ "learning_rate": 9.950925715603079e-08,
4077
+ "loss": 25.9438,
4078
+ "step": 5790
4079
+ },
4080
+ {
4081
+ "epoch": 0.6292378627610523,
4082
+ "grad_norm": 16.609375,
4083
+ "learning_rate": 9.95084095863521e-08,
4084
+ "loss": 25.3082,
4085
+ "step": 5800
4086
+ },
4087
+ {
4088
+ "epoch": 0.6303227556278818,
4089
+ "grad_norm": 16.109375,
4090
+ "learning_rate": 9.950756201667339e-08,
4091
+ "loss": 25.5058,
4092
+ "step": 5810
4093
+ },
4094
+ {
4095
+ "epoch": 0.6314076484947111,
4096
+ "grad_norm": 15.9921875,
4097
+ "learning_rate": 9.950671444699469e-08,
4098
+ "loss": 25.9045,
4099
+ "step": 5820
4100
+ },
4101
+ {
4102
+ "epoch": 0.6324925413615405,
4103
+ "grad_norm": 16.765625,
4104
+ "learning_rate": 9.950586687731598e-08,
4105
+ "loss": 25.4663,
4106
+ "step": 5830
4107
+ },
4108
+ {
4109
+ "epoch": 0.63357743422837,
4110
+ "grad_norm": 15.7578125,
4111
+ "learning_rate": 9.950501930763728e-08,
4112
+ "loss": 25.3552,
4113
+ "step": 5840
4114
+ },
4115
+ {
4116
+ "epoch": 0.6346623270951993,
4117
+ "grad_norm": 15.78125,
4118
+ "learning_rate": 9.950417173795857e-08,
4119
+ "loss": 25.6831,
4120
+ "step": 5850
4121
+ },
4122
+ {
4123
+ "epoch": 0.6357472199620288,
4124
+ "grad_norm": 17.40625,
4125
+ "learning_rate": 9.950332416827986e-08,
4126
+ "loss": 25.7711,
4127
+ "step": 5860
4128
+ },
4129
+ {
4130
+ "epoch": 0.6368321128288581,
4131
+ "grad_norm": 15.9140625,
4132
+ "learning_rate": 9.950247659860117e-08,
4133
+ "loss": 25.505,
4134
+ "step": 5870
4135
+ },
4136
+ {
4137
+ "epoch": 0.6379170056956875,
4138
+ "grad_norm": 16.765625,
4139
+ "learning_rate": 9.950162902892246e-08,
4140
+ "loss": 25.4254,
4141
+ "step": 5880
4142
+ },
4143
+ {
4144
+ "epoch": 0.639001898562517,
4145
+ "grad_norm": 16.5625,
4146
+ "learning_rate": 9.950078145924375e-08,
4147
+ "loss": 25.3663,
4148
+ "step": 5890
4149
+ },
4150
+ {
4151
+ "epoch": 0.6400867914293463,
4152
+ "grad_norm": 16.34375,
4153
+ "learning_rate": 9.949993388956506e-08,
4154
+ "loss": 25.484,
4155
+ "step": 5900
4156
+ },
4157
+ {
4158
+ "epoch": 0.6411716842961758,
4159
+ "grad_norm": 17.03125,
4160
+ "learning_rate": 9.949908631988635e-08,
4161
+ "loss": 25.6846,
4162
+ "step": 5910
4163
+ },
4164
+ {
4165
+ "epoch": 0.6422565771630051,
4166
+ "grad_norm": 16.0625,
4167
+ "learning_rate": 9.949823875020766e-08,
4168
+ "loss": 26.0782,
4169
+ "step": 5920
4170
+ },
4171
+ {
4172
+ "epoch": 0.6433414700298346,
4173
+ "grad_norm": 15.3828125,
4174
+ "learning_rate": 9.949739118052895e-08,
4175
+ "loss": 25.653,
4176
+ "step": 5930
4177
+ },
4178
+ {
4179
+ "epoch": 0.644426362896664,
4180
+ "grad_norm": 15.8828125,
4181
+ "learning_rate": 9.949654361085024e-08,
4182
+ "loss": 25.5483,
4183
+ "step": 5940
4184
+ },
4185
+ {
4186
+ "epoch": 0.6455112557634933,
4187
+ "grad_norm": 15.40625,
4188
+ "learning_rate": 9.949569604117153e-08,
4189
+ "loss": 25.1034,
4190
+ "step": 5950
4191
+ },
4192
+ {
4193
+ "epoch": 0.6465961486303228,
4194
+ "grad_norm": 16.75,
4195
+ "learning_rate": 9.949484847149283e-08,
4196
+ "loss": 25.5698,
4197
+ "step": 5960
4198
+ },
4199
+ {
4200
+ "epoch": 0.6476810414971521,
4201
+ "grad_norm": 15.5703125,
4202
+ "learning_rate": 9.949400090181413e-08,
4203
+ "loss": 25.7175,
4204
+ "step": 5970
4205
+ },
4206
+ {
4207
+ "epoch": 0.6487659343639816,
4208
+ "grad_norm": 16.40625,
4209
+ "learning_rate": 9.949315333213542e-08,
4210
+ "loss": 25.5801,
4211
+ "step": 5980
4212
+ },
4213
+ {
4214
+ "epoch": 0.649850827230811,
4215
+ "grad_norm": 16.578125,
4216
+ "learning_rate": 9.949230576245673e-08,
4217
+ "loss": 25.549,
4218
+ "step": 5990
4219
+ },
4220
+ {
4221
+ "epoch": 0.6509357200976403,
4222
+ "grad_norm": 15.734375,
4223
+ "learning_rate": 9.949145819277802e-08,
4224
+ "loss": 25.7726,
4225
+ "step": 6000
4226
+ },
4227
+ {
4228
+ "epoch": 0.6520206129644698,
4229
+ "grad_norm": 16.03125,
4230
+ "learning_rate": 9.949061062309933e-08,
4231
+ "loss": 25.474,
4232
+ "step": 6010
4233
+ },
4234
+ {
4235
+ "epoch": 0.6531055058312991,
4236
+ "grad_norm": 16.84375,
4237
+ "learning_rate": 9.948976305342062e-08,
4238
+ "loss": 25.6461,
4239
+ "step": 6020
4240
+ },
4241
+ {
4242
+ "epoch": 0.6541903986981286,
4243
+ "grad_norm": 16.3125,
4244
+ "learning_rate": 9.948891548374191e-08,
4245
+ "loss": 25.6342,
4246
+ "step": 6030
4247
+ },
4248
+ {
4249
+ "epoch": 0.655275291564958,
4250
+ "grad_norm": 16.34375,
4251
+ "learning_rate": 9.94880679140632e-08,
4252
+ "loss": 25.0423,
4253
+ "step": 6040
4254
+ },
4255
+ {
4256
+ "epoch": 0.6563601844317873,
4257
+ "grad_norm": 15.984375,
4258
+ "learning_rate": 9.94872203443845e-08,
4259
+ "loss": 25.4624,
4260
+ "step": 6050
4261
+ },
4262
+ {
4263
+ "epoch": 0.6574450772986168,
4264
+ "grad_norm": 14.7578125,
4265
+ "learning_rate": 9.94863727747058e-08,
4266
+ "loss": 25.7943,
4267
+ "step": 6060
4268
+ },
4269
+ {
4270
+ "epoch": 0.6585299701654461,
4271
+ "grad_norm": 16.453125,
4272
+ "learning_rate": 9.94855252050271e-08,
4273
+ "loss": 25.8636,
4274
+ "step": 6070
4275
+ },
4276
+ {
4277
+ "epoch": 0.6596148630322756,
4278
+ "grad_norm": 16.796875,
4279
+ "learning_rate": 9.94846776353484e-08,
4280
+ "loss": 25.6451,
4281
+ "step": 6080
4282
+ },
4283
+ {
4284
+ "epoch": 0.660699755899105,
4285
+ "grad_norm": 16.859375,
4286
+ "learning_rate": 9.94838300656697e-08,
4287
+ "loss": 25.633,
4288
+ "step": 6090
4289
+ },
4290
+ {
4291
+ "epoch": 0.6617846487659343,
4292
+ "grad_norm": 15.90625,
4293
+ "learning_rate": 9.948298249599099e-08,
4294
+ "loss": 25.4134,
4295
+ "step": 6100
4296
+ },
4297
+ {
4298
+ "epoch": 0.6628695416327638,
4299
+ "grad_norm": 16.671875,
4300
+ "learning_rate": 9.948213492631229e-08,
4301
+ "loss": 25.8753,
4302
+ "step": 6110
4303
+ },
4304
+ {
4305
+ "epoch": 0.6639544344995931,
4306
+ "grad_norm": 16.953125,
4307
+ "learning_rate": 9.948128735663359e-08,
4308
+ "loss": 25.5192,
4309
+ "step": 6120
4310
+ },
4311
+ {
4312
+ "epoch": 0.6650393273664226,
4313
+ "grad_norm": 16.96875,
4314
+ "learning_rate": 9.948043978695488e-08,
4315
+ "loss": 25.7765,
4316
+ "step": 6130
4317
+ },
4318
+ {
4319
+ "epoch": 0.666124220233252,
4320
+ "grad_norm": 16.03125,
4321
+ "learning_rate": 9.947959221727617e-08,
4322
+ "loss": 26.0615,
4323
+ "step": 6140
4324
+ },
4325
+ {
4326
+ "epoch": 0.6672091131000814,
4327
+ "grad_norm": 16.875,
4328
+ "learning_rate": 9.947874464759748e-08,
4329
+ "loss": 25.7446,
4330
+ "step": 6150
4331
+ },
4332
+ {
4333
+ "epoch": 0.6682940059669108,
4334
+ "grad_norm": 17.078125,
4335
+ "learning_rate": 9.947789707791877e-08,
4336
+ "loss": 25.66,
4337
+ "step": 6160
4338
+ },
4339
+ {
4340
+ "epoch": 0.6693788988337401,
4341
+ "grad_norm": 16.921875,
4342
+ "learning_rate": 9.947704950824006e-08,
4343
+ "loss": 25.6866,
4344
+ "step": 6170
4345
+ },
4346
+ {
4347
+ "epoch": 0.6704637917005696,
4348
+ "grad_norm": 15.953125,
4349
+ "learning_rate": 9.947620193856137e-08,
4350
+ "loss": 25.5425,
4351
+ "step": 6180
4352
+ },
4353
+ {
4354
+ "epoch": 0.671548684567399,
4355
+ "grad_norm": 16.21875,
4356
+ "learning_rate": 9.947535436888266e-08,
4357
+ "loss": 25.5211,
4358
+ "step": 6190
4359
+ },
4360
+ {
4361
+ "epoch": 0.6726335774342284,
4362
+ "grad_norm": 16.421875,
4363
+ "learning_rate": 9.947450679920397e-08,
4364
+ "loss": 25.6975,
4365
+ "step": 6200
4366
+ },
4367
+ {
4368
+ "epoch": 0.6737184703010578,
4369
+ "grad_norm": 16.265625,
4370
+ "learning_rate": 9.947365922952526e-08,
4371
+ "loss": 25.173,
4372
+ "step": 6210
4373
+ },
4374
+ {
4375
+ "epoch": 0.6748033631678871,
4376
+ "grad_norm": 16.28125,
4377
+ "learning_rate": 9.947281165984655e-08,
4378
+ "loss": 25.3564,
4379
+ "step": 6220
4380
+ },
4381
+ {
4382
+ "epoch": 0.6758882560347166,
4383
+ "grad_norm": 15.984375,
4384
+ "learning_rate": 9.947196409016784e-08,
4385
+ "loss": 25.2739,
4386
+ "step": 6230
4387
+ },
4388
+ {
4389
+ "epoch": 0.6769731489015459,
4390
+ "grad_norm": 15.9609375,
4391
+ "learning_rate": 9.947111652048913e-08,
4392
+ "loss": 25.6314,
4393
+ "step": 6240
4394
+ },
4395
+ {
4396
+ "epoch": 0.6780580417683754,
4397
+ "grad_norm": 15.984375,
4398
+ "learning_rate": 9.947026895081044e-08,
4399
+ "loss": 25.3637,
4400
+ "step": 6250
4401
+ },
4402
+ {
4403
+ "epoch": 0.6791429346352048,
4404
+ "grad_norm": 16.109375,
4405
+ "learning_rate": 9.946942138113173e-08,
4406
+ "loss": 25.3983,
4407
+ "step": 6260
4408
+ },
4409
+ {
4410
+ "epoch": 0.6802278275020341,
4411
+ "grad_norm": 15.828125,
4412
+ "learning_rate": 9.946857381145304e-08,
4413
+ "loss": 25.3871,
4414
+ "step": 6270
4415
+ },
4416
+ {
4417
+ "epoch": 0.6813127203688636,
4418
+ "grad_norm": 15.734375,
4419
+ "learning_rate": 9.946772624177433e-08,
4420
+ "loss": 25.7047,
4421
+ "step": 6280
4422
+ },
4423
+ {
4424
+ "epoch": 0.6823976132356929,
4425
+ "grad_norm": 16.78125,
4426
+ "learning_rate": 9.946687867209564e-08,
4427
+ "loss": 25.5006,
4428
+ "step": 6290
4429
+ },
4430
+ {
4431
+ "epoch": 0.6834825061025224,
4432
+ "grad_norm": 15.4609375,
4433
+ "learning_rate": 9.946603110241693e-08,
4434
+ "loss": 25.687,
4435
+ "step": 6300
4436
+ },
4437
+ {
4438
+ "epoch": 0.6845673989693518,
4439
+ "grad_norm": 16.453125,
4440
+ "learning_rate": 9.946518353273822e-08,
4441
+ "loss": 26.0965,
4442
+ "step": 6310
4443
+ },
4444
+ {
4445
+ "epoch": 0.6856522918361811,
4446
+ "grad_norm": 16.171875,
4447
+ "learning_rate": 9.946433596305951e-08,
4448
+ "loss": 25.6929,
4449
+ "step": 6320
4450
+ },
4451
+ {
4452
+ "epoch": 0.6867371847030106,
4453
+ "grad_norm": 15.6953125,
4454
+ "learning_rate": 9.946348839338081e-08,
4455
+ "loss": 25.4518,
4456
+ "step": 6330
4457
+ },
4458
+ {
4459
+ "epoch": 0.6878220775698399,
4460
+ "grad_norm": 16.671875,
4461
+ "learning_rate": 9.946264082370211e-08,
4462
+ "loss": 25.556,
4463
+ "step": 6340
4464
+ },
4465
+ {
4466
+ "epoch": 0.6889069704366694,
4467
+ "grad_norm": 16.234375,
4468
+ "learning_rate": 9.94617932540234e-08,
4469
+ "loss": 25.6254,
4470
+ "step": 6350
4471
+ },
4472
+ {
4473
+ "epoch": 0.6899918633034988,
4474
+ "grad_norm": 16.203125,
4475
+ "learning_rate": 9.946094568434471e-08,
4476
+ "loss": 26.0094,
4477
+ "step": 6360
4478
+ },
4479
+ {
4480
+ "epoch": 0.6910767561703282,
4481
+ "grad_norm": 15.90625,
4482
+ "learning_rate": 9.9460098114666e-08,
4483
+ "loss": 25.6406,
4484
+ "step": 6370
4485
+ },
4486
+ {
4487
+ "epoch": 0.6921616490371576,
4488
+ "grad_norm": 16.640625,
4489
+ "learning_rate": 9.94592505449873e-08,
4490
+ "loss": 25.7946,
4491
+ "step": 6380
4492
+ },
4493
+ {
4494
+ "epoch": 0.6932465419039869,
4495
+ "grad_norm": 16.234375,
4496
+ "learning_rate": 9.94584029753086e-08,
4497
+ "loss": 25.6378,
4498
+ "step": 6390
4499
+ },
4500
+ {
4501
+ "epoch": 0.6943314347708164,
4502
+ "grad_norm": 15.8828125,
4503
+ "learning_rate": 9.94575554056299e-08,
4504
+ "loss": 25.6764,
4505
+ "step": 6400
4506
+ },
4507
+ {
4508
+ "epoch": 0.6954163276376458,
4509
+ "grad_norm": 16.578125,
4510
+ "learning_rate": 9.945670783595119e-08,
4511
+ "loss": 25.3388,
4512
+ "step": 6410
4513
+ },
4514
+ {
4515
+ "epoch": 0.6965012205044752,
4516
+ "grad_norm": 15.53125,
4517
+ "learning_rate": 9.945586026627248e-08,
4518
+ "loss": 25.7138,
4519
+ "step": 6420
4520
+ },
4521
+ {
4522
+ "epoch": 0.6975861133713046,
4523
+ "grad_norm": 16.25,
4524
+ "learning_rate": 9.945501269659378e-08,
4525
+ "loss": 25.5007,
4526
+ "step": 6430
4527
+ },
4528
+ {
4529
+ "epoch": 0.6986710062381339,
4530
+ "grad_norm": 16.0,
4531
+ "learning_rate": 9.945416512691508e-08,
4532
+ "loss": 25.2599,
4533
+ "step": 6440
4534
+ },
4535
+ {
4536
+ "epoch": 0.6997558991049634,
4537
+ "grad_norm": 15.4765625,
4538
+ "learning_rate": 9.945331755723637e-08,
4539
+ "loss": 25.7119,
4540
+ "step": 6450
4541
+ },
4542
+ {
4543
+ "epoch": 0.7008407919717928,
4544
+ "grad_norm": 16.546875,
4545
+ "learning_rate": 9.945246998755768e-08,
4546
+ "loss": 25.2432,
4547
+ "step": 6460
4548
+ },
4549
+ {
4550
+ "epoch": 0.7019256848386222,
4551
+ "grad_norm": 15.59375,
4552
+ "learning_rate": 9.945162241787897e-08,
4553
+ "loss": 25.4265,
4554
+ "step": 6470
4555
+ },
4556
+ {
4557
+ "epoch": 0.7030105777054516,
4558
+ "grad_norm": 16.4375,
4559
+ "learning_rate": 9.945077484820027e-08,
4560
+ "loss": 25.6269,
4561
+ "step": 6480
4562
+ },
4563
+ {
4564
+ "epoch": 0.704095470572281,
4565
+ "grad_norm": 16.5625,
4566
+ "learning_rate": 9.944992727852157e-08,
4567
+ "loss": 25.6877,
4568
+ "step": 6490
4569
+ },
4570
+ {
4571
+ "epoch": 0.7051803634391104,
4572
+ "grad_norm": 15.921875,
4573
+ "learning_rate": 9.944907970884286e-08,
4574
+ "loss": 25.9986,
4575
+ "step": 6500
4576
+ },
4577
+ {
4578
+ "epoch": 0.7062652563059398,
4579
+ "grad_norm": 16.515625,
4580
+ "learning_rate": 9.944823213916415e-08,
4581
+ "loss": 25.5435,
4582
+ "step": 6510
4583
+ },
4584
+ {
4585
+ "epoch": 0.7073501491727692,
4586
+ "grad_norm": 16.03125,
4587
+ "learning_rate": 9.944738456948544e-08,
4588
+ "loss": 25.8586,
4589
+ "step": 6520
4590
+ },
4591
+ {
4592
+ "epoch": 0.7084350420395986,
4593
+ "grad_norm": 15.9453125,
4594
+ "learning_rate": 9.944653699980675e-08,
4595
+ "loss": 25.3798,
4596
+ "step": 6530
4597
+ },
4598
+ {
4599
+ "epoch": 0.709519934906428,
4600
+ "grad_norm": 15.4140625,
4601
+ "learning_rate": 9.944568943012804e-08,
4602
+ "loss": 25.4956,
4603
+ "step": 6540
4604
+ },
4605
+ {
4606
+ "epoch": 0.7106048277732574,
4607
+ "grad_norm": 16.234375,
4608
+ "learning_rate": 9.944484186044935e-08,
4609
+ "loss": 25.7285,
4610
+ "step": 6550
4611
+ },
4612
+ {
4613
+ "epoch": 0.7116897206400868,
4614
+ "grad_norm": 15.9453125,
4615
+ "learning_rate": 9.944399429077064e-08,
4616
+ "loss": 25.5327,
4617
+ "step": 6560
4618
+ },
4619
+ {
4620
+ "epoch": 0.7127746135069162,
4621
+ "grad_norm": 16.390625,
4622
+ "learning_rate": 9.944314672109195e-08,
4623
+ "loss": 25.114,
4624
+ "step": 6570
4625
+ },
4626
+ {
4627
+ "epoch": 0.7138595063737456,
4628
+ "grad_norm": 16.234375,
4629
+ "learning_rate": 9.944229915141324e-08,
4630
+ "loss": 25.3845,
4631
+ "step": 6580
4632
+ },
4633
+ {
4634
+ "epoch": 0.714944399240575,
4635
+ "grad_norm": 16.671875,
4636
+ "learning_rate": 9.944145158173453e-08,
4637
+ "loss": 25.6654,
4638
+ "step": 6590
4639
+ },
4640
+ {
4641
+ "epoch": 0.7160292921074044,
4642
+ "grad_norm": 15.578125,
4643
+ "learning_rate": 9.944060401205582e-08,
4644
+ "loss": 25.7109,
4645
+ "step": 6600
4646
+ },
4647
+ {
4648
+ "epoch": 0.7171141849742338,
4649
+ "grad_norm": 15.8828125,
4650
+ "learning_rate": 9.943975644237712e-08,
4651
+ "loss": 25.6193,
4652
+ "step": 6610
4653
+ },
4654
+ {
4655
+ "epoch": 0.7181990778410632,
4656
+ "grad_norm": 16.4375,
4657
+ "learning_rate": 9.943890887269842e-08,
4658
+ "loss": 25.7856,
4659
+ "step": 6620
4660
+ },
4661
+ {
4662
+ "epoch": 0.7192839707078926,
4663
+ "grad_norm": 15.8125,
4664
+ "learning_rate": 9.943806130301971e-08,
4665
+ "loss": 25.5048,
4666
+ "step": 6630
4667
+ },
4668
+ {
4669
+ "epoch": 0.720368863574722,
4670
+ "grad_norm": 21.3125,
4671
+ "learning_rate": 9.943721373334102e-08,
4672
+ "loss": 26.0719,
4673
+ "step": 6640
4674
+ },
4675
+ {
4676
+ "epoch": 0.7214537564415514,
4677
+ "grad_norm": 16.46875,
4678
+ "learning_rate": 9.943636616366231e-08,
4679
+ "loss": 25.5579,
4680
+ "step": 6650
4681
+ },
4682
+ {
4683
+ "epoch": 0.7225386493083807,
4684
+ "grad_norm": 15.75,
4685
+ "learning_rate": 9.94355185939836e-08,
4686
+ "loss": 25.8645,
4687
+ "step": 6660
4688
+ },
4689
+ {
4690
+ "epoch": 0.7236235421752102,
4691
+ "grad_norm": 15.4921875,
4692
+ "learning_rate": 9.943467102430491e-08,
4693
+ "loss": 25.5501,
4694
+ "step": 6670
4695
+ },
4696
+ {
4697
+ "epoch": 0.7247084350420396,
4698
+ "grad_norm": 16.828125,
4699
+ "learning_rate": 9.94338234546262e-08,
4700
+ "loss": 25.5519,
4701
+ "step": 6680
4702
+ },
4703
+ {
4704
+ "epoch": 0.725793327908869,
4705
+ "grad_norm": 15.3984375,
4706
+ "learning_rate": 9.94329758849475e-08,
4707
+ "loss": 25.8021,
4708
+ "step": 6690
4709
+ },
4710
+ {
4711
+ "epoch": 0.7268782207756984,
4712
+ "grad_norm": 15.078125,
4713
+ "learning_rate": 9.943212831526879e-08,
4714
+ "loss": 25.4819,
4715
+ "step": 6700
4716
+ },
4717
+ {
4718
+ "epoch": 0.7279631136425277,
4719
+ "grad_norm": 17.046875,
4720
+ "learning_rate": 9.943128074559009e-08,
4721
+ "loss": 25.6646,
4722
+ "step": 6710
4723
+ },
4724
+ {
4725
+ "epoch": 0.7290480065093572,
4726
+ "grad_norm": 15.421875,
4727
+ "learning_rate": 9.943043317591139e-08,
4728
+ "loss": 25.6539,
4729
+ "step": 6720
4730
+ },
4731
+ {
4732
+ "epoch": 0.7301328993761866,
4733
+ "grad_norm": 16.265625,
4734
+ "learning_rate": 9.942958560623268e-08,
4735
+ "loss": 25.3754,
4736
+ "step": 6730
4737
+ },
4738
+ {
4739
+ "epoch": 0.731217792243016,
4740
+ "grad_norm": 16.1875,
4741
+ "learning_rate": 9.942873803655398e-08,
4742
+ "loss": 25.4876,
4743
+ "step": 6740
4744
+ },
4745
+ {
4746
+ "epoch": 0.7323026851098454,
4747
+ "grad_norm": 15.578125,
4748
+ "learning_rate": 9.942789046687528e-08,
4749
+ "loss": 25.6933,
4750
+ "step": 6750
4751
+ },
4752
+ {
4753
+ "epoch": 0.7333875779766748,
4754
+ "grad_norm": 15.75,
4755
+ "learning_rate": 9.942704289719658e-08,
4756
+ "loss": 25.7143,
4757
+ "step": 6760
4758
+ },
4759
+ {
4760
+ "epoch": 0.7344724708435042,
4761
+ "grad_norm": 15.3359375,
4762
+ "learning_rate": 9.942619532751787e-08,
4763
+ "loss": 25.5687,
4764
+ "step": 6770
4765
+ },
4766
+ {
4767
+ "epoch": 0.7355573637103336,
4768
+ "grad_norm": 16.140625,
4769
+ "learning_rate": 9.942534775783917e-08,
4770
+ "loss": 25.8673,
4771
+ "step": 6780
4772
+ },
4773
+ {
4774
+ "epoch": 0.736642256577163,
4775
+ "grad_norm": 15.484375,
4776
+ "learning_rate": 9.942450018816046e-08,
4777
+ "loss": 25.805,
4778
+ "step": 6790
4779
+ },
4780
+ {
4781
+ "epoch": 0.7377271494439924,
4782
+ "grad_norm": 16.484375,
4783
+ "learning_rate": 9.942365261848175e-08,
4784
+ "loss": 25.4912,
4785
+ "step": 6800
4786
+ },
4787
+ {
4788
+ "epoch": 0.7388120423108218,
4789
+ "grad_norm": 16.890625,
4790
+ "learning_rate": 9.942280504880306e-08,
4791
+ "loss": 25.1321,
4792
+ "step": 6810
4793
+ },
4794
+ {
4795
+ "epoch": 0.7398969351776512,
4796
+ "grad_norm": 16.390625,
4797
+ "learning_rate": 9.942195747912435e-08,
4798
+ "loss": 25.6943,
4799
+ "step": 6820
4800
+ },
4801
+ {
4802
+ "epoch": 0.7409818280444807,
4803
+ "grad_norm": 16.40625,
4804
+ "learning_rate": 9.942110990944566e-08,
4805
+ "loss": 25.8169,
4806
+ "step": 6830
4807
+ },
4808
+ {
4809
+ "epoch": 0.74206672091131,
4810
+ "grad_norm": 16.5,
4811
+ "learning_rate": 9.942026233976695e-08,
4812
+ "loss": 25.2886,
4813
+ "step": 6840
4814
+ },
4815
+ {
4816
+ "epoch": 0.7431516137781394,
4817
+ "grad_norm": 15.6796875,
4818
+ "learning_rate": 9.941941477008824e-08,
4819
+ "loss": 25.3904,
4820
+ "step": 6850
4821
+ },
4822
+ {
4823
+ "epoch": 0.7442365066449688,
4824
+ "grad_norm": 15.9375,
4825
+ "learning_rate": 9.941856720040955e-08,
4826
+ "loss": 25.4449,
4827
+ "step": 6860
4828
+ },
4829
+ {
4830
+ "epoch": 0.7453213995117982,
4831
+ "grad_norm": 15.734375,
4832
+ "learning_rate": 9.941771963073084e-08,
4833
+ "loss": 25.6265,
4834
+ "step": 6870
4835
+ },
4836
+ {
4837
+ "epoch": 0.7464062923786277,
4838
+ "grad_norm": 15.84375,
4839
+ "learning_rate": 9.941687206105213e-08,
4840
+ "loss": 25.7452,
4841
+ "step": 6880
4842
+ },
4843
+ {
4844
+ "epoch": 0.747491185245457,
4845
+ "grad_norm": 15.8125,
4846
+ "learning_rate": 9.941602449137342e-08,
4847
+ "loss": 25.9536,
4848
+ "step": 6890
4849
+ },
4850
+ {
4851
+ "epoch": 0.7485760781122864,
4852
+ "grad_norm": 15.8125,
4853
+ "learning_rate": 9.941517692169473e-08,
4854
+ "loss": 25.6145,
4855
+ "step": 6900
4856
+ },
4857
+ {
4858
+ "epoch": 0.7496609709791158,
4859
+ "grad_norm": 16.0625,
4860
+ "learning_rate": 9.941432935201602e-08,
4861
+ "loss": 25.6004,
4862
+ "step": 6910
4863
+ },
4864
+ {
4865
+ "epoch": 0.7507458638459452,
4866
+ "grad_norm": 16.484375,
4867
+ "learning_rate": 9.941348178233731e-08,
4868
+ "loss": 25.7197,
4869
+ "step": 6920
4870
+ },
4871
+ {
4872
+ "epoch": 0.7518307567127747,
4873
+ "grad_norm": 16.3125,
4874
+ "learning_rate": 9.941263421265862e-08,
4875
+ "loss": 25.6203,
4876
+ "step": 6930
4877
+ },
4878
+ {
4879
+ "epoch": 0.752915649579604,
4880
+ "grad_norm": 16.109375,
4881
+ "learning_rate": 9.941178664297991e-08,
4882
+ "loss": 25.27,
4883
+ "step": 6940
4884
+ },
4885
+ {
4886
+ "epoch": 0.7540005424464334,
4887
+ "grad_norm": 16.25,
4888
+ "learning_rate": 9.941093907330122e-08,
4889
+ "loss": 25.8581,
4890
+ "step": 6950
4891
+ },
4892
+ {
4893
+ "epoch": 0.7550854353132628,
4894
+ "grad_norm": 15.6796875,
4895
+ "learning_rate": 9.941009150362251e-08,
4896
+ "loss": 25.5339,
4897
+ "step": 6960
4898
+ },
4899
+ {
4900
+ "epoch": 0.7561703281800922,
4901
+ "grad_norm": 16.34375,
4902
+ "learning_rate": 9.94092439339438e-08,
4903
+ "loss": 25.6158,
4904
+ "step": 6970
4905
+ },
4906
+ {
4907
+ "epoch": 0.7572552210469217,
4908
+ "grad_norm": 16.125,
4909
+ "learning_rate": 9.94083963642651e-08,
4910
+ "loss": 25.7242,
4911
+ "step": 6980
4912
+ },
4913
+ {
4914
+ "epoch": 0.758340113913751,
4915
+ "grad_norm": 16.1875,
4916
+ "learning_rate": 9.940754879458639e-08,
4917
+ "loss": 25.2094,
4918
+ "step": 6990
4919
+ },
4920
+ {
4921
+ "epoch": 0.7594250067805804,
4922
+ "grad_norm": 15.7578125,
4923
+ "learning_rate": 9.94067012249077e-08,
4924
+ "loss": 25.8482,
4925
+ "step": 7000
4926
+ },
4927
+ {
4928
+ "epoch": 0.7605098996474098,
4929
+ "grad_norm": 16.078125,
4930
+ "learning_rate": 9.940585365522899e-08,
4931
+ "loss": 25.8589,
4932
+ "step": 7010
4933
+ },
4934
+ {
4935
+ "epoch": 0.7615947925142392,
4936
+ "grad_norm": 16.046875,
4937
+ "learning_rate": 9.940500608555029e-08,
4938
+ "loss": 25.6722,
4939
+ "step": 7020
4940
+ },
4941
+ {
4942
+ "epoch": 0.7626796853810687,
4943
+ "grad_norm": 16.703125,
4944
+ "learning_rate": 9.940415851587158e-08,
4945
+ "loss": 25.1011,
4946
+ "step": 7030
4947
+ },
4948
+ {
4949
+ "epoch": 0.763764578247898,
4950
+ "grad_norm": 16.96875,
4951
+ "learning_rate": 9.940331094619289e-08,
4952
+ "loss": 25.7928,
4953
+ "step": 7040
4954
+ },
4955
+ {
4956
+ "epoch": 0.7648494711147275,
4957
+ "grad_norm": 16.140625,
4958
+ "learning_rate": 9.940246337651418e-08,
4959
+ "loss": 25.4724,
4960
+ "step": 7050
4961
+ },
4962
+ {
4963
+ "epoch": 0.7659343639815568,
4964
+ "grad_norm": 15.7265625,
4965
+ "learning_rate": 9.940161580683548e-08,
4966
+ "loss": 25.5213,
4967
+ "step": 7060
4968
+ },
4969
+ {
4970
+ "epoch": 0.7670192568483862,
4971
+ "grad_norm": 16.96875,
4972
+ "learning_rate": 9.940076823715677e-08,
4973
+ "loss": 25.6735,
4974
+ "step": 7070
4975
+ },
4976
+ {
4977
+ "epoch": 0.7681041497152156,
4978
+ "grad_norm": 16.40625,
4979
+ "learning_rate": 9.939992066747806e-08,
4980
+ "loss": 25.4332,
4981
+ "step": 7080
4982
+ },
4983
+ {
4984
+ "epoch": 0.769189042582045,
4985
+ "grad_norm": 15.890625,
4986
+ "learning_rate": 9.939907309779937e-08,
4987
+ "loss": 25.5559,
4988
+ "step": 7090
4989
+ },
4990
+ {
4991
+ "epoch": 0.7702739354488745,
4992
+ "grad_norm": 17.15625,
4993
+ "learning_rate": 9.939822552812066e-08,
4994
+ "loss": 25.4288,
4995
+ "step": 7100
4996
+ },
4997
+ {
4998
+ "epoch": 0.7713588283157038,
4999
+ "grad_norm": 17.296875,
5000
+ "learning_rate": 9.939737795844196e-08,
5001
+ "loss": 25.7736,
5002
+ "step": 7110
5003
+ },
5004
+ {
5005
+ "epoch": 0.7724437211825332,
5006
+ "grad_norm": 16.203125,
5007
+ "learning_rate": 9.939653038876326e-08,
5008
+ "loss": 25.4418,
5009
+ "step": 7120
5010
+ },
5011
+ {
5012
+ "epoch": 0.7735286140493626,
5013
+ "grad_norm": 16.015625,
5014
+ "learning_rate": 9.939568281908455e-08,
5015
+ "loss": 25.5955,
5016
+ "step": 7130
5017
+ },
5018
+ {
5019
+ "epoch": 0.774613506916192,
5020
+ "grad_norm": 15.3671875,
5021
+ "learning_rate": 9.939483524940585e-08,
5022
+ "loss": 25.706,
5023
+ "step": 7140
5024
+ },
5025
+ {
5026
+ "epoch": 0.7756983997830215,
5027
+ "grad_norm": 15.84375,
5028
+ "learning_rate": 9.939398767972715e-08,
5029
+ "loss": 25.1998,
5030
+ "step": 7150
5031
+ },
5032
+ {
5033
+ "epoch": 0.7767832926498508,
5034
+ "grad_norm": 15.8359375,
5035
+ "learning_rate": 9.939314011004844e-08,
5036
+ "loss": 25.6622,
5037
+ "step": 7160
5038
+ },
5039
+ {
5040
+ "epoch": 0.7778681855166802,
5041
+ "grad_norm": 16.015625,
5042
+ "learning_rate": 9.939229254036973e-08,
5043
+ "loss": 25.3702,
5044
+ "step": 7170
5045
+ },
5046
+ {
5047
+ "epoch": 0.7789530783835096,
5048
+ "grad_norm": 17.71875,
5049
+ "learning_rate": 9.939144497069104e-08,
5050
+ "loss": 25.4542,
5051
+ "step": 7180
5052
+ },
5053
+ {
5054
+ "epoch": 0.780037971250339,
5055
+ "grad_norm": 15.8515625,
5056
+ "learning_rate": 9.939059740101233e-08,
5057
+ "loss": 25.819,
5058
+ "step": 7190
5059
+ },
5060
+ {
5061
+ "epoch": 0.7811228641171685,
5062
+ "grad_norm": 15.1484375,
5063
+ "learning_rate": 9.938974983133362e-08,
5064
+ "loss": 25.4974,
5065
+ "step": 7200
5066
+ },
5067
+ {
5068
+ "epoch": 0.7822077569839978,
5069
+ "grad_norm": 16.328125,
5070
+ "learning_rate": 9.938890226165493e-08,
5071
+ "loss": 25.531,
5072
+ "step": 7210
5073
+ },
5074
+ {
5075
+ "epoch": 0.7832926498508272,
5076
+ "grad_norm": 16.46875,
5077
+ "learning_rate": 9.938805469197622e-08,
5078
+ "loss": 25.6273,
5079
+ "step": 7220
5080
+ },
5081
+ {
5082
+ "epoch": 0.7843775427176566,
5083
+ "grad_norm": 16.5,
5084
+ "learning_rate": 9.938720712229753e-08,
5085
+ "loss": 25.8583,
5086
+ "step": 7230
5087
+ },
5088
+ {
5089
+ "epoch": 0.785462435584486,
5090
+ "grad_norm": 16.703125,
5091
+ "learning_rate": 9.938635955261882e-08,
5092
+ "loss": 25.7639,
5093
+ "step": 7240
5094
+ },
5095
+ {
5096
+ "epoch": 0.7865473284513155,
5097
+ "grad_norm": 15.234375,
5098
+ "learning_rate": 9.938551198294011e-08,
5099
+ "loss": 25.5778,
5100
+ "step": 7250
5101
+ },
5102
+ {
5103
+ "epoch": 0.7876322213181448,
5104
+ "grad_norm": 16.640625,
5105
+ "learning_rate": 9.93846644132614e-08,
5106
+ "loss": 25.6404,
5107
+ "step": 7260
5108
+ },
5109
+ {
5110
+ "epoch": 0.7887171141849743,
5111
+ "grad_norm": 15.7578125,
5112
+ "learning_rate": 9.93838168435827e-08,
5113
+ "loss": 25.5173,
5114
+ "step": 7270
5115
+ },
5116
+ {
5117
+ "epoch": 0.7898020070518036,
5118
+ "grad_norm": 16.625,
5119
+ "learning_rate": 9.9382969273904e-08,
5120
+ "loss": 25.6344,
5121
+ "step": 7280
5122
+ },
5123
+ {
5124
+ "epoch": 0.790886899918633,
5125
+ "grad_norm": 16.453125,
5126
+ "learning_rate": 9.93821217042253e-08,
5127
+ "loss": 25.6382,
5128
+ "step": 7290
5129
+ },
5130
+ {
5131
+ "epoch": 0.7919717927854625,
5132
+ "grad_norm": 16.46875,
5133
+ "learning_rate": 9.93812741345466e-08,
5134
+ "loss": 25.5216,
5135
+ "step": 7300
5136
+ },
5137
+ {
5138
+ "epoch": 0.7930566856522918,
5139
+ "grad_norm": 15.890625,
5140
+ "learning_rate": 9.938042656486789e-08,
5141
+ "loss": 25.5863,
5142
+ "step": 7310
5143
+ },
5144
+ {
5145
+ "epoch": 0.7941415785191213,
5146
+ "grad_norm": 16.375,
5147
+ "learning_rate": 9.93795789951892e-08,
5148
+ "loss": 25.7816,
5149
+ "step": 7320
5150
+ },
5151
+ {
5152
+ "epoch": 0.7952264713859506,
5153
+ "grad_norm": 15.8984375,
5154
+ "learning_rate": 9.937873142551049e-08,
5155
+ "loss": 25.8711,
5156
+ "step": 7330
5157
+ },
5158
+ {
5159
+ "epoch": 0.79631136425278,
5160
+ "grad_norm": 16.203125,
5161
+ "learning_rate": 9.937788385583178e-08,
5162
+ "loss": 25.5444,
5163
+ "step": 7340
5164
+ },
5165
+ {
5166
+ "epoch": 0.7973962571196095,
5167
+ "grad_norm": 15.9140625,
5168
+ "learning_rate": 9.937703628615308e-08,
5169
+ "loss": 25.8317,
5170
+ "step": 7350
5171
+ },
5172
+ {
5173
+ "epoch": 0.7984811499864388,
5174
+ "grad_norm": 17.171875,
5175
+ "learning_rate": 9.937618871647437e-08,
5176
+ "loss": 25.6675,
5177
+ "step": 7360
5178
+ },
5179
+ {
5180
+ "epoch": 0.7995660428532683,
5181
+ "grad_norm": 15.5703125,
5182
+ "learning_rate": 9.937534114679567e-08,
5183
+ "loss": 25.4769,
5184
+ "step": 7370
5185
+ },
5186
+ {
5187
+ "epoch": 0.8006509357200976,
5188
+ "grad_norm": 15.421875,
5189
+ "learning_rate": 9.937449357711697e-08,
5190
+ "loss": 25.6663,
5191
+ "step": 7380
5192
+ },
5193
+ {
5194
+ "epoch": 0.801735828586927,
5195
+ "grad_norm": 15.8828125,
5196
+ "learning_rate": 9.937364600743827e-08,
5197
+ "loss": 25.3388,
5198
+ "step": 7390
5199
+ },
5200
+ {
5201
+ "epoch": 0.8028207214537565,
5202
+ "grad_norm": 16.28125,
5203
+ "learning_rate": 9.937279843775957e-08,
5204
+ "loss": 25.4267,
5205
+ "step": 7400
5206
+ },
5207
+ {
5208
+ "epoch": 0.8039056143205858,
5209
+ "grad_norm": 15.546875,
5210
+ "learning_rate": 9.937195086808086e-08,
5211
+ "loss": 25.4507,
5212
+ "step": 7410
5213
+ },
5214
+ {
5215
+ "epoch": 0.8049905071874153,
5216
+ "grad_norm": 16.953125,
5217
+ "learning_rate": 9.937110329840216e-08,
5218
+ "loss": 25.368,
5219
+ "step": 7420
5220
+ },
5221
+ {
5222
+ "epoch": 0.8060754000542446,
5223
+ "grad_norm": 15.8515625,
5224
+ "learning_rate": 9.937025572872346e-08,
5225
+ "loss": 25.6991,
5226
+ "step": 7430
5227
+ },
5228
+ {
5229
+ "epoch": 0.807160292921074,
5230
+ "grad_norm": 15.578125,
5231
+ "learning_rate": 9.936940815904475e-08,
5232
+ "loss": 25.4753,
5233
+ "step": 7440
5234
+ },
5235
+ {
5236
+ "epoch": 0.8082451857879035,
5237
+ "grad_norm": 16.109375,
5238
+ "learning_rate": 9.936856058936604e-08,
5239
+ "loss": 25.5059,
5240
+ "step": 7450
5241
+ },
5242
+ {
5243
+ "epoch": 0.8093300786547328,
5244
+ "grad_norm": 16.28125,
5245
+ "learning_rate": 9.936771301968735e-08,
5246
+ "loss": 25.4038,
5247
+ "step": 7460
5248
+ },
5249
+ {
5250
+ "epoch": 0.8104149715215623,
5251
+ "grad_norm": 15.6875,
5252
+ "learning_rate": 9.936686545000864e-08,
5253
+ "loss": 25.4121,
5254
+ "step": 7470
5255
+ },
5256
+ {
5257
+ "epoch": 0.8114998643883916,
5258
+ "grad_norm": 18.078125,
5259
+ "learning_rate": 9.936601788032993e-08,
5260
+ "loss": 25.6417,
5261
+ "step": 7480
5262
+ },
5263
+ {
5264
+ "epoch": 0.8125847572552211,
5265
+ "grad_norm": 16.046875,
5266
+ "learning_rate": 9.936517031065124e-08,
5267
+ "loss": 25.707,
5268
+ "step": 7490
5269
+ },
5270
+ {
5271
+ "epoch": 0.8136696501220505,
5272
+ "grad_norm": 17.1875,
5273
+ "learning_rate": 9.936432274097253e-08,
5274
+ "loss": 25.8678,
5275
+ "step": 7500
5276
+ },
5277
+ {
5278
+ "epoch": 0.8136696501220505,
5279
+ "eval_loss": 1.6150429248809814,
5280
+ "eval_runtime": 179.2871,
5281
+ "eval_samples_per_second": 2699.798,
5282
+ "eval_steps_per_second": 42.189,
5283
+ "step": 7500
5284
  }
5285
  ],
5286
  "logging_steps": 10,
 
5300
  "attributes": {}
5301
  }
5302
  },
5303
+ "total_flos": 2.0708374333095936e+19,
5304
  "train_batch_size": 16,
5305
  "trial_name": null,
5306
  "trial_params": null