Romain-XV commited on
Commit
2a65c32
·
verified ·
1 Parent(s): a1b0b18

Training in progress, step 552, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a5302ed989b61bbf3c5a89d0ce9797280641e435ec2e9f80f205c1902119adb
3
  size 144805440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f5adb3a5e16f935dab431d5c7fc988b0cb1691a418940636f10652d62b82db7
3
  size 144805440
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2280588a779f8600beb802cf022367cb692da9b34ac083f171a98b0ae124ae64
3
  size 74292308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00a072bec46c4c9546123646ed3daf91f92526592704bf014cc7ef83d90b4303
3
  size 74292308
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79d41988296ff7641dd7197ea779bba77e30e921990ba55a18499232a9f57fb0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1aa12142861e36681d375c8477ba6f862e4a421988518266fc08b6e26330f9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dc8764f26b1f60541ddcf4504ff4cf226063bff9c4b473f4392b732534a0b84
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf3863c8948e41c40f37017b57744f08214b2c38da69d49fe98001649774bc48
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.22141695022583,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
- "epoch": 0.06356521393667315,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3555,6 +3555,370 @@
3555
  "eval_samples_per_second": 3.977,
3556
  "eval_steps_per_second": 0.994,
3557
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3558
  }
3559
  ],
3560
  "logging_steps": 1,
@@ -3578,12 +3942,12 @@
3578
  "should_evaluate": false,
3579
  "should_log": false,
3580
  "should_save": true,
3581
- "should_training_stop": false
3582
  },
3583
  "attributes": {}
3584
  }
3585
  },
3586
- "total_flos": 5.182000113647616e+18,
3587
  "train_batch_size": 4,
3588
  "trial_name": null,
3589
  "trial_params": null
 
1
  {
2
  "best_metric": 1.22141695022583,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-500",
4
+ "epoch": 0.07017599618608716,
5
  "eval_steps": 100,
6
+ "global_step": 552,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3555
  "eval_samples_per_second": 3.977,
3556
  "eval_steps_per_second": 0.994,
3557
  "step": 500
3558
+ },
3559
+ {
3560
+ "epoch": 0.0636923443645465,
3561
+ "grad_norm": 0.20111538469791412,
3562
+ "learning_rate": 4.337571235498628e-06,
3563
+ "loss": 1.2477,
3564
+ "step": 501
3565
+ },
3566
+ {
3567
+ "epoch": 0.06381947479241985,
3568
+ "grad_norm": 0.19941484928131104,
3569
+ "learning_rate": 4.170318841070708e-06,
3570
+ "loss": 1.2335,
3571
+ "step": 502
3572
+ },
3573
+ {
3574
+ "epoch": 0.06394660522029319,
3575
+ "grad_norm": 0.19625377655029297,
3576
+ "learning_rate": 4.00628603241544e-06,
3577
+ "loss": 1.2302,
3578
+ "step": 503
3579
+ },
3580
+ {
3581
+ "epoch": 0.06407373564816654,
3582
+ "grad_norm": 0.2074848711490631,
3583
+ "learning_rate": 3.845478320536178e-06,
3584
+ "loss": 1.22,
3585
+ "step": 504
3586
+ },
3587
+ {
3588
+ "epoch": 0.06420086607603989,
3589
+ "grad_norm": 0.19643086194992065,
3590
+ "learning_rate": 3.687901108082892e-06,
3591
+ "loss": 1.2234,
3592
+ "step": 505
3593
+ },
3594
+ {
3595
+ "epoch": 0.06432799650391323,
3596
+ "grad_norm": 0.19488604366779327,
3597
+ "learning_rate": 3.53355968917054e-06,
3598
+ "loss": 1.2088,
3599
+ "step": 506
3600
+ },
3601
+ {
3602
+ "epoch": 0.06445512693178658,
3603
+ "grad_norm": 0.20913287997245789,
3604
+ "learning_rate": 3.3824592492013085e-06,
3605
+ "loss": 1.1734,
3606
+ "step": 507
3607
+ },
3608
+ {
3609
+ "epoch": 0.06458225735965993,
3610
+ "grad_norm": 0.19445528090000153,
3611
+ "learning_rate": 3.2346048646903494e-06,
3612
+ "loss": 1.2014,
3613
+ "step": 508
3614
+ },
3615
+ {
3616
+ "epoch": 0.06470938778753327,
3617
+ "grad_norm": 0.19821353256702423,
3618
+ "learning_rate": 3.0900015030951744e-06,
3619
+ "loss": 1.2446,
3620
+ "step": 509
3621
+ },
3622
+ {
3623
+ "epoch": 0.06483651821540662,
3624
+ "grad_norm": 0.20283670723438263,
3625
+ "learning_rate": 2.9486540226488557e-06,
3626
+ "loss": 1.1858,
3627
+ "step": 510
3628
+ },
3629
+ {
3630
+ "epoch": 0.06496364864327997,
3631
+ "grad_norm": 0.20396962761878967,
3632
+ "learning_rate": 2.8105671721967875e-06,
3633
+ "loss": 1.2166,
3634
+ "step": 511
3635
+ },
3636
+ {
3637
+ "epoch": 0.0650907790711533,
3638
+ "grad_norm": 0.2022467404603958,
3639
+ "learning_rate": 2.6757455910370488e-06,
3640
+ "loss": 1.1738,
3641
+ "step": 512
3642
+ },
3643
+ {
3644
+ "epoch": 0.06521790949902666,
3645
+ "grad_norm": 0.1929662674665451,
3646
+ "learning_rate": 2.5441938087646612e-06,
3647
+ "loss": 1.205,
3648
+ "step": 513
3649
+ },
3650
+ {
3651
+ "epoch": 0.06534503992690001,
3652
+ "grad_norm": 0.19997857511043549,
3653
+ "learning_rate": 2.4159162451193097e-06,
3654
+ "loss": 1.2103,
3655
+ "step": 514
3656
+ },
3657
+ {
3658
+ "epoch": 0.06547217035477335,
3659
+ "grad_norm": 0.1969158947467804,
3660
+ "learning_rate": 2.290917209836918e-06,
3661
+ "loss": 1.1912,
3662
+ "step": 515
3663
+ },
3664
+ {
3665
+ "epoch": 0.0655993007826467,
3666
+ "grad_norm": 0.19717784225940704,
3667
+ "learning_rate": 2.1692009025048422e-06,
3668
+ "loss": 1.2374,
3669
+ "step": 516
3670
+ },
3671
+ {
3672
+ "epoch": 0.06572643121052005,
3673
+ "grad_norm": 0.20149968564510345,
3674
+ "learning_rate": 2.0507714124207157e-06,
3675
+ "loss": 1.1857,
3676
+ "step": 517
3677
+ },
3678
+ {
3679
+ "epoch": 0.06585356163839338,
3680
+ "grad_norm": 0.19946229457855225,
3681
+ "learning_rate": 1.9356327184551714e-06,
3682
+ "loss": 1.1947,
3683
+ "step": 518
3684
+ },
3685
+ {
3686
+ "epoch": 0.06598069206626674,
3687
+ "grad_norm": 0.19773566722869873,
3688
+ "learning_rate": 1.8237886889180489e-06,
3689
+ "loss": 1.2825,
3690
+ "step": 519
3691
+ },
3692
+ {
3693
+ "epoch": 0.06610782249414009,
3694
+ "grad_norm": 0.20150107145309448,
3695
+ "learning_rate": 1.7152430814285303e-06,
3696
+ "loss": 1.2352,
3697
+ "step": 520
3698
+ },
3699
+ {
3700
+ "epoch": 0.06623495292201342,
3701
+ "grad_norm": 0.20660698413848877,
3702
+ "learning_rate": 1.6099995427888315e-06,
3703
+ "loss": 1.2127,
3704
+ "step": 521
3705
+ },
3706
+ {
3707
+ "epoch": 0.06636208334988677,
3708
+ "grad_norm": 0.19696985185146332,
3709
+ "learning_rate": 1.5080616088616884e-06,
3710
+ "loss": 1.2197,
3711
+ "step": 522
3712
+ },
3713
+ {
3714
+ "epoch": 0.06648921377776013,
3715
+ "grad_norm": 0.20150014758110046,
3716
+ "learning_rate": 1.4094327044515853e-06,
3717
+ "loss": 1.2534,
3718
+ "step": 523
3719
+ },
3720
+ {
3721
+ "epoch": 0.06661634420563346,
3722
+ "grad_norm": 0.1950562745332718,
3723
+ "learning_rate": 1.3141161431896808e-06,
3724
+ "loss": 1.165,
3725
+ "step": 524
3726
+ },
3727
+ {
3728
+ "epoch": 0.06674347463350681,
3729
+ "grad_norm": 0.20011726021766663,
3730
+ "learning_rate": 1.222115127422485e-06,
3731
+ "loss": 1.2179,
3732
+ "step": 525
3733
+ },
3734
+ {
3735
+ "epoch": 0.06687060506138016,
3736
+ "grad_norm": 0.20083405077457428,
3737
+ "learning_rate": 1.1334327481042573e-06,
3738
+ "loss": 1.305,
3739
+ "step": 526
3740
+ },
3741
+ {
3742
+ "epoch": 0.0669977354892535,
3743
+ "grad_norm": 0.20200292766094208,
3744
+ "learning_rate": 1.0480719846931774e-06,
3745
+ "loss": 1.2263,
3746
+ "step": 527
3747
+ },
3748
+ {
3749
+ "epoch": 0.06712486591712685,
3750
+ "grad_norm": 0.20572660863399506,
3751
+ "learning_rate": 9.660357050512158e-07,
3752
+ "loss": 1.2029,
3753
+ "step": 528
3754
+ },
3755
+ {
3756
+ "epoch": 0.0672519963450002,
3757
+ "grad_norm": 0.20432178676128387,
3758
+ "learning_rate": 8.873266653478208e-07,
3759
+ "loss": 1.2703,
3760
+ "step": 529
3761
+ },
3762
+ {
3763
+ "epoch": 0.06737912677287354,
3764
+ "grad_norm": 0.1951807290315628,
3765
+ "learning_rate": 8.119475099673036e-07,
3766
+ "loss": 1.2298,
3767
+ "step": 530
3768
+ },
3769
+ {
3770
+ "epoch": 0.06750625720074689,
3771
+ "grad_norm": 0.2002389281988144,
3772
+ "learning_rate": 7.399007714199658e-07,
3773
+ "loss": 1.2342,
3774
+ "step": 531
3775
+ },
3776
+ {
3777
+ "epoch": 0.06763338762862024,
3778
+ "grad_norm": 0.19941021502017975,
3779
+ "learning_rate": 6.711888702570556e-07,
3780
+ "loss": 1.146,
3781
+ "step": 532
3782
+ },
3783
+ {
3784
+ "epoch": 0.06776051805649358,
3785
+ "grad_norm": 0.19345982372760773,
3786
+ "learning_rate": 6.058141149894336e-07,
3787
+ "loss": 1.1954,
3788
+ "step": 533
3789
+ },
3790
+ {
3791
+ "epoch": 0.06788764848436693,
3792
+ "grad_norm": 0.1961802840232849,
3793
+ "learning_rate": 5.437787020100115e-07,
3794
+ "loss": 1.2165,
3795
+ "step": 534
3796
+ },
3797
+ {
3798
+ "epoch": 0.06801477891224028,
3799
+ "grad_norm": 0.20169439911842346,
3800
+ "learning_rate": 4.850847155199567e-07,
3801
+ "loss": 1.2445,
3802
+ "step": 535
3803
+ },
3804
+ {
3805
+ "epoch": 0.06814190934011362,
3806
+ "grad_norm": 0.19823016226291656,
3807
+ "learning_rate": 4.297341274586475e-07,
3808
+ "loss": 1.2371,
3809
+ "step": 536
3810
+ },
3811
+ {
3812
+ "epoch": 0.06826903976798697,
3813
+ "grad_norm": 0.2043391764163971,
3814
+ "learning_rate": 3.777287974374932e-07,
3815
+ "loss": 1.2942,
3816
+ "step": 537
3817
+ },
3818
+ {
3819
+ "epoch": 0.06839617019586032,
3820
+ "grad_norm": 0.20149071514606476,
3821
+ "learning_rate": 3.290704726773619e-07,
3822
+ "loss": 1.1842,
3823
+ "step": 538
3824
+ },
3825
+ {
3826
+ "epoch": 0.06852330062373366,
3827
+ "grad_norm": 0.20085620880126953,
3828
+ "learning_rate": 2.837607879499604e-07,
3829
+ "loss": 1.1982,
3830
+ "step": 539
3831
+ },
3832
+ {
3833
+ "epoch": 0.06865043105160701,
3834
+ "grad_norm": 0.2070370465517044,
3835
+ "learning_rate": 2.418012655228452e-07,
3836
+ "loss": 1.211,
3837
+ "step": 540
3838
+ },
3839
+ {
3840
+ "epoch": 0.06877756147948036,
3841
+ "grad_norm": 0.2030269056558609,
3842
+ "learning_rate": 2.0319331510835205e-07,
3843
+ "loss": 1.2534,
3844
+ "step": 541
3845
+ },
3846
+ {
3847
+ "epoch": 0.0689046919073537,
3848
+ "grad_norm": 0.1966077983379364,
3849
+ "learning_rate": 1.6793823381614505e-07,
3850
+ "loss": 1.1683,
3851
+ "step": 542
3852
+ },
3853
+ {
3854
+ "epoch": 0.06903182233522705,
3855
+ "grad_norm": 0.205659419298172,
3856
+ "learning_rate": 1.3603720610972925e-07,
3857
+ "loss": 1.141,
3858
+ "step": 543
3859
+ },
3860
+ {
3861
+ "epoch": 0.0691589527631004,
3862
+ "grad_norm": 0.2047136127948761,
3863
+ "learning_rate": 1.0749130376659366e-07,
3864
+ "loss": 1.2415,
3865
+ "step": 544
3866
+ },
3867
+ {
3868
+ "epoch": 0.06928608319097374,
3869
+ "grad_norm": 0.2038879543542862,
3870
+ "learning_rate": 8.230148584219554e-08,
3871
+ "loss": 1.2148,
3872
+ "step": 545
3873
+ },
3874
+ {
3875
+ "epoch": 0.06941321361884709,
3876
+ "grad_norm": 0.19984766840934753,
3877
+ "learning_rate": 6.046859863781951e-08,
3878
+ "loss": 1.1954,
3879
+ "step": 546
3880
+ },
3881
+ {
3882
+ "epoch": 0.06954034404672044,
3883
+ "grad_norm": 0.2025536447763443,
3884
+ "learning_rate": 4.199337567203365e-08,
3885
+ "loss": 1.2076,
3886
+ "step": 547
3887
+ },
3888
+ {
3889
+ "epoch": 0.06966747447459377,
3890
+ "grad_norm": 0.2022514045238495,
3891
+ "learning_rate": 2.6876437656153665e-08,
3892
+ "loss": 1.2369,
3893
+ "step": 548
3894
+ },
3895
+ {
3896
+ "epoch": 0.06979460490246713,
3897
+ "grad_norm": 0.21228355169296265,
3898
+ "learning_rate": 1.5118292473292885e-08,
3899
+ "loss": 1.2304,
3900
+ "step": 549
3901
+ },
3902
+ {
3903
+ "epoch": 0.06992173533034048,
3904
+ "grad_norm": 0.20524141192436218,
3905
+ "learning_rate": 6.719335161364804e-09,
3906
+ "loss": 1.2158,
3907
+ "step": 550
3908
+ },
3909
+ {
3910
+ "epoch": 0.07004886575821381,
3911
+ "grad_norm": 0.18994790315628052,
3912
+ "learning_rate": 1.6798478997825939e-09,
3913
+ "loss": 1.1945,
3914
+ "step": 551
3915
+ },
3916
+ {
3917
+ "epoch": 0.07017599618608716,
3918
+ "grad_norm": 0.20824959874153137,
3919
+ "learning_rate": 0.0,
3920
+ "loss": 1.241,
3921
+ "step": 552
3922
  }
3923
  ],
3924
  "logging_steps": 1,
 
3942
  "should_evaluate": false,
3943
  "should_log": false,
3944
  "should_save": true,
3945
+ "should_training_stop": true
3946
  },
3947
  "attributes": {}
3948
  }
3949
  },
3950
+ "total_flos": 5.720928125466968e+18,
3951
  "train_batch_size": 4,
3952
  "trial_name": null,
3953
  "trial_params": null