romainnn commited on
Commit
51a28d5
·
verified ·
1 Parent(s): 8c1f051

Training in progress, step 840, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19f74b98bd7db98d10b0fb68f70fcdf71de22ad1e538962ff35f13c8025e719f
3
  size 289512208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27db5c66916d7daad771886e225cd7152669ee12e611a380fbf0009c9af37adc
3
  size 289512208
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50a8ad4105eb139aadf14070d8274eae21445d239100a2810eeb1b389f2eafd5
3
  size 147781972
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3577d633018a2488773a311af50b3f09dfc5134434176462a126cedd7dcc57c2
3
  size 147781972
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7aff307a49579ac3e61540d33701ae1ea5a0e55c95055f52be6be0c9f63b5e12
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a9264446d9bfcf977beea433026295798ed92bc03fae79d89f70494644af49
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dde15a2105449c70bf902869dd9486a1bea61f66479c56f9a16426a3f85e33c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699b3777e1ea7a60123ef22ecc366f524146f7231f57273c73780dc41dc98d5c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.203278660774231,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-800",
4
- "epoch": 0.05085217114933852,
5
  "eval_steps": 100,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5679,6 +5679,286 @@
5679
  "eval_samples_per_second": 4.035,
5680
  "eval_steps_per_second": 1.009,
5681
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5682
  }
5683
  ],
5684
  "logging_steps": 1,
@@ -5702,12 +5982,12 @@
5702
  "should_evaluate": false,
5703
  "should_log": false,
5704
  "should_save": true,
5705
- "should_training_stop": false
5706
  },
5707
  "attributes": {}
5708
  }
5709
  },
5710
- "total_flos": 4.156980197326848e+18,
5711
  "train_batch_size": 4,
5712
  "trial_name": null,
5713
  "trial_params": null
 
1
  {
2
  "best_metric": 1.203278660774231,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-800",
4
+ "epoch": 0.05339477970680545,
5
  "eval_steps": 100,
6
+ "global_step": 840,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5679
  "eval_samples_per_second": 4.035,
5680
  "eval_steps_per_second": 1.009,
5681
  "step": 800
5682
+ },
5683
+ {
5684
+ "epoch": 0.0509157363632752,
5685
+ "grad_norm": 0.2525332570075989,
5686
+ "learning_rate": 1.0875619410158466e-06,
5687
+ "loss": 1.1738,
5688
+ "step": 801
5689
+ },
5690
+ {
5691
+ "epoch": 0.05097930157721187,
5692
+ "grad_norm": 0.24210986495018005,
5693
+ "learning_rate": 1.0325995198509409e-06,
5694
+ "loss": 1.1435,
5695
+ "step": 802
5696
+ },
5697
+ {
5698
+ "epoch": 0.05104286679114854,
5699
+ "grad_norm": 0.25146523118019104,
5700
+ "learning_rate": 9.79054964740911e-07,
5701
+ "loss": 1.2707,
5702
+ "step": 803
5703
+ },
5704
+ {
5705
+ "epoch": 0.05110643200508522,
5706
+ "grad_norm": 0.2529788315296173,
5707
+ "learning_rate": 9.269290427969868e-07,
5708
+ "loss": 1.1679,
5709
+ "step": 804
5710
+ },
5711
+ {
5712
+ "epoch": 0.05116999721902189,
5713
+ "grad_norm": 0.25673815608024597,
5714
+ "learning_rate": 8.762225008062674e-07,
5715
+ "loss": 1.2141,
5716
+ "step": 805
5717
+ },
5718
+ {
5719
+ "epoch": 0.05123356243295856,
5720
+ "grad_norm": 0.25803902745246887,
5721
+ "learning_rate": 8.26936065220929e-07,
5722
+ "loss": 1.2018,
5723
+ "step": 806
5724
+ },
5725
+ {
5726
+ "epoch": 0.05129712764689524,
5727
+ "grad_norm": 0.2565945088863373,
5728
+ "learning_rate": 7.790704421478557e-07,
5729
+ "loss": 1.2261,
5730
+ "step": 807
5731
+ },
5732
+ {
5733
+ "epoch": 0.05136069286083191,
5734
+ "grad_norm": 0.2625206410884857,
5735
+ "learning_rate": 7.326263173385584e-07,
5736
+ "loss": 1.1934,
5737
+ "step": 808
5738
+ },
5739
+ {
5740
+ "epoch": 0.05142425807476858,
5741
+ "grad_norm": 0.25721174478530884,
5742
+ "learning_rate": 6.876043561792833e-07,
5743
+ "loss": 1.2349,
5744
+ "step": 809
5745
+ },
5746
+ {
5747
+ "epoch": 0.051487823288705256,
5748
+ "grad_norm": 0.25995710492134094,
5749
+ "learning_rate": 6.440052036815081e-07,
5750
+ "loss": 1.3027,
5751
+ "step": 810
5752
+ },
5753
+ {
5754
+ "epoch": 0.05155138850264193,
5755
+ "grad_norm": 0.25100308656692505,
5756
+ "learning_rate": 6.018294844727379e-07,
5757
+ "loss": 1.1802,
5758
+ "step": 811
5759
+ },
5760
+ {
5761
+ "epoch": 0.0516149537165786,
5762
+ "grad_norm": 0.2459433674812317,
5763
+ "learning_rate": 5.610778027874908e-07,
5764
+ "loss": 1.1474,
5765
+ "step": 812
5766
+ },
5767
+ {
5768
+ "epoch": 0.051678518930515276,
5769
+ "grad_norm": 0.2367779165506363,
5770
+ "learning_rate": 5.217507424586821e-07,
5771
+ "loss": 1.168,
5772
+ "step": 813
5773
+ },
5774
+ {
5775
+ "epoch": 0.05174208414445195,
5776
+ "grad_norm": 0.2512117922306061,
5777
+ "learning_rate": 4.838488669092534e-07,
5778
+ "loss": 1.091,
5779
+ "step": 814
5780
+ },
5781
+ {
5782
+ "epoch": 0.05180564935838862,
5783
+ "grad_norm": 0.2595987319946289,
5784
+ "learning_rate": 4.4737271914411236e-07,
5785
+ "loss": 1.1756,
5786
+ "step": 815
5787
+ },
5788
+ {
5789
+ "epoch": 0.051869214572325295,
5790
+ "grad_norm": 0.26023730635643005,
5791
+ "learning_rate": 4.123228217422948e-07,
5792
+ "loss": 1.068,
5793
+ "step": 816
5794
+ },
5795
+ {
5796
+ "epoch": 0.05193277978626197,
5797
+ "grad_norm": 0.26552048325538635,
5798
+ "learning_rate": 3.7869967684958094e-07,
5799
+ "loss": 1.1605,
5800
+ "step": 817
5801
+ },
5802
+ {
5803
+ "epoch": 0.05199634500019864,
5804
+ "grad_norm": 0.24736690521240234,
5805
+ "learning_rate": 3.465037661712134e-07,
5806
+ "loss": 1.2006,
5807
+ "step": 818
5808
+ },
5809
+ {
5810
+ "epoch": 0.052059910214135315,
5811
+ "grad_norm": 0.26172155141830444,
5812
+ "learning_rate": 3.1573555096501283e-07,
5813
+ "loss": 1.2359,
5814
+ "step": 819
5815
+ },
5816
+ {
5817
+ "epoch": 0.05212347542807199,
5818
+ "grad_norm": 0.25399184226989746,
5819
+ "learning_rate": 2.86395472034795e-07,
5820
+ "loss": 1.2153,
5821
+ "step": 820
5822
+ },
5823
+ {
5824
+ "epoch": 0.05218704064200866,
5825
+ "grad_norm": 0.25162798166275024,
5826
+ "learning_rate": 2.584839497240643e-07,
5827
+ "loss": 1.2581,
5828
+ "step": 821
5829
+ },
5830
+ {
5831
+ "epoch": 0.052250605855945334,
5832
+ "grad_norm": 0.2551822066307068,
5833
+ "learning_rate": 2.3200138390993e-07,
5834
+ "loss": 1.1388,
5835
+ "step": 822
5836
+ },
5837
+ {
5838
+ "epoch": 0.05231417106988201,
5839
+ "grad_norm": 0.24114681780338287,
5840
+ "learning_rate": 2.0694815399744382e-07,
5841
+ "loss": 1.2377,
5842
+ "step": 823
5843
+ },
5844
+ {
5845
+ "epoch": 0.05237773628381868,
5846
+ "grad_norm": 0.26416000723838806,
5847
+ "learning_rate": 1.83324618914138e-07,
5848
+ "loss": 1.2193,
5849
+ "step": 824
5850
+ },
5851
+ {
5852
+ "epoch": 0.052441301497755353,
5853
+ "grad_norm": 0.25959083437919617,
5854
+ "learning_rate": 1.611311171048735e-07,
5855
+ "loss": 1.1987,
5856
+ "step": 825
5857
+ },
5858
+ {
5859
+ "epoch": 0.05250486671169203,
5860
+ "grad_norm": 0.24999088048934937,
5861
+ "learning_rate": 1.4036796652701078e-07,
5862
+ "loss": 1.1644,
5863
+ "step": 826
5864
+ },
5865
+ {
5866
+ "epoch": 0.0525684319256287,
5867
+ "grad_norm": 0.25357383489608765,
5868
+ "learning_rate": 1.210354646458245e-07,
5869
+ "loss": 1.2345,
5870
+ "step": 827
5871
+ },
5872
+ {
5873
+ "epoch": 0.05263199713956537,
5874
+ "grad_norm": 0.25583428144454956,
5875
+ "learning_rate": 1.031338884302846e-07,
5876
+ "loss": 1.2685,
5877
+ "step": 828
5878
+ },
5879
+ {
5880
+ "epoch": 0.05269556235350205,
5881
+ "grad_norm": 0.25566795468330383,
5882
+ "learning_rate": 8.666349434907073e-08,
5883
+ "loss": 1.2141,
5884
+ "step": 829
5885
+ },
5886
+ {
5887
+ "epoch": 0.05275912756743872,
5888
+ "grad_norm": 0.2603313624858856,
5889
+ "learning_rate": 7.162451836685291e-08,
5890
+ "loss": 1.2535,
5891
+ "step": 830
5892
+ },
5893
+ {
5894
+ "epoch": 0.05282269278137539,
5895
+ "grad_norm": 0.24881498515605927,
5896
+ "learning_rate": 5.8017175941005306e-08,
5897
+ "loss": 1.1596,
5898
+ "step": 831
5899
+ },
5900
+ {
5901
+ "epoch": 0.05288625799531207,
5902
+ "grad_norm": 0.2581416070461273,
5903
+ "learning_rate": 4.584166201841988e-08,
5904
+ "loss": 1.2291,
5905
+ "step": 832
5906
+ },
5907
+ {
5908
+ "epoch": 0.052949823209248736,
5909
+ "grad_norm": 0.2521674335002899,
5910
+ "learning_rate": 3.5098151032786355e-08,
5911
+ "loss": 1.2752,
5912
+ "step": 833
5913
+ },
5914
+ {
5915
+ "epoch": 0.05301338842318541,
5916
+ "grad_norm": 0.2460847645998001,
5917
+ "learning_rate": 2.578679690204977e-08,
5918
+ "loss": 1.1633,
5919
+ "step": 834
5920
+ },
5921
+ {
5922
+ "epoch": 0.05307695363712209,
5923
+ "grad_norm": 0.2515714764595032,
5924
+ "learning_rate": 1.7907733026223394e-08,
5925
+ "loss": 1.1517,
5926
+ "step": 835
5927
+ },
5928
+ {
5929
+ "epoch": 0.053140518851058756,
5930
+ "grad_norm": 0.2554892301559448,
5931
+ "learning_rate": 1.1461072285490204e-08,
5932
+ "loss": 1.1205,
5933
+ "step": 836
5934
+ },
5935
+ {
5936
+ "epoch": 0.05320408406499543,
5937
+ "grad_norm": 0.2557508945465088,
5938
+ "learning_rate": 6.446907038559769e-09,
5939
+ "loss": 1.1845,
5940
+ "step": 837
5941
+ },
5942
+ {
5943
+ "epoch": 0.05326764927893211,
5944
+ "grad_norm": 0.25483280420303345,
5945
+ "learning_rate": 2.865309121358184e-09,
5946
+ "loss": 1.1348,
5947
+ "step": 838
5948
+ },
5949
+ {
5950
+ "epoch": 0.053331214492868775,
5951
+ "grad_norm": 0.2680445909500122,
5952
+ "learning_rate": 7.163298459844647e-10,
5953
+ "loss": 1.1985,
5954
+ "step": 839
5955
+ },
5956
+ {
5957
+ "epoch": 0.05339477970680545,
5958
+ "grad_norm": 0.26120489835739136,
5959
+ "learning_rate": 0.0,
5960
+ "loss": 1.264,
5961
+ "step": 840
5962
  }
5963
  ],
5964
  "logging_steps": 1,
 
5982
  "should_evaluate": false,
5983
  "should_log": false,
5984
  "should_save": true,
5985
+ "should_training_stop": true
5986
  },
5987
  "attributes": {}
5988
  }
5989
  },
5990
+ "total_flos": 4.3648292071931904e+18,
5991
  "train_batch_size": 4,
5992
  "trial_name": null,
5993
  "trial_params": null