Romain-XV commited on
Commit
14d28d8
·
verified ·
1 Parent(s): e7fa746

Training in progress, step 155, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0416a67038d04a975850f5728359c9aba648aa6350e20d7acbfd3fb2971d208
3
  size 50624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26657c3db366efc7fa6d92372aa3219664b8571ca133e00feda4d1f205b15d90
3
  size 50624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c99b267690272d6e4f628fa5599135199e250d82fd58007b6ef13455963166f8
3
  size 111142
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88fd526672df865f81c4176a06dc3da1929c1ce8317f76c85d58e8c3304b2eb9
3
  size 111142
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c661340682e18e4f90539ed362e83ed15d19b9a2a7967a40c1f4912323266dc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a3afbab8271bee510684da18634a513067dc3f106f7cf4fa953ce19ce526db
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:598909828fb132bf741be4aea9ee4f44f8aebeb16d890d68d870b1973643d2d8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fad1344d0d68cec31f4379da8eb730afca29401bda64dafa2bff69b67bba283b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 10.353182792663574,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.22139200221392002,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -723,6 +723,391 @@
723
  "eval_samples_per_second": 328.204,
724
  "eval_steps_per_second": 82.159,
725
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  ],
728
  "logging_steps": 1,
@@ -746,12 +1131,12 @@
746
  "should_evaluate": false,
747
  "should_log": false,
748
  "should_save": true,
749
- "should_training_stop": false
750
  },
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 41841957273600.0,
755
  "train_batch_size": 4,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": 10.353182792663574,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.34315760343157603,
5
  "eval_steps": 100,
6
+ "global_step": 155,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
723
  "eval_samples_per_second": 328.204,
724
  "eval_steps_per_second": 82.159,
725
  "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.2236059222360592,
729
+ "grad_norm": 0.03327897563576698,
730
+ "learning_rate": 6.0982293673944544e-05,
731
+ "loss": 10.3556,
732
+ "step": 101
733
+ },
734
+ {
735
+ "epoch": 0.22581984225819843,
736
+ "grad_norm": 0.03691767901182175,
737
+ "learning_rate": 5.899671794785839e-05,
738
+ "loss": 10.3553,
739
+ "step": 102
740
+ },
741
+ {
742
+ "epoch": 0.22803376228033762,
743
+ "grad_norm": 0.030172044411301613,
744
+ "learning_rate": 5.703038932486484e-05,
745
+ "loss": 10.3542,
746
+ "step": 103
747
+ },
748
+ {
749
+ "epoch": 0.23024768230247683,
750
+ "grad_norm": 0.021300997585058212,
751
+ "learning_rate": 5.5084230807412126e-05,
752
+ "loss": 10.3543,
753
+ "step": 104
754
+ },
755
+ {
756
+ "epoch": 0.23246160232461602,
757
+ "grad_norm": 0.03279775753617287,
758
+ "learning_rate": 5.3159155930021e-05,
759
+ "loss": 10.3539,
760
+ "step": 105
761
+ },
762
+ {
763
+ "epoch": 0.23467552234675523,
764
+ "grad_norm": 0.02924364060163498,
765
+ "learning_rate": 5.12560683304681e-05,
766
+ "loss": 10.3543,
767
+ "step": 106
768
+ },
769
+ {
770
+ "epoch": 0.23688944236889442,
771
+ "grad_norm": 0.03262259438633919,
772
+ "learning_rate": 4.9375861325614606e-05,
773
+ "loss": 10.355,
774
+ "step": 107
775
+ },
776
+ {
777
+ "epoch": 0.23910336239103364,
778
+ "grad_norm": 0.034073278307914734,
779
+ "learning_rate": 4.751941749207995e-05,
780
+ "loss": 10.357,
781
+ "step": 108
782
+ },
783
+ {
784
+ "epoch": 0.24131728241317282,
785
+ "grad_norm": 0.03489963710308075,
786
+ "learning_rate": 4.5687608251956714e-05,
787
+ "loss": 10.355,
788
+ "step": 109
789
+ },
790
+ {
791
+ "epoch": 0.243531202435312,
792
+ "grad_norm": 0.02581014297902584,
793
+ "learning_rate": 4.388129346376178e-05,
794
+ "loss": 10.3537,
795
+ "step": 110
796
+ },
797
+ {
798
+ "epoch": 0.24574512245745123,
799
+ "grad_norm": 0.023335812613368034,
800
+ "learning_rate": 4.210132101881516e-05,
801
+ "loss": 10.3553,
802
+ "step": 111
803
+ },
804
+ {
805
+ "epoch": 0.2479590424795904,
806
+ "grad_norm": 0.03612133115530014,
807
+ "learning_rate": 4.034852644323661e-05,
808
+ "loss": 10.3534,
809
+ "step": 112
810
+ },
811
+ {
812
+ "epoch": 0.2501729625017296,
813
+ "grad_norm": 0.02821163646876812,
814
+ "learning_rate": 3.862373250574626e-05,
815
+ "loss": 10.3556,
816
+ "step": 113
817
+ },
818
+ {
819
+ "epoch": 0.2523868825238688,
820
+ "grad_norm": 0.0300295390188694,
821
+ "learning_rate": 3.6927748831453836e-05,
822
+ "loss": 10.3551,
823
+ "step": 114
824
+ },
825
+ {
826
+ "epoch": 0.25460080254600803,
827
+ "grad_norm": 0.025033898651599884,
828
+ "learning_rate": 3.5261371521817244e-05,
829
+ "loss": 10.3525,
830
+ "step": 115
831
+ },
832
+ {
833
+ "epoch": 0.25681472256814725,
834
+ "grad_norm": 0.02417595498263836,
835
+ "learning_rate": 3.3625382780949574e-05,
836
+ "loss": 10.3542,
837
+ "step": 116
838
+ },
839
+ {
840
+ "epoch": 0.2590286425902864,
841
+ "grad_norm": 0.02323267050087452,
842
+ "learning_rate": 3.202055054844921e-05,
843
+ "loss": 10.3521,
844
+ "step": 117
845
+ },
846
+ {
847
+ "epoch": 0.2612425626124256,
848
+ "grad_norm": 0.026705941185355186,
849
+ "learning_rate": 3.0447628138926156e-05,
850
+ "loss": 10.3536,
851
+ "step": 118
852
+ },
853
+ {
854
+ "epoch": 0.26345648263456484,
855
+ "grad_norm": 0.033880215138196945,
856
+ "learning_rate": 2.890735388839295e-05,
857
+ "loss": 10.3554,
858
+ "step": 119
859
+ },
860
+ {
861
+ "epoch": 0.26567040265670405,
862
+ "grad_norm": 0.029678767547011375,
863
+ "learning_rate": 2.7400450807686938e-05,
864
+ "loss": 10.353,
865
+ "step": 120
866
+ },
867
+ {
868
+ "epoch": 0.2678843226788432,
869
+ "grad_norm": 0.02781762182712555,
870
+ "learning_rate": 2.59276262430861e-05,
871
+ "loss": 10.3553,
872
+ "step": 121
873
+ },
874
+ {
875
+ "epoch": 0.2700982427009824,
876
+ "grad_norm": 0.027942579239606857,
877
+ "learning_rate": 2.4489571544277945e-05,
878
+ "loss": 10.3556,
879
+ "step": 122
880
+ },
881
+ {
882
+ "epoch": 0.27231216272312164,
883
+ "grad_norm": 0.02813226915895939,
884
+ "learning_rate": 2.308696173983711e-05,
885
+ "loss": 10.3528,
886
+ "step": 123
887
+ },
888
+ {
889
+ "epoch": 0.2745260827452608,
890
+ "grad_norm": 0.030037103220820427,
891
+ "learning_rate": 2.1720455220364444e-05,
892
+ "loss": 10.353,
893
+ "step": 124
894
+ },
895
+ {
896
+ "epoch": 0.2767400027674,
897
+ "grad_norm": 0.030153660103678703,
898
+ "learning_rate": 2.0390693429435627e-05,
899
+ "loss": 10.3552,
900
+ "step": 125
901
+ },
902
+ {
903
+ "epoch": 0.27895392278953923,
904
+ "grad_norm": 0.037454936653375626,
905
+ "learning_rate": 1.9098300562505266e-05,
906
+ "loss": 10.3531,
907
+ "step": 126
908
+ },
909
+ {
910
+ "epoch": 0.28116784281167845,
911
+ "grad_norm": 0.029572051018476486,
912
+ "learning_rate": 1.784388327390687e-05,
913
+ "loss": 10.3504,
914
+ "step": 127
915
+ },
916
+ {
917
+ "epoch": 0.2833817628338176,
918
+ "grad_norm": 0.027182403951883316,
919
+ "learning_rate": 1.6628030392087e-05,
920
+ "loss": 10.3504,
921
+ "step": 128
922
+ },
923
+ {
924
+ "epoch": 0.2855956828559568,
925
+ "grad_norm": 0.03518354520201683,
926
+ "learning_rate": 1.5451312643206827e-05,
927
+ "loss": 10.3536,
928
+ "step": 129
929
+ },
930
+ {
931
+ "epoch": 0.28780960287809604,
932
+ "grad_norm": 0.024024929851293564,
933
+ "learning_rate": 1.4314282383241096e-05,
934
+ "loss": 10.3533,
935
+ "step": 130
936
+ },
937
+ {
938
+ "epoch": 0.29002352290023525,
939
+ "grad_norm": 0.020221339538693428,
940
+ "learning_rate": 1.3217473338699859e-05,
941
+ "loss": 10.3521,
942
+ "step": 131
943
+ },
944
+ {
945
+ "epoch": 0.2922374429223744,
946
+ "grad_norm": 0.02575266920030117,
947
+ "learning_rate": 1.2161400356095375e-05,
948
+ "loss": 10.3528,
949
+ "step": 132
950
+ },
951
+ {
952
+ "epoch": 0.29445136294451363,
953
+ "grad_norm": 0.019320376217365265,
954
+ "learning_rate": 1.1146559160270875e-05,
955
+ "loss": 10.3529,
956
+ "step": 133
957
+ },
958
+ {
959
+ "epoch": 0.29666528296665284,
960
+ "grad_norm": 0.022721335291862488,
961
+ "learning_rate": 1.0173426121705576e-05,
962
+ "loss": 10.3534,
963
+ "step": 134
964
+ },
965
+ {
966
+ "epoch": 0.298879202988792,
967
+ "grad_norm": 0.030306054279208183,
968
+ "learning_rate": 9.242458032904311e-06,
969
+ "loss": 10.3529,
970
+ "step": 135
971
+ },
972
+ {
973
+ "epoch": 0.3010931230109312,
974
+ "grad_norm": 0.03088550828397274,
975
+ "learning_rate": 8.354091893977401e-06,
976
+ "loss": 10.3542,
977
+ "step": 136
978
+ },
979
+ {
980
+ "epoch": 0.30330704303307043,
981
+ "grad_norm": 0.02875097282230854,
982
+ "learning_rate": 7.508744707511117e-06,
983
+ "loss": 10.3556,
984
+ "step": 137
985
+ },
986
+ {
987
+ "epoch": 0.30552096305520965,
988
+ "grad_norm": 0.022224275395274162,
989
+ "learning_rate": 6.70681328282492e-06,
990
+ "loss": 10.3536,
991
+ "step": 138
992
+ },
993
+ {
994
+ "epoch": 0.3077348830773488,
995
+ "grad_norm": 0.028994860127568245,
996
+ "learning_rate": 5.948674049707603e-06,
997
+ "loss": 10.3531,
998
+ "step": 139
999
+ },
1000
+ {
1001
+ "epoch": 0.309948803099488,
1002
+ "grad_norm": 0.03301481530070305,
1003
+ "learning_rate": 5.2346828817197655e-06,
1004
+ "loss": 10.3539,
1005
+ "step": 140
1006
+ },
1007
+ {
1008
+ "epoch": 0.31216272312162724,
1009
+ "grad_norm": 0.031127754598855972,
1010
+ "learning_rate": 4.565174929145188e-06,
1011
+ "loss": 10.3541,
1012
+ "step": 141
1013
+ },
1014
+ {
1015
+ "epoch": 0.31437664314376645,
1016
+ "grad_norm": 0.03324393928050995,
1017
+ "learning_rate": 3.940464461670135e-06,
1018
+ "loss": 10.3558,
1019
+ "step": 142
1020
+ },
1021
+ {
1022
+ "epoch": 0.3165905631659056,
1023
+ "grad_norm": 0.025115065276622772,
1024
+ "learning_rate": 3.360844720863765e-06,
1025
+ "loss": 10.3528,
1026
+ "step": 143
1027
+ },
1028
+ {
1029
+ "epoch": 0.31880448318804483,
1030
+ "grad_norm": 0.02497878670692444,
1031
+ "learning_rate": 2.826587782529444e-06,
1032
+ "loss": 10.353,
1033
+ "step": 144
1034
+ },
1035
+ {
1036
+ "epoch": 0.32101840321018404,
1037
+ "grad_norm": 0.026006096974015236,
1038
+ "learning_rate": 2.3379444289913342e-06,
1039
+ "loss": 10.3548,
1040
+ "step": 145
1041
+ },
1042
+ {
1043
+ "epoch": 0.32323232323232326,
1044
+ "grad_norm": 0.028647800907492638,
1045
+ "learning_rate": 1.8951440313760837e-06,
1046
+ "loss": 10.355,
1047
+ "step": 146
1048
+ },
1049
+ {
1050
+ "epoch": 0.3254462432544624,
1051
+ "grad_norm": 0.03259943798184395,
1052
+ "learning_rate": 1.4983944419451613e-06,
1053
+ "loss": 10.3541,
1054
+ "step": 147
1055
+ },
1056
+ {
1057
+ "epoch": 0.32766016327660163,
1058
+ "grad_norm": 0.033153582364320755,
1059
+ "learning_rate": 1.1478818965281911e-06,
1060
+ "loss": 10.3529,
1061
+ "step": 148
1062
+ },
1063
+ {
1064
+ "epoch": 0.32987408329874085,
1065
+ "grad_norm": 0.026517199352383614,
1066
+ "learning_rate": 8.437709271030603e-07,
1067
+ "loss": 10.3537,
1068
+ "step": 149
1069
+ },
1070
+ {
1071
+ "epoch": 0.33208800332088,
1072
+ "grad_norm": 0.02658323012292385,
1073
+ "learning_rate": 5.862042845640403e-07,
1074
+ "loss": 10.3527,
1075
+ "step": 150
1076
+ },
1077
+ {
1078
+ "epoch": 0.3343019233430192,
1079
+ "grad_norm": 0.024668825790286064,
1080
+ "learning_rate": 3.7530287171387843e-07,
1081
+ "loss": 10.3539,
1082
+ "step": 151
1083
+ },
1084
+ {
1085
+ "epoch": 0.33651584336515844,
1086
+ "grad_norm": 0.030534988269209862,
1087
+ "learning_rate": 2.1116568651156076e-07,
1088
+ "loss": 10.3545,
1089
+ "step": 152
1090
+ },
1091
+ {
1092
+ "epoch": 0.33872976338729766,
1093
+ "grad_norm": 0.024195190519094467,
1094
+ "learning_rate": 9.386977560232879e-08,
1095
+ "loss": 10.3539,
1096
+ "step": 153
1097
+ },
1098
+ {
1099
+ "epoch": 0.3409436834094368,
1100
+ "grad_norm": 0.027664266526699066,
1101
+ "learning_rate": 2.347019815158724e-08,
1102
+ "loss": 10.3528,
1103
+ "step": 154
1104
+ },
1105
+ {
1106
+ "epoch": 0.34315760343157603,
1107
+ "grad_norm": 0.03868336230516434,
1108
+ "learning_rate": 0.0,
1109
+ "loss": 10.3539,
1110
+ "step": 155
1111
  }
1112
  ],
1113
  "logging_steps": 1,
 
1131
  "should_evaluate": false,
1132
  "should_log": false,
1133
  "should_save": true,
1134
+ "should_training_stop": true
1135
  },
1136
  "attributes": {}
1137
  }
1138
  },
1139
+ "total_flos": 64855033774080.0,
1140
  "train_batch_size": 4,
1141
  "trial_name": null,
1142
  "trial_params": null