lesso16 committed
Commit 4be388d · verified · 1 Parent(s): 5fb5afc

Training in progress, step 137, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:154c87b8473e80aeb5b89159f45f19c8b794138abb255f2e28e1e2c70fea9381
+ oid sha256:5721b50238bda1610129cd85c1b675957641dc7971a4f050b7ea679189538399
 size 138995824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1e30d1c6d72de1919df731779b5d6811439aa040ac1b8c11a79086875410b345
+ oid sha256:d5d43ba32f23601abad793ddf7b3462d760eb8fb53b286ffd4d02efbd6f748c3
 size 71077780
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5e82113e33e7a1e4ce347c0eada1da615b742f2bfabd373277829b5cea5f1fa7
+ oid sha256:8c8a5b70642a2b6a2f5da463c24cc5576b689519c228d81584cc51dbe0c5f176
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:2508c772ba86371f3a590745841cba23b527d151f21432017e9ef067a9957090
+ oid sha256:d2a46a95433c990f0fc7ea2279a4d7ef872632e22a0cddf9f06d248d79811bce
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d42c6bf63e218f5406c44d31b8aa11834fed1b2398dfeca30eca5e3e990274ab
+ oid sha256:5966402bb91072dfb39aeac267afdd314764f0f8a39f61ebacab0abaf520852a
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:12eaa1223d2b1b1f63e28e29e3ced91d41c8c0605be080f162f1f7aad46797da
+ oid sha256:340e23f29e49ae0de9d24c424d0e75711375af6c07eb1018270eace124c394c4
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e6ef2c9584769d9fdc685b2e0efcec32c1cfc2561e3629afdf144643e3e40022
+ oid sha256:ae58c0015ae3245509678fc95cc964de9b9689008a1e332cb36ae0d8cfc8af92
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:feeb2d69f79376ea1fa617a71804f5c0f5284f43cebf97ac527777d4e340e7ad
+ oid sha256:8e2e4c6723359328055b006b34af6fa7832379c5eec7cb9ff2047e4013cee364
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:cb35cae220a392aa22473b8899a64994de7fb2ace81da19fa3fa5a3ad9b7a6f7
+ oid sha256:c4a3a7fb8d5bb28d511fd5dc9c053d921143d5f629d55be1dceeac7789f655dc
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e431bf77724e0e87f69f8f82501b4bab4c9b45e607b3048cdcd6ff4a009e6483
+ oid sha256:9c8c608b87806f49f94a44169cdd9e975774b16f6a9921419257cfb2be1c83c2
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:9039f0f0a84afc54b351471b92047e36f111a9a61347c05d7d907f45fef718b2
+ oid sha256:39f31cdc7b9d265f3b70743c4752aa49e538ef342a12e3962eb9a559085eeb7c
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": 1.041056513786316,
 "best_model_checkpoint": "miner_id_24/checkpoint-100",
- "epoch": 2.2076502732240435,
+ "epoch": 3.0273224043715845,
 "eval_steps": 50,
- "global_step": 100,
+ "global_step": 137,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -731,6 +731,265 @@
 "eval_samples_per_second": 308.415,
 "eval_steps_per_second": 10.03,
 "step": 100
+ },
+ {
+ "epoch": 2.2295081967213113,
+ "grad_norm": 0.04856366664171219,
+ "learning_rate": 2.1518003834162954e-05,
+ "loss": 1.0995,
+ "step": 101
+ },
+ {
+ "epoch": 2.251366120218579,
+ "grad_norm": 0.0619901567697525,
+ "learning_rate": 2.0413902018106895e-05,
+ "loss": 1.0795,
+ "step": 102
+ },
+ {
+ "epoch": 2.273224043715847,
+ "grad_norm": 0.053943440318107605,
+ "learning_rate": 1.9332798590175797e-05,
+ "loss": 0.5584,
+ "step": 103
+ },
+ {
+ "epoch": 2.2950819672131146,
+ "grad_norm": 0.06046655401587486,
+ "learning_rate": 1.8275355061874515e-05,
+ "loss": 1.3933,
+ "step": 104
+ },
+ {
+ "epoch": 2.3169398907103824,
+ "grad_norm": 0.04138614237308502,
+ "learning_rate": 1.724221846755858e-05,
+ "loss": 0.9474,
+ "step": 105
+ },
+ {
+ "epoch": 2.33879781420765,
+ "grad_norm": 0.0419883206486702,
+ "learning_rate": 1.623402096852318e-05,
+ "loss": 1.0178,
+ "step": 106
+ },
+ {
+ "epoch": 2.360655737704918,
+ "grad_norm": 0.04966486990451813,
+ "learning_rate": 1.5251379466192902e-05,
+ "loss": 1.1369,
+ "step": 107
+ },
+ {
+ "epoch": 2.3825136612021858,
+ "grad_norm": 0.05595370754599571,
+ "learning_rate": 1.4294895224648664e-05,
+ "loss": 1.1341,
+ "step": 108
+ },
+ {
+ "epoch": 2.4043715846994536,
+ "grad_norm": 0.05111997202038765,
+ "learning_rate": 1.3365153502722967e-05,
+ "loss": 0.6285,
+ "step": 109
+ },
+ {
+ "epoch": 2.4262295081967213,
+ "grad_norm": 0.052137341350317,
+ "learning_rate": 1.2462723195888415e-05,
+ "loss": 1.2566,
+ "step": 110
+ },
+ {
+ "epoch": 2.448087431693989,
+ "grad_norm": 0.03865412250161171,
+ "learning_rate": 1.1588156488159008e-05,
+ "loss": 0.9759,
+ "step": 111
+ },
+ {
+ "epoch": 2.469945355191257,
+ "grad_norm": 0.03843948617577553,
+ "learning_rate": 1.074198851421659e-05,
+ "loss": 1.0035,
+ "step": 112
+ },
+ {
+ "epoch": 2.4918032786885247,
+ "grad_norm": 0.04497023671865463,
+ "learning_rate": 9.924737031969744e-06,
+ "loss": 1.0914,
+ "step": 113
+ },
+ {
+ "epoch": 2.5136612021857925,
+ "grad_norm": 0.05429847911000252,
+ "learning_rate": 9.136902105745273e-06,
+ "loss": 1.1209,
+ "step": 114
+ },
+ {
+ "epoch": 2.5355191256830603,
+ "grad_norm": 0.08493578433990479,
+ "learning_rate": 8.378965800306078e-06,
+ "loss": 1.1134,
+ "step": 115
+ },
+ {
+ "epoch": 2.557377049180328,
+ "grad_norm": 0.06231605261564255,
+ "learning_rate": 7.651391885882701e-06,
+ "loss": 0.965,
+ "step": 116
+ },
+ {
+ "epoch": 2.579234972677596,
+ "grad_norm": 0.03692341595888138,
+ "learning_rate": 6.954625554399086e-06,
+ "loss": 0.8894,
+ "step": 117
+ },
+ {
+ "epoch": 2.6010928961748636,
+ "grad_norm": 0.04275006055831909,
+ "learning_rate": 6.289093147066023e-06,
+ "loss": 1.0013,
+ "step": 118
+ },
+ {
+ "epoch": 2.6229508196721314,
+ "grad_norm": 0.04334869980812073,
+ "learning_rate": 5.655201893509272e-06,
+ "loss": 1.0516,
+ "step": 119
+ },
+ {
+ "epoch": 2.644808743169399,
+ "grad_norm": 0.052981842309236526,
+ "learning_rate": 5.053339662591549e-06,
+ "loss": 1.0457,
+ "step": 120
+ },
+ {
+ "epoch": 2.6666666666666665,
+ "grad_norm": 0.0736251100897789,
+ "learning_rate": 4.483874725081219e-06,
+ "loss": 1.1736,
+ "step": 121
+ },
+ {
+ "epoch": 2.6885245901639343,
+ "grad_norm": 0.057101909071207047,
+ "learning_rate": 3.9471555283128005e-06,
+ "loss": 0.8181,
+ "step": 122
+ },
+ {
+ "epoch": 2.710382513661202,
+ "grad_norm": 0.04115651920437813,
+ "learning_rate": 3.4435104829770587e-06,
+ "loss": 1.0691,
+ "step": 123
+ },
+ {
+ "epoch": 2.73224043715847,
+ "grad_norm": 0.038360998034477234,
+ "learning_rate": 2.9732477621712853e-06,
+ "loss": 0.986,
+ "step": 124
+ },
+ {
+ "epoch": 2.7540983606557377,
+ "grad_norm": 0.0409964919090271,
+ "learning_rate": 2.53665511283261e-06,
+ "loss": 1.0381,
+ "step": 125
+ },
+ {
+ "epoch": 2.7759562841530054,
+ "grad_norm": 0.04703905060887337,
+ "learning_rate": 2.1339996796698887e-06,
+ "loss": 1.0692,
+ "step": 126
+ },
+ {
+ "epoch": 2.797814207650273,
+ "grad_norm": 0.06132422015070915,
+ "learning_rate": 1.7655278417016956e-06,
+ "loss": 1.117,
+ "step": 127
+ },
+ {
+ "epoch": 2.819672131147541,
+ "grad_norm": 0.05300451070070267,
+ "learning_rate": 1.4314650615005687e-06,
+ "loss": 0.543,
+ "step": 128
+ },
+ {
+ "epoch": 2.841530054644809,
+ "grad_norm": 0.055577926337718964,
+ "learning_rate": 1.1320157472357307e-06,
+ "loss": 1.3244,
+ "step": 129
+ },
+ {
+ "epoch": 2.8633879781420766,
+ "grad_norm": 0.04094787687063217,
+ "learning_rate": 8.673631275987297e-07,
+ "loss": 0.9801,
+ "step": 130
+ },
+ {
+ "epoch": 2.8852459016393444,
+ "grad_norm": 0.0408557653427124,
+ "learning_rate": 6.376691396884168e-07,
+ "loss": 1.0152,
+ "step": 131
+ },
+ {
+ "epoch": 2.907103825136612,
+ "grad_norm": 0.047284748405218124,
+ "learning_rate": 4.430743299240307e-07,
+ "loss": 1.0816,
+ "step": 132
+ },
+ {
+ "epoch": 2.92896174863388,
+ "grad_norm": 0.055055923759937286,
+ "learning_rate": 2.836977680468222e-07,
+ "loss": 1.0597,
+ "step": 133
+ },
+ {
+ "epoch": 2.9508196721311473,
+ "grad_norm": 0.05081977695226669,
+ "learning_rate": 1.5963697426306723e-07,
+ "loss": 0.6892,
+ "step": 134
+ },
+ {
+ "epoch": 2.972677595628415,
+ "grad_norm": 0.055216483771800995,
+ "learning_rate": 7.096785957284602e-08,
+ "loss": 1.341,
+ "step": 135
+ },
+ {
+ "epoch": 3.0054644808743167,
+ "grad_norm": 0.10709080845117569,
+ "learning_rate": 1.774467932117818e-08,
+ "loss": 1.761,
+ "step": 136
+ },
+ {
+ "epoch": 3.0273224043715845,
+ "grad_norm": 0.03700735419988632,
+ "learning_rate": 0.0,
+ "loss": 0.8896,
+ "step": 137
 }
 ],
 "logging_steps": 1,
@@ -754,12 +1013,12 @@
 "should_evaluate": false,
 "should_log": false,
 "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
 },
 "attributes": {}
 }
 },
- "total_flos": 5.647936171225907e+16,
+ "total_flos": 7.74466163399721e+16,
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null