Training in progress, step 911, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +1075 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:510a52ac805c222aef3cf677e392c568b0e01bfafa8b84d5fe860020a4affe93
 size 60010048

 version https://git-lfs.github.com/spec/v1
+oid sha256:be633fd5b9a184214787cf8de804cd47d2d08122c6886a59a13dd6075ceb7cb7
 size 60010048

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd03054be855d638dc8730b0926abbe4afba52392c7c1ae7bffc7808f73ee7c2
 size 30428180

 version https://git-lfs.github.com/spec/v1
+oid sha256:9edb22bd00df21c10ec53c4bbfa2093db527d86ac59bfb6f02154aa19d5da6c1
 size 30428180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2eaf8013c84ba7b2a92613bbe740d03f2043c8adf3497981ed60c3e17ec19ae9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:04e2e9566872b589840ca5036c48f962e578ce55c36abdd70f84617113393bbb
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8318244170096022,
   "eval_steps": 500,
-  "global_step": 758,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5313,6 +5313,1077 @@
       "learning_rate": 3.773119605425401e-06,
       "loss": 1.2571,
       "step": 758
     }
   ],
   "logging_steps": 1,
@@ -5327,12 +6398,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.6050522467731046e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9997256515775035,
   "eval_steps": 500,
+  "global_step": 911,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.773119605425401e-06,
       "loss": 1.2571,
       "step": 758
+    },
+    {
+      "epoch": 0.8329218106995885,
+      "grad_norm": 0.2670471668243408,
+      "learning_rate": 3.7484586929716402e-06,
+      "loss": 1.1585,
+      "step": 759
+    },
+    {
+      "epoch": 0.8340192043895748,
+      "grad_norm": 0.2522693872451782,
+      "learning_rate": 3.723797780517879e-06,
+      "loss": 1.2441,
+      "step": 760
+    },
+    {
+      "epoch": 0.8351165980795611,
+      "grad_norm": 0.26325473189353943,
+      "learning_rate": 3.699136868064119e-06,
+      "loss": 1.231,
+      "step": 761
+    },
+    {
+      "epoch": 0.8362139917695474,
+      "grad_norm": 0.2654995024204254,
+      "learning_rate": 3.674475955610358e-06,
+      "loss": 1.2638,
+      "step": 762
+    },
+    {
+      "epoch": 0.8373113854595337,
+      "grad_norm": 0.24272461235523224,
+      "learning_rate": 3.649815043156597e-06,
+      "loss": 1.2356,
+      "step": 763
+    },
+    {
+      "epoch": 0.83840877914952,
+      "grad_norm": 0.3264164924621582,
+      "learning_rate": 3.6251541307028365e-06,
+      "loss": 1.2741,
+      "step": 764
+    },
+    {
+      "epoch": 0.8395061728395061,
+      "grad_norm": 0.2254675030708313,
+      "learning_rate": 3.6004932182490754e-06,
+      "loss": 1.1681,
+      "step": 765
+    },
+    {
+      "epoch": 0.8406035665294924,
+      "grad_norm": 0.25938257575035095,
+      "learning_rate": 3.5758323057953147e-06,
+      "loss": 1.1809,
+      "step": 766
+    },
+    {
+      "epoch": 0.8417009602194787,
+      "grad_norm": 0.27756911516189575,
+      "learning_rate": 3.5511713933415536e-06,
+      "loss": 1.2611,
+      "step": 767
+    },
+    {
+      "epoch": 0.842798353909465,
+      "grad_norm": 0.2840558886528015,
+      "learning_rate": 3.526510480887793e-06,
+      "loss": 1.2804,
+      "step": 768
+    },
+    {
+      "epoch": 0.8438957475994513,
+      "grad_norm": 0.28387823700904846,
+      "learning_rate": 3.5018495684340327e-06,
+      "loss": 1.2598,
+      "step": 769
+    },
+    {
+      "epoch": 0.8449931412894376,
+      "grad_norm": 0.27318140864372253,
+      "learning_rate": 3.4771886559802716e-06,
+      "loss": 1.2617,
+      "step": 770
+    },
+    {
+      "epoch": 0.8460905349794239,
+      "grad_norm": 0.24867716431617737,
+      "learning_rate": 3.452527743526511e-06,
+      "loss": 1.172,
+      "step": 771
+    },
+    {
+      "epoch": 0.8471879286694102,
+      "grad_norm": 0.24067752063274384,
+      "learning_rate": 3.42786683107275e-06,
+      "loss": 1.2622,
+      "step": 772
+    },
+    {
+      "epoch": 0.8482853223593965,
+      "grad_norm": 0.23819519579410553,
+      "learning_rate": 3.403205918618989e-06,
+      "loss": 1.2249,
+      "step": 773
+    },
+    {
+      "epoch": 0.8493827160493828,
+      "grad_norm": 0.2725595533847809,
+      "learning_rate": 3.3785450061652285e-06,
+      "loss": 1.1911,
+      "step": 774
+    },
+    {
+      "epoch": 0.850480109739369,
+      "grad_norm": 0.27524641156196594,
+      "learning_rate": 3.3538840937114674e-06,
+      "loss": 1.255,
+      "step": 775
+    },
+    {
+      "epoch": 0.8515775034293552,
+      "grad_norm": 0.24099332094192505,
+      "learning_rate": 3.3292231812577068e-06,
+      "loss": 1.2023,
+      "step": 776
+    },
+    {
+      "epoch": 0.8526748971193415,
+      "grad_norm": 0.2646848261356354,
+      "learning_rate": 3.3045622688039457e-06,
+      "loss": 1.197,
+      "step": 777
+    },
+    {
+      "epoch": 0.8537722908093278,
+      "grad_norm": 0.24707616865634918,
+      "learning_rate": 3.2799013563501854e-06,
+      "loss": 1.1769,
+      "step": 778
+    },
+    {
+      "epoch": 0.8548696844993141,
+      "grad_norm": 0.2570493817329407,
+      "learning_rate": 3.2552404438964248e-06,
+      "loss": 1.2645,
+      "step": 779
+    },
+    {
+      "epoch": 0.8559670781893004,
+      "grad_norm": 0.2723713517189026,
+      "learning_rate": 3.2305795314426637e-06,
+      "loss": 1.2125,
+      "step": 780
+    },
+    {
+      "epoch": 0.8570644718792867,
+      "grad_norm": 0.23607034981250763,
+      "learning_rate": 3.205918618988903e-06,
+      "loss": 1.2958,
+      "step": 781
+    },
+    {
+      "epoch": 0.858161865569273,
+      "grad_norm": 0.24119311571121216,
+      "learning_rate": 3.181257706535142e-06,
+      "loss": 1.2814,
+      "step": 782
+    },
+    {
+      "epoch": 0.8592592592592593,
+      "grad_norm": 0.2970694303512573,
+      "learning_rate": 3.1565967940813812e-06,
+      "loss": 1.1962,
+      "step": 783
+    },
+    {
+      "epoch": 0.8603566529492456,
+      "grad_norm": 0.24748800694942474,
+      "learning_rate": 3.1319358816276206e-06,
+      "loss": 1.2468,
+      "step": 784
+    },
+    {
+      "epoch": 0.8614540466392319,
+      "grad_norm": 0.2634020745754242,
+      "learning_rate": 3.1072749691738595e-06,
+      "loss": 1.2125,
+      "step": 785
+    },
+    {
+      "epoch": 0.8625514403292182,
+      "grad_norm": 0.24719679355621338,
+      "learning_rate": 3.0826140567200992e-06,
+      "loss": 1.2097,
+      "step": 786
+    },
+    {
+      "epoch": 0.8636488340192043,
+      "grad_norm": 0.28501445055007935,
+      "learning_rate": 3.057953144266338e-06,
+      "loss": 1.1259,
+      "step": 787
+    },
+    {
+      "epoch": 0.8647462277091906,
+      "grad_norm": 0.25702810287475586,
+      "learning_rate": 3.0332922318125775e-06,
+      "loss": 1.2083,
+      "step": 788
+    },
+    {
+      "epoch": 0.8658436213991769,
+      "grad_norm": 0.25063031911849976,
+      "learning_rate": 3.0086313193588164e-06,
+      "loss": 1.172,
+      "step": 789
+    },
+    {
+      "epoch": 0.8669410150891632,
+      "grad_norm": 0.2768210172653198,
+      "learning_rate": 2.9839704069050557e-06,
+      "loss": 1.181,
+      "step": 790
+    },
+    {
+      "epoch": 0.8680384087791495,
+      "grad_norm": 0.24431820213794708,
+      "learning_rate": 2.959309494451295e-06,
+      "loss": 1.2165,
+      "step": 791
+    },
+    {
+      "epoch": 0.8691358024691358,
+      "grad_norm": 0.2454931139945984,
+      "learning_rate": 2.934648581997534e-06,
+      "loss": 1.2469,
+      "step": 792
+    },
+    {
+      "epoch": 0.8702331961591221,
+      "grad_norm": 0.2588542401790619,
+      "learning_rate": 2.9099876695437733e-06,
+      "loss": 1.2271,
+      "step": 793
+    },
+    {
+      "epoch": 0.8713305898491084,
+      "grad_norm": 0.22791962325572968,
+      "learning_rate": 2.885326757090012e-06,
+      "loss": 1.2226,
+      "step": 794
+    },
+    {
+      "epoch": 0.8724279835390947,
+      "grad_norm": 0.25886085629463196,
+      "learning_rate": 2.860665844636252e-06,
+      "loss": 1.2904,
+      "step": 795
+    },
+    {
+      "epoch": 0.873525377229081,
+      "grad_norm": 0.24065649509429932,
+      "learning_rate": 2.8360049321824913e-06,
+      "loss": 1.201,
+      "step": 796
+    },
+    {
+      "epoch": 0.8746227709190673,
+      "grad_norm": 0.2704163193702698,
+      "learning_rate": 2.81134401972873e-06,
+      "loss": 1.2145,
+      "step": 797
+    },
+    {
+      "epoch": 0.8757201646090536,
+      "grad_norm": 0.25260040163993835,
+      "learning_rate": 2.7866831072749695e-06,
+      "loss": 1.2277,
+      "step": 798
+    },
+    {
+      "epoch": 0.8768175582990397,
+      "grad_norm": 0.2618052363395691,
+      "learning_rate": 2.7620221948212084e-06,
+      "loss": 1.2252,
+      "step": 799
+    },
+    {
+      "epoch": 0.877914951989026,
+      "grad_norm": 0.29944562911987305,
+      "learning_rate": 2.7373612823674478e-06,
+      "loss": 1.2221,
+      "step": 800
+    },
+    {
+      "epoch": 0.8790123456790123,
+      "grad_norm": 0.2519710958003998,
+      "learning_rate": 2.712700369913687e-06,
+      "loss": 1.2022,
+      "step": 801
+    },
+    {
+      "epoch": 0.8801097393689986,
+      "grad_norm": 0.29498758912086487,
+      "learning_rate": 2.688039457459926e-06,
+      "loss": 1.1939,
+      "step": 802
+    },
+    {
+      "epoch": 0.8812071330589849,
+      "grad_norm": 0.3040591776371002,
+      "learning_rate": 2.6633785450061657e-06,
+      "loss": 1.1721,
+      "step": 803
+    },
+    {
+      "epoch": 0.8823045267489712,
+      "grad_norm": 0.24407289922237396,
+      "learning_rate": 2.6387176325524042e-06,
+      "loss": 1.281,
+      "step": 804
+    },
+    {
+      "epoch": 0.8834019204389575,
+      "grad_norm": 0.3141189217567444,
+      "learning_rate": 2.614056720098644e-06,
+      "loss": 1.1731,
+      "step": 805
+    },
+    {
+      "epoch": 0.8844993141289438,
+      "grad_norm": 0.2557481527328491,
+      "learning_rate": 2.5893958076448833e-06,
+      "loss": 1.2577,
+      "step": 806
+    },
+    {
+      "epoch": 0.8855967078189301,
+      "grad_norm": 0.256987601518631,
+      "learning_rate": 2.5647348951911222e-06,
+      "loss": 1.1678,
+      "step": 807
+    },
+    {
+      "epoch": 0.8866941015089164,
+      "grad_norm": 0.26355645060539246,
+      "learning_rate": 2.5400739827373616e-06,
+      "loss": 1.2432,
+      "step": 808
+    },
+    {
+      "epoch": 0.8877914951989027,
+      "grad_norm": 0.2442682534456253,
+      "learning_rate": 2.5154130702836005e-06,
+      "loss": 1.3616,
+      "step": 809
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.29615169763565063,
+      "learning_rate": 2.49075215782984e-06,
+      "loss": 1.2012,
+      "step": 810
+    },
+    {
+      "epoch": 0.8899862825788751,
+      "grad_norm": 0.25238725543022156,
+      "learning_rate": 2.466091245376079e-06,
+      "loss": 1.1903,
+      "step": 811
+    },
+    {
+      "epoch": 0.8910836762688614,
+      "grad_norm": 0.25462445616722107,
+      "learning_rate": 2.441430332922318e-06,
+      "loss": 1.1539,
+      "step": 812
+    },
+    {
+      "epoch": 0.8921810699588477,
+      "grad_norm": 0.2639220356941223,
+      "learning_rate": 2.416769420468558e-06,
+      "loss": 1.1004,
+      "step": 813
+    },
+    {
+      "epoch": 0.893278463648834,
+      "grad_norm": 0.287663072347641,
+      "learning_rate": 2.3921085080147967e-06,
+      "loss": 1.2643,
+      "step": 814
+    },
+    {
+      "epoch": 0.8943758573388203,
+      "grad_norm": 0.24938149750232697,
+      "learning_rate": 2.367447595561036e-06,
+      "loss": 1.3602,
+      "step": 815
+    },
+    {
+      "epoch": 0.8954732510288066,
+      "grad_norm": 0.2602677643299103,
+      "learning_rate": 2.342786683107275e-06,
+      "loss": 1.2043,
+      "step": 816
+    },
+    {
+      "epoch": 0.8965706447187929,
+      "grad_norm": 0.25690793991088867,
+      "learning_rate": 2.3181257706535143e-06,
+      "loss": 1.1586,
+      "step": 817
+    },
+    {
+      "epoch": 0.8976680384087792,
+      "grad_norm": 0.2565641701221466,
+      "learning_rate": 2.2934648581997536e-06,
+      "loss": 1.2512,
+      "step": 818
+    },
+    {
+      "epoch": 0.8987654320987655,
+      "grad_norm": 0.2628938853740692,
+      "learning_rate": 2.268803945745993e-06,
+      "loss": 1.1438,
+      "step": 819
+    },
+    {
+      "epoch": 0.8998628257887518,
+      "grad_norm": 0.23171505331993103,
+      "learning_rate": 2.244143033292232e-06,
+      "loss": 1.1741,
+      "step": 820
+    },
+    {
+      "epoch": 0.900960219478738,
+      "grad_norm": 0.2506265640258789,
+      "learning_rate": 2.219482120838471e-06,
+      "loss": 1.1697,
+      "step": 821
+    },
+    {
+      "epoch": 0.9020576131687242,
+      "grad_norm": 0.27947020530700684,
+      "learning_rate": 2.1948212083847105e-06,
+      "loss": 1.1571,
+      "step": 822
+    },
+    {
+      "epoch": 0.9031550068587105,
+      "grad_norm": 0.2594459354877472,
+      "learning_rate": 2.1701602959309494e-06,
+      "loss": 1.2105,
+      "step": 823
+    },
+    {
+      "epoch": 0.9042524005486968,
+      "grad_norm": 0.24918478727340698,
+      "learning_rate": 2.1454993834771887e-06,
+      "loss": 1.2497,
+      "step": 824
+    },
+    {
+      "epoch": 0.9053497942386831,
+      "grad_norm": 0.2924598157405853,
+      "learning_rate": 2.120838471023428e-06,
+      "loss": 1.3235,
+      "step": 825
+    },
+    {
+      "epoch": 0.9064471879286694,
+      "grad_norm": 0.24772346019744873,
+      "learning_rate": 2.0961775585696674e-06,
+      "loss": 1.2818,
+      "step": 826
+    },
+    {
+      "epoch": 0.9075445816186557,
+      "grad_norm": 0.2838573157787323,
+      "learning_rate": 2.0715166461159063e-06,
+      "loss": 1.1918,
+      "step": 827
+    },
+    {
+      "epoch": 0.908641975308642,
+      "grad_norm": 0.26798808574676514,
+      "learning_rate": 2.0468557336621456e-06,
+      "loss": 1.1874,
+      "step": 828
+    },
+    {
+      "epoch": 0.9097393689986283,
+      "grad_norm": 0.2984868586063385,
+      "learning_rate": 2.022194821208385e-06,
+      "loss": 1.2417,
+      "step": 829
+    },
+    {
+      "epoch": 0.9108367626886146,
+      "grad_norm": 0.27059322595596313,
+      "learning_rate": 1.9975339087546243e-06,
+      "loss": 1.2273,
+      "step": 830
+    },
+    {
+      "epoch": 0.9119341563786009,
+      "grad_norm": 0.2572599947452545,
+      "learning_rate": 1.9728729963008632e-06,
+      "loss": 1.186,
+      "step": 831
+    },
+    {
+      "epoch": 0.9130315500685872,
+      "grad_norm": 0.27578267455101013,
+      "learning_rate": 1.9482120838471025e-06,
+      "loss": 1.1343,
+      "step": 832
+    },
+    {
+      "epoch": 0.9141289437585733,
+      "grad_norm": 0.25030946731567383,
+      "learning_rate": 1.9235511713933415e-06,
+      "loss": 1.2599,
+      "step": 833
+    },
+    {
+      "epoch": 0.9152263374485596,
+      "grad_norm": 0.23245501518249512,
+      "learning_rate": 1.8988902589395808e-06,
+      "loss": 1.2471,
+      "step": 834
+    },
+    {
+      "epoch": 0.9163237311385459,
+      "grad_norm": 0.23760074377059937,
+      "learning_rate": 1.8742293464858201e-06,
+      "loss": 1.2457,
+      "step": 835
+    },
+    {
+      "epoch": 0.9174211248285322,
+      "grad_norm": 0.2975304424762726,
+      "learning_rate": 1.8495684340320595e-06,
+      "loss": 1.2025,
+      "step": 836
+    },
+    {
+      "epoch": 0.9185185185185185,
+      "grad_norm": 0.240847647190094,
+      "learning_rate": 1.8249075215782986e-06,
+      "loss": 1.2289,
+      "step": 837
+    },
+    {
+      "epoch": 0.9196159122085048,
+      "grad_norm": 0.23974715173244476,
+      "learning_rate": 1.8002466091245377e-06,
+      "loss": 1.2117,
+      "step": 838
+    },
+    {
+      "epoch": 0.9207133058984911,
+      "grad_norm": 0.27474868297576904,
+      "learning_rate": 1.7755856966707768e-06,
+      "loss": 1.2863,
+      "step": 839
+    },
+    {
+      "epoch": 0.9218106995884774,
+      "grad_norm": 0.28905266523361206,
+      "learning_rate": 1.7509247842170164e-06,
+      "loss": 1.2041,
+      "step": 840
+    },
+    {
+      "epoch": 0.9229080932784637,
+      "grad_norm": 0.25439831614494324,
+      "learning_rate": 1.7262638717632555e-06,
+      "loss": 1.2385,
+      "step": 841
+    },
+    {
+      "epoch": 0.92400548696845,
+      "grad_norm": 0.2607545256614685,
+      "learning_rate": 1.7016029593094946e-06,
+      "loss": 1.2837,
+      "step": 842
+    },
+    {
+      "epoch": 0.9251028806584363,
+      "grad_norm": 0.34511038661003113,
+      "learning_rate": 1.6769420468557337e-06,
+      "loss": 1.2155,
+      "step": 843
+    },
+    {
+      "epoch": 0.9262002743484224,
+      "grad_norm": 0.26956722140312195,
+      "learning_rate": 1.6522811344019728e-06,
+      "loss": 1.1794,
+      "step": 844
+    },
+    {
+      "epoch": 0.9272976680384087,
+      "grad_norm": 0.25592947006225586,
+      "learning_rate": 1.6276202219482124e-06,
+      "loss": 1.1996,
+      "step": 845
+    },
+    {
+      "epoch": 0.928395061728395,
+      "grad_norm": 0.25805869698524475,
+      "learning_rate": 1.6029593094944515e-06,
+      "loss": 1.2655,
+      "step": 846
+    },
+    {
+      "epoch": 0.9294924554183813,
+      "grad_norm": 0.2707035541534424,
+      "learning_rate": 1.5782983970406906e-06,
+      "loss": 1.3274,
+      "step": 847
+    },
+    {
+      "epoch": 0.9305898491083676,
+      "grad_norm": 0.2707644999027252,
+      "learning_rate": 1.5536374845869297e-06,
+      "loss": 1.2375,
+      "step": 848
+    },
+    {
+      "epoch": 0.9316872427983539,
+      "grad_norm": 0.2761702537536621,
+      "learning_rate": 1.528976572133169e-06,
+      "loss": 1.1769,
+      "step": 849
+    },
+    {
+      "epoch": 0.9327846364883402,
+      "grad_norm": 0.26850977540016174,
+      "learning_rate": 1.5043156596794082e-06,
+      "loss": 1.1833,
+      "step": 850
+    },
+    {
+      "epoch": 0.9338820301783265,
+      "grad_norm": 0.26352792978286743,
+      "learning_rate": 1.4796547472256475e-06,
+      "loss": 1.2284,
+      "step": 851
+    },
+    {
+      "epoch": 0.9349794238683128,
+      "grad_norm": 0.24324378371238708,
+      "learning_rate": 1.4549938347718866e-06,
+      "loss": 1.2005,
+      "step": 852
+    },
+    {
+      "epoch": 0.9360768175582991,
+      "grad_norm": 0.24241109192371368,
+      "learning_rate": 1.430332922318126e-06,
+      "loss": 1.2059,
+      "step": 853
+    },
+    {
+      "epoch": 0.9371742112482854,
+      "grad_norm": 0.26090410351753235,
+      "learning_rate": 1.405672009864365e-06,
+      "loss": 1.2379,
+      "step": 854
+    },
+    {
+      "epoch": 0.9382716049382716,
+      "grad_norm": 0.2450953871011734,
+      "learning_rate": 1.3810110974106042e-06,
+      "loss": 1.2946,
+      "step": 855
+    },
+    {
+      "epoch": 0.9393689986282578,
+      "grad_norm": 0.24933604896068573,
+      "learning_rate": 1.3563501849568435e-06,
+      "loss": 1.1612,
+      "step": 856
+    },
+    {
+      "epoch": 0.9404663923182441,
+      "grad_norm": 0.2418948858976364,
+      "learning_rate": 1.3316892725030829e-06,
+      "loss": 1.229,
+      "step": 857
+    },
+    {
+      "epoch": 0.9415637860082304,
+      "grad_norm": 0.26510903239250183,
+      "learning_rate": 1.307028360049322e-06,
+      "loss": 1.1778,
+      "step": 858
+    },
+    {
+      "epoch": 0.9426611796982167,
+      "grad_norm": 0.2577672302722931,
+      "learning_rate": 1.2823674475955611e-06,
+      "loss": 1.2076,
+      "step": 859
+    },
+    {
+      "epoch": 0.943758573388203,
+      "grad_norm": 0.24987109005451202,
+      "learning_rate": 1.2577065351418002e-06,
+      "loss": 1.2105,
+      "step": 860
+    },
+    {
+      "epoch": 0.9448559670781893,
+      "grad_norm": 0.25255313515663147,
+      "learning_rate": 1.2330456226880396e-06,
+      "loss": 1.1777,
+      "step": 861
+    },
+    {
+      "epoch": 0.9459533607681756,
+      "grad_norm": 0.2602977752685547,
+      "learning_rate": 1.208384710234279e-06,
+      "loss": 1.2409,
+      "step": 862
+    },
+    {
+      "epoch": 0.9470507544581619,
+      "grad_norm": 0.2703125476837158,
+      "learning_rate": 1.183723797780518e-06,
+      "loss": 1.3063,
+      "step": 863
+    },
+    {
+      "epoch": 0.9481481481481482,
+      "grad_norm": 0.23141594231128693,
+      "learning_rate": 1.1590628853267571e-06,
+      "loss": 1.223,
+      "step": 864
+    },
+    {
+      "epoch": 0.9492455418381345,
+      "grad_norm": 0.26518914103507996,
+      "learning_rate": 1.1344019728729965e-06,
+      "loss": 1.2147,
+      "step": 865
+    },
+    {
+      "epoch": 0.9503429355281207,
+      "grad_norm": 0.28556376695632935,
+      "learning_rate": 1.1097410604192356e-06,
+      "loss": 1.1871,
+      "step": 866
+    },
+    {
+      "epoch": 0.951440329218107,
+      "grad_norm": 0.2666340172290802,
+      "learning_rate": 1.0850801479654747e-06,
+      "loss": 1.1842,
+      "step": 867
+    },
+    {
+      "epoch": 0.9525377229080932,
+      "grad_norm": 0.23198631405830383,
+      "learning_rate": 1.060419235511714e-06,
+      "loss": 1.2356,
+      "step": 868
+    },
+    {
+      "epoch": 0.9536351165980795,
+      "grad_norm": 0.2616305947303772,
+      "learning_rate": 1.0357583230579532e-06,
+      "loss": 1.3017,
+      "step": 869
+    },
+    {
+      "epoch": 0.9547325102880658,
+      "grad_norm": 0.2699725925922394,
+      "learning_rate": 1.0110974106041925e-06,
+      "loss": 1.1505,
+      "step": 870
+    },
+    {
+      "epoch": 0.9558299039780521,
+      "grad_norm": 0.2656715512275696,
+      "learning_rate": 9.864364981504316e-07,
+      "loss": 1.2487,
+      "step": 871
+    },
+    {
+      "epoch": 0.9569272976680384,
+      "grad_norm": 0.24682074785232544,
+      "learning_rate": 9.617755856966707e-07,
+      "loss": 1.2081,
+      "step": 872
+    },
+    {
+      "epoch": 0.9580246913580247,
+      "grad_norm": 0.26323434710502625,
+      "learning_rate": 9.371146732429101e-07,
+      "loss": 1.2228,
+      "step": 873
+    },
+    {
+      "epoch": 0.959122085048011,
+      "grad_norm": 0.2442006766796112,
+      "learning_rate": 9.124537607891493e-07,
+      "loss": 1.2603,
+      "step": 874
+    },
+    {
+      "epoch": 0.9602194787379973,
+      "grad_norm": 0.28010329604148865,
+      "learning_rate": 8.877928483353884e-07,
+      "loss": 1.2486,
+      "step": 875
+    },
+    {
+      "epoch": 0.9613168724279836,
+      "grad_norm": 0.2618809640407562,
+      "learning_rate": 8.631319358816277e-07,
+      "loss": 1.2433,
+      "step": 876
+    },
+    {
+      "epoch": 0.9624142661179699,
+      "grad_norm": 0.2693452537059784,
+      "learning_rate": 8.384710234278669e-07,
+      "loss": 1.2296,
+      "step": 877
+    },
+    {
+      "epoch": 0.9635116598079561,
+      "grad_norm": 0.256401926279068,
+      "learning_rate": 8.138101109741062e-07,
+      "loss": 1.2447,
+      "step": 878
+    },
+    {
+      "epoch": 0.9646090534979423,
+      "grad_norm": 0.2823677659034729,
+      "learning_rate": 7.891491985203453e-07,
+      "loss": 1.1789,
+      "step": 879
+    },
+    {
+      "epoch": 0.9657064471879286,
+      "grad_norm": 0.29250475764274597,
+      "learning_rate": 7.644882860665845e-07,
+      "loss": 1.1497,
+      "step": 880
+    },
+    {
+      "epoch": 0.9668038408779149,
+      "grad_norm": 0.25502100586891174,
+      "learning_rate": 7.398273736128238e-07,
+      "loss": 1.2713,
+      "step": 881
+    },
+    {
+      "epoch": 0.9679012345679012,
+      "grad_norm": 0.2642868161201477,
+      "learning_rate": 7.15166461159063e-07,
+      "loss": 1.2178,
+      "step": 882
+    },
+    {
+      "epoch": 0.9689986282578875,
+      "grad_norm": 0.25020915269851685,
+      "learning_rate": 6.905055487053021e-07,
+      "loss": 1.2125,
+      "step": 883
+    },
+    {
+      "epoch": 0.9700960219478738,
+      "grad_norm": 0.25948819518089294,
+      "learning_rate": 6.658446362515414e-07,
+      "loss": 1.2355,
+      "step": 884
+    },
+    {
+      "epoch": 0.9711934156378601,
+      "grad_norm": 0.25626036524772644,
+      "learning_rate": 6.411837237977806e-07,
+      "loss": 1.227,
+      "step": 885
+    },
+    {
+      "epoch": 0.9722908093278464,
+      "grad_norm": 0.24282559752464294,
+      "learning_rate": 6.165228113440198e-07,
+      "loss": 1.2019,
+      "step": 886
+    },
+    {
+      "epoch": 0.9733882030178327,
+      "grad_norm": 0.2501373291015625,
+      "learning_rate": 5.91861898890259e-07,
+      "loss": 1.2002,
+      "step": 887
+    },
+    {
+      "epoch": 0.974485596707819,
+      "grad_norm": 0.27020156383514404,
+      "learning_rate": 5.672009864364982e-07,
+      "loss": 1.2451,
+      "step": 888
+    },
+    {
+      "epoch": 0.9755829903978052,
+      "grad_norm": 0.2579203248023987,
+      "learning_rate": 5.425400739827374e-07,
+      "loss": 1.277,
+      "step": 889
+    },
+    {
+      "epoch": 0.9766803840877915,
+      "grad_norm": 0.24406872689723969,
+      "learning_rate": 5.178791615289766e-07,
+      "loss": 1.2816,
+      "step": 890
+    },
+    {
+      "epoch": 0.9777777777777777,
+      "grad_norm": 0.2537878453731537,
+      "learning_rate": 4.932182490752158e-07,
+      "loss": 1.23,
+      "step": 891
+    },
+    {
+      "epoch": 0.978875171467764,
+      "grad_norm": 0.2799593508243561,
+      "learning_rate": 4.6855733662145503e-07,
+      "loss": 1.2479,
+      "step": 892
+    },
+    {
+      "epoch": 0.9799725651577503,
+      "grad_norm": 0.23126421868801117,
+      "learning_rate": 4.438964241676942e-07,
+      "loss": 1.2648,
+      "step": 893
+    },
+    {
+      "epoch": 0.9810699588477366,
+      "grad_norm": 0.2650638818740845,
+      "learning_rate": 4.1923551171393343e-07,
+      "loss": 1.2301,
+      "step": 894
+    },
+    {
+      "epoch": 0.9821673525377229,
+      "grad_norm": 0.27923524379730225,
+      "learning_rate": 3.9457459926017265e-07,
+      "loss": 1.188,
+      "step": 895
+    },
+    {
+      "epoch": 0.9832647462277092,
+      "grad_norm": 0.4018268287181854,
+      "learning_rate": 3.699136868064119e-07,
+      "loss": 1.234,
+      "step": 896
+    },
+    {
+      "epoch": 0.9843621399176955,
+      "grad_norm": 0.26936185359954834,
+      "learning_rate": 3.4525277435265105e-07,
+      "loss": 1.2962,
+      "step": 897
+    },
+    {
+      "epoch": 0.9854595336076818,
+      "grad_norm": 0.2637123167514801,
+      "learning_rate": 3.205918618988903e-07,
+      "loss": 1.231,
+      "step": 898
+    },
+    {
+      "epoch": 0.9865569272976681,
+      "grad_norm": 0.2618735730648041,
+      "learning_rate": 2.959309494451295e-07,
+      "loss": 1.2333,
+      "step": 899
+    },
+    {
+      "epoch": 0.9876543209876543,
+      "grad_norm": 0.2631166875362396,
+      "learning_rate": 2.712700369913687e-07,
+      "loss": 1.2565,
+      "step": 900
+    },
+    {
+      "epoch": 0.9887517146776406,
+      "grad_norm": 0.25191444158554077,
+      "learning_rate": 2.466091245376079e-07,
+      "loss": 1.2109,
+      "step": 901
+    },
+    {
+      "epoch": 0.9898491083676269,
+      "grad_norm": 0.24879272282123566,
+      "learning_rate": 2.219482120838471e-07,
+      "loss": 1.2384,
+      "step": 902
+    },
+    {
+      "epoch": 0.9909465020576131,
+      "grad_norm": 0.2868032455444336,
+      "learning_rate": 1.9728729963008633e-07,
+      "loss": 1.1736,
+      "step": 903
+    },
+    {
+      "epoch": 0.9920438957475994,
+      "grad_norm": 0.25794628262519836,
+      "learning_rate": 1.7262638717632553e-07,
+      "loss": 1.1557,
+      "step": 904
+    },
+    {
+      "epoch": 0.9931412894375857,
+      "grad_norm": 0.29972419142723083,
+      "learning_rate": 1.4796547472256475e-07,
+      "loss": 1.1637,
+      "step": 905
+    },
+    {
+      "epoch": 0.994238683127572,
+      "grad_norm": 0.24752122163772583,
+      "learning_rate": 1.2330456226880395e-07,
+      "loss": 1.2375,
+      "step": 906
+    },
+    {
+      "epoch": 0.9953360768175583,
+      "grad_norm": 0.2406410425901413,
+      "learning_rate": 9.864364981504316e-08,
+      "loss": 1.319,
+      "step": 907
+    },
+    {
+      "epoch": 0.9964334705075446,
+      "grad_norm": 0.2575162351131439,
+      "learning_rate": 7.398273736128238e-08,
+      "loss": 1.3036,
+      "step": 908
+    },
+    {
+      "epoch": 0.9975308641975309,
+      "grad_norm": 0.2430945187807083,
+      "learning_rate": 4.932182490752158e-08,
+      "loss": 1.2212,
+      "step": 909
+    },
+    {
+      "epoch": 0.9986282578875172,
+      "grad_norm": 0.2730814218521118,
+      "learning_rate": 2.466091245376079e-08,
+      "loss": 1.1903,
+      "step": 910
+    },
+    {
+      "epoch": 0.9997256515775035,
+      "grad_norm": 0.294842928647995,
+      "learning_rate": 0.0,
+      "loss": 1.2108,
+      "step": 911
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.538444460514181e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null