Training in progress, step 840, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +284 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19f74b98bd7db98d10b0fb68f70fcdf71de22ad1e538962ff35f13c8025e719f
 size 289512208

 version https://git-lfs.github.com/spec/v1
+oid sha256:27db5c66916d7daad771886e225cd7152669ee12e611a380fbf0009c9af37adc
 size 289512208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50a8ad4105eb139aadf14070d8274eae21445d239100a2810eeb1b389f2eafd5
 size 147781972

 version https://git-lfs.github.com/spec/v1
+oid sha256:3577d633018a2488773a311af50b3f09dfc5134434176462a126cedd7dcc57c2
 size 147781972

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7aff307a49579ac3e61540d33701ae1ea5a0e55c95055f52be6be0c9f63b5e12
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:30a9264446d9bfcf977beea433026295798ed92bc03fae79d89f70494644af49
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dde15a2105449c70bf902869dd9486a1bea61f66479c56f9a16426a3f85e33c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:699b3777e1ea7a60123ef22ecc366f524146f7231f57273c73780dc41dc98d5c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.203278660774231,
   "best_model_checkpoint": "miner_id_24/checkpoint-800",
-  "epoch": 0.05085217114933852,
   "eval_steps": 100,
-  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5679,6 +5679,286 @@
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 800
     }
   ],
   "logging_steps": 1,
@@ -5702,12 +5982,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.156980197326848e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.203278660774231,
   "best_model_checkpoint": "miner_id_24/checkpoint-800",
+  "epoch": 0.05339477970680545,
   "eval_steps": 100,
+  "global_step": 840,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 800
+    },
+    {
+      "epoch": 0.0509157363632752,
+      "grad_norm": 0.2525332570075989,
+      "learning_rate": 1.0875619410158466e-06,
+      "loss": 1.1738,
+      "step": 801
+    },
+    {
+      "epoch": 0.05097930157721187,
+      "grad_norm": 0.24210986495018005,
+      "learning_rate": 1.0325995198509409e-06,
+      "loss": 1.1435,
+      "step": 802
+    },
+    {
+      "epoch": 0.05104286679114854,
+      "grad_norm": 0.25146523118019104,
+      "learning_rate": 9.79054964740911e-07,
+      "loss": 1.2707,
+      "step": 803
+    },
+    {
+      "epoch": 0.05110643200508522,
+      "grad_norm": 0.2529788315296173,
+      "learning_rate": 9.269290427969868e-07,
+      "loss": 1.1679,
+      "step": 804
+    },
+    {
+      "epoch": 0.05116999721902189,
+      "grad_norm": 0.25673815608024597,
+      "learning_rate": 8.762225008062674e-07,
+      "loss": 1.2141,
+      "step": 805
+    },
+    {
+      "epoch": 0.05123356243295856,
+      "grad_norm": 0.25803902745246887,
+      "learning_rate": 8.26936065220929e-07,
+      "loss": 1.2018,
+      "step": 806
+    },
+    {
+      "epoch": 0.05129712764689524,
+      "grad_norm": 0.2565945088863373,
+      "learning_rate": 7.790704421478557e-07,
+      "loss": 1.2261,
+      "step": 807
+    },
+    {
+      "epoch": 0.05136069286083191,
+      "grad_norm": 0.2625206410884857,
+      "learning_rate": 7.326263173385584e-07,
+      "loss": 1.1934,
+      "step": 808
+    },
+    {
+      "epoch": 0.05142425807476858,
+      "grad_norm": 0.25721174478530884,
+      "learning_rate": 6.876043561792833e-07,
+      "loss": 1.2349,
+      "step": 809
+    },
+    {
+      "epoch": 0.051487823288705256,
+      "grad_norm": 0.25995710492134094,
+      "learning_rate": 6.440052036815081e-07,
+      "loss": 1.3027,
+      "step": 810
+    },
+    {
+      "epoch": 0.05155138850264193,
+      "grad_norm": 0.25100308656692505,
+      "learning_rate": 6.018294844727379e-07,
+      "loss": 1.1802,
+      "step": 811
+    },
+    {
+      "epoch": 0.0516149537165786,
+      "grad_norm": 0.2459433674812317,
+      "learning_rate": 5.610778027874908e-07,
+      "loss": 1.1474,
+      "step": 812
+    },
+    {
+      "epoch": 0.051678518930515276,
+      "grad_norm": 0.2367779165506363,
+      "learning_rate": 5.217507424586821e-07,
+      "loss": 1.168,
+      "step": 813
+    },
+    {
+      "epoch": 0.05174208414445195,
+      "grad_norm": 0.2512117922306061,
+      "learning_rate": 4.838488669092534e-07,
+      "loss": 1.091,
+      "step": 814
+    },
+    {
+      "epoch": 0.05180564935838862,
+      "grad_norm": 0.2595987319946289,
+      "learning_rate": 4.4737271914411236e-07,
+      "loss": 1.1756,
+      "step": 815
+    },
+    {
+      "epoch": 0.051869214572325295,
+      "grad_norm": 0.26023730635643005,
+      "learning_rate": 4.123228217422948e-07,
+      "loss": 1.068,
+      "step": 816
+    },
+    {
+      "epoch": 0.05193277978626197,
+      "grad_norm": 0.26552048325538635,
+      "learning_rate": 3.7869967684958094e-07,
+      "loss": 1.1605,
+      "step": 817
+    },
+    {
+      "epoch": 0.05199634500019864,
+      "grad_norm": 0.24736690521240234,
+      "learning_rate": 3.465037661712134e-07,
+      "loss": 1.2006,
+      "step": 818
+    },
+    {
+      "epoch": 0.052059910214135315,
+      "grad_norm": 0.26172155141830444,
+      "learning_rate": 3.1573555096501283e-07,
+      "loss": 1.2359,
+      "step": 819
+    },
+    {
+      "epoch": 0.05212347542807199,
+      "grad_norm": 0.25399184226989746,
+      "learning_rate": 2.86395472034795e-07,
+      "loss": 1.2153,
+      "step": 820
+    },
+    {
+      "epoch": 0.05218704064200866,
+      "grad_norm": 0.25162798166275024,
+      "learning_rate": 2.584839497240643e-07,
+      "loss": 1.2581,
+      "step": 821
+    },
+    {
+      "epoch": 0.052250605855945334,
+      "grad_norm": 0.2551822066307068,
+      "learning_rate": 2.3200138390993e-07,
+      "loss": 1.1388,
+      "step": 822
+    },
+    {
+      "epoch": 0.05231417106988201,
+      "grad_norm": 0.24114681780338287,
+      "learning_rate": 2.0694815399744382e-07,
+      "loss": 1.2377,
+      "step": 823
+    },
+    {
+      "epoch": 0.05237773628381868,
+      "grad_norm": 0.26416000723838806,
+      "learning_rate": 1.83324618914138e-07,
+      "loss": 1.2193,
+      "step": 824
+    },
+    {
+      "epoch": 0.052441301497755353,
+      "grad_norm": 0.25959083437919617,
+      "learning_rate": 1.611311171048735e-07,
+      "loss": 1.1987,
+      "step": 825
+    },
+    {
+      "epoch": 0.05250486671169203,
+      "grad_norm": 0.24999088048934937,
+      "learning_rate": 1.4036796652701078e-07,
+      "loss": 1.1644,
+      "step": 826
+    },
+    {
+      "epoch": 0.0525684319256287,
+      "grad_norm": 0.25357383489608765,
+      "learning_rate": 1.210354646458245e-07,
+      "loss": 1.2345,
+      "step": 827
+    },
+    {
+      "epoch": 0.05263199713956537,
+      "grad_norm": 0.25583428144454956,
+      "learning_rate": 1.031338884302846e-07,
+      "loss": 1.2685,
+      "step": 828
+    },
+    {
+      "epoch": 0.05269556235350205,
+      "grad_norm": 0.25566795468330383,
+      "learning_rate": 8.666349434907073e-08,
+      "loss": 1.2141,
+      "step": 829
+    },
+    {
+      "epoch": 0.05275912756743872,
+      "grad_norm": 0.2603313624858856,
+      "learning_rate": 7.162451836685291e-08,
+      "loss": 1.2535,
+      "step": 830
+    },
+    {
+      "epoch": 0.05282269278137539,
+      "grad_norm": 0.24881498515605927,
+      "learning_rate": 5.8017175941005306e-08,
+      "loss": 1.1596,
+      "step": 831
+    },
+    {
+      "epoch": 0.05288625799531207,
+      "grad_norm": 0.2581416070461273,
+      "learning_rate": 4.584166201841988e-08,
+      "loss": 1.2291,
+      "step": 832
+    },
+    {
+      "epoch": 0.052949823209248736,
+      "grad_norm": 0.2521674335002899,
+      "learning_rate": 3.5098151032786355e-08,
+      "loss": 1.2752,
+      "step": 833
+    },
+    {
+      "epoch": 0.05301338842318541,
+      "grad_norm": 0.2460847645998001,
+      "learning_rate": 2.578679690204977e-08,
+      "loss": 1.1633,
+      "step": 834
+    },
+    {
+      "epoch": 0.05307695363712209,
+      "grad_norm": 0.2515714764595032,
+      "learning_rate": 1.7907733026223394e-08,
+      "loss": 1.1517,
+      "step": 835
+    },
+    {
+      "epoch": 0.053140518851058756,
+      "grad_norm": 0.2554892301559448,
+      "learning_rate": 1.1461072285490204e-08,
+      "loss": 1.1205,
+      "step": 836
+    },
+    {
+      "epoch": 0.05320408406499543,
+      "grad_norm": 0.2557508945465088,
+      "learning_rate": 6.446907038559769e-09,
+      "loss": 1.1845,
+      "step": 837
+    },
+    {
+      "epoch": 0.05326764927893211,
+      "grad_norm": 0.25483280420303345,
+      "learning_rate": 2.865309121358184e-09,
+      "loss": 1.1348,
+      "step": 838
+    },
+    {
+      "epoch": 0.053331214492868775,
+      "grad_norm": 0.2680445909500122,
+      "learning_rate": 7.163298459844647e-10,
+      "loss": 1.1985,
+      "step": 839
+    },
+    {
+      "epoch": 0.05339477970680545,
+      "grad_norm": 0.26120489835739136,
+      "learning_rate": 0.0,
+      "loss": 1.264,
+      "step": 840
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.3648292071931904e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null