Training in progress, step 300, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ced0aae4575f32da894f069a0689f8adfc695305a4161c3476b41484cbac0743
-size 103716100

 version https://git-lfs.github.com/spec/v1
+oid sha256:dabda8c6629206f6d1d2d0957fb34e4f64a7ee92a861fd7c1d43cb7821a307d7
+size 103716484

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4874bfff8f48f58dbeacd6424c17544ca4074af0f4864ca33e34f39221c537ef
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:87d9f88e578ac3bccf1cd5f62332106ab94e8c84aa4a3493cc04688b8cb59d50
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d10d0fa96665f6b4af4824faec3d1d9f4e8b4343723a14d86cab932da6ce3225
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.07216308858019123,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -171,6 +171,84 @@
       "eval_samples_per_second": 23.686,
       "eval_steps_per_second": 5.921,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -185,7 +263,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -199,7 +277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.32707468181504e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.10824463287028685,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 23.686,
       "eval_steps_per_second": 5.921,
       "step": 200
+    },
+    {
+      "epoch": 0.0757712430092008,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00014845508703326504,
+      "loss": 0.0,
+      "step": 210
+    },
+    {
+      "epoch": 0.07937939743821036,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00014373073204588556,
+      "loss": 0.0,
+      "step": 220
+    },
+    {
+      "epoch": 0.08298755186721991,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00013888241754733208,
+      "loss": 0.0,
+      "step": 230
+    },
+    {
+      "epoch": 0.08659570629622948,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00013392388661180303,
+      "loss": 0.0,
+      "step": 240
+    },
+    {
+      "epoch": 0.09020386072523905,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0001288691947339621,
+      "loss": 0.0,
+      "step": 250
+    },
+    {
+      "epoch": 0.0938120151542486,
+      "grad_norm": 0.0,
+      "learning_rate": 0.0001237326699871115,
+      "loss": 0.0,
+      "step": 260
+    },
+    {
+      "epoch": 0.09742016958325816,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00011852887240871145,
+      "loss": 0.0,
+      "step": 270
+    },
+    {
+      "epoch": 0.10102832401226773,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00011327255272837221,
+      "loss": 0.0,
+      "step": 280
+    },
+    {
+      "epoch": 0.10463647844127728,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00010797861055530831,
+      "loss": 0.0,
+      "step": 290
+    },
+    {
+      "epoch": 0.10824463287028685,
+      "grad_norm": 0.0,
+      "learning_rate": 0.00010266205214377748,
+      "loss": 0.0,
+      "step": 300
+    },
+    {
+      "epoch": 0.10824463287028685,
+      "eval_loss": NaN,
+      "eval_runtime": 197.2517,
+      "eval_samples_per_second": 23.665,
+      "eval_steps_per_second": 5.916,
+      "step": 300
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.99061202272256e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null