Training in progress, step 95, checkpoint

Files changed (4) hide show

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9497df20ac725fb9df4ed57bb81c9c3030f3a34d97078a219baf88f928244a2c
 size 198011252

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b9facebaa7cac2cb534909fdce698ce005cdeaba1b9e81f498f51788387a078
 size 198011252

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5ee68418676da18d7e2e2aba35d15b49b5e7dcf6ee8702ccdf52d98abaec6ee
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:de8bdf2a663312d5e7ca4f5a800a042c642275cd9fe11a9bd4bbe885dded4c9d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f6423aaf07b0a3e5bef1b21c59ae6d997dd59505ca758247471609a32b152cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:60ca561a785d3802440b426c58aafe0f1cf10dc4bab5c0b5dbec38821026a8aa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06652253450856478,
   "eval_steps": 55,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -205,6 +205,41 @@
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
     }
   ],
   "logging_steps": 3,
@@ -219,12 +254,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.43327370412032e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07899550972892067,
   "eval_steps": 55,
+  "global_step": 95,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
+    },
+    {
+      "epoch": 0.06735406618992183,
+      "grad_norm": NaN,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.0,
+      "step": 81
+    },
+    {
+      "epoch": 0.06984866123399301,
+      "grad_norm": NaN,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.0,
+      "step": 84
+    },
+    {
+      "epoch": 0.07234325627806419,
+      "grad_norm": NaN,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.0,
+      "step": 87
+    },
+    {
+      "epoch": 0.07483785132213537,
+      "grad_norm": NaN,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.07733244636620655,
+      "grad_norm": NaN,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.0,
+      "step": 93
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.70201252364288e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null