Training in progress, step 95, checkpoint

Files changed (4) hide show

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d4471c13cd0a43e55564b632347ebbf2f145a6d90de8d317a867886c496735a
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:aaab921744d1856a81154cdfdd9a4488ff2401c905c4eafa4996a77b1cdc96ef
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:586624b39ffd5b6aa1afacc4dbc36f26d48ff933d5db528ffa6194665ec59d97
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:08f2263427e68deaa5f58cbf4ac6b03816ced59452b2c0d8bcffc7ede09bf230
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f6423aaf07b0a3e5bef1b21c59ae6d997dd59505ca758247471609a32b152cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:60ca561a785d3802440b426c58aafe0f1cf10dc4bab5c0b5dbec38821026a8aa
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011803936612860388,
   "eval_steps": 55,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -205,6 +205,41 @@
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
     }
   ],
   "logging_steps": 3,
@@ -219,12 +254,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.77128394686464e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.014017174727771711,
   "eval_steps": 55,
+  "global_step": 95,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.12696703292044e-05,
       "loss": 0.0,
       "step": 78
+    },
+    {
+      "epoch": 0.011951485820521144,
+      "grad_norm": NaN,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.0,
+      "step": 81
+    },
+    {
+      "epoch": 0.01239413344350341,
+      "grad_norm": NaN,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.0,
+      "step": 84
+    },
+    {
+      "epoch": 0.012836781066485673,
+      "grad_norm": NaN,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.0,
+      "step": 87
+    },
+    {
+      "epoch": 0.013279428689467937,
+      "grad_norm": NaN,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.0,
+      "step": 90
+    },
+    {
+      "epoch": 0.013722076312450203,
+      "grad_norm": NaN,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.0,
+      "step": 93
     }
   ],
   "logging_steps": 3,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.47839968690176e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null