Training in progress, step 26, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4407c3ab101f8c1ef9f5e15505138b095f507d2bfd58abf768474ffaa4784e0d
 size 71718

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e23ef905cbda047a4302c6624b0312942f966d99d94bc71717db6b7af4205d4
 size 71718

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02c5fd8138c740194f70b268d048773555e70d025e165ecc026d15ff4300315b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ce9ec22e0051f976ac081d26951edb3de92acba5c29f4c684fe32b805e5bc15
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35e92785679980f3fcd23b14ce1acaffcae115e3e9164492d0e4b31775d32447
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8712da1b2787df41952a507984ec77e0f72c59fac7ee6cf21606445686249de
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0005611551162238577,
   "eval_steps": 13,
-  "global_step": 13,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -114,6 +114,105 @@
       "eval_samples_per_second": 391.695,
       "eval_steps_per_second": 195.868,
       "step": 13
     }
   ],
   "logging_steps": 1,
@@ -133,7 +232,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 333642596352.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0011223102324477154,
   "eval_steps": 13,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 391.695,
       "eval_steps_per_second": 195.868,
       "step": 13
+    },
+    {
+      "epoch": 0.0006043208943949237,
+      "grad_norm": NaN,
+      "learning_rate": 0.00019510565162951537,
+      "loss": 0.0,
+      "step": 14
+    },
+    {
+      "epoch": 0.0006474866725659897,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001923879532511287,
+      "loss": 0.0,
+      "step": 15
+    },
+    {
+      "epoch": 0.0006906524507370557,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001891006524188368,
+      "loss": 0.0,
+      "step": 16
+    },
+    {
+      "epoch": 0.0007338182289081216,
+      "grad_norm": NaN,
+      "learning_rate": 0.00018526401643540922,
+      "loss": 0.0,
+      "step": 17
+    },
+    {
+      "epoch": 0.0007769840070791876,
+      "grad_norm": NaN,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 0.0,
+      "step": 18
+    },
+    {
+      "epoch": 0.0008201497852502537,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001760405965600031,
+      "loss": 0.0,
+      "step": 19
+    },
+    {
+      "epoch": 0.0008633155634213196,
+      "grad_norm": NaN,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.0,
+      "step": 20
+    },
+    {
+      "epoch": 0.0009064813415923856,
+      "grad_norm": NaN,
+      "learning_rate": 0.00016494480483301836,
+      "loss": 0.0,
+      "step": 21
+    },
+    {
+      "epoch": 0.0009496471197634515,
+      "grad_norm": NaN,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.0,
+      "step": 22
+    },
+    {
+      "epoch": 0.0009928128979345176,
+      "grad_norm": NaN,
+      "learning_rate": 0.0001522498564715949,
+      "loss": 0.0,
+      "step": 23
+    },
+    {
+      "epoch": 0.0010359786761055835,
+      "grad_norm": NaN,
+      "learning_rate": 0.00014539904997395468,
+      "loss": 0.0,
+      "step": 24
+    },
+    {
+      "epoch": 0.0010791444542766495,
+      "grad_norm": NaN,
+      "learning_rate": 0.000138268343236509,
+      "loss": 0.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.0011223102324477154,
+      "grad_norm": NaN,
+      "learning_rate": 0.00013090169943749476,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 0.0011223102324477154,
+      "eval_loss": NaN,
+      "eval_runtime": 25.11,
+      "eval_samples_per_second": 388.491,
+      "eval_steps_per_second": 194.265,
+      "step": 26
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 667285192704.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null