Training in progress, step 19000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfa8b32014bbb1116102bb096f351b477c375f02a76f56941cd43b6b2b8c9ae0
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:935e2f81b626e71b298466c51f498096f378414ac276b4d4ea19f4650105a4cf
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24c06d2ce08030ea9353adcf0c618c2748ba4afafcd7e2ef1fa5088554e20156
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b16c83dc7260ee2348e5de945bd725aa12ab974c28241b500cd256e88062c5e
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40162c830f5df6b739cc1f24fd1ff7e4f55f1a27766e6deeff7911f41b300f3c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:46737d1e5768cf0de571a9bd47793403b2513f0c1ee54de55bfe17b1b1fdc49a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b23d12d924a2c5ac71a2463338c282c759dd6e8f289f7165dd510cc1f6cd61fa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:db8de303a11187ec0581f91507bd8428f995b03d53d2e944a4543899602e0467
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.2333333333333334,
   "eval_steps": 500,
-  "global_step": 18500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1310,6 +1310,41 @@
       "learning_rate": 1.178e-05,
       "loss": 0.0657,
       "step": 18500
     }
   ],
   "logging_steps": 100,
@@ -1329,7 +1364,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.506288390144e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2666666666666666,
   "eval_steps": 500,
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.178e-05,
       "loss": 0.0657,
       "step": 18500
+    },
+    {
+      "epoch": 1.24,
+      "grad_norm": 0.21437957882881165,
+      "learning_rate": 1.1735555555555556e-05,
+      "loss": 0.0631,
+      "step": 18600
+    },
+    {
+      "epoch": 1.2466666666666666,
+      "grad_norm": 0.09303513169288635,
+      "learning_rate": 1.1691555555555556e-05,
+      "loss": 0.0594,
+      "step": 18700
+    },
+    {
+      "epoch": 1.2533333333333334,
+      "grad_norm": 0.13789591193199158,
+      "learning_rate": 1.1647111111111111e-05,
+      "loss": 0.0644,
+      "step": 18800
+    },
+    {
+      "epoch": 1.26,
+      "grad_norm": 0.19540788233280182,
+      "learning_rate": 1.1602666666666666e-05,
+      "loss": 0.0564,
+      "step": 18900
+    },
+    {
+      "epoch": 1.2666666666666666,
+      "grad_norm": 0.18746204674243927,
+      "learning_rate": 1.1558222222222223e-05,
+      "loss": 0.0581,
+      "step": 19000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 4.628079968256e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null