Training in progress, step 38000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eccce9825a6e6348256698faab746bf337537d7559919bf091704418e3ecaaa5
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b01c6e320f3b8ce398fbd50bb34cd5deb4150ad4b0e09a91d304d07ef6a1d44
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5767ff2839f2befc3509511517661e7ee746fd7345f573d42e39ce30e6d908e8
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:37f54042d6d8e987a2001e8f8b69e12f7b5e1be6322534bf71acae8f44c1d295
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ffd6cc317d4a23512c42e23cd9356ee5c984750b736bc7a53cf0419eccfd496
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:61ae2baae6f9dfd7ca89f4f0f5818402f18a3e15e4581cd68734c6a76f2a7030
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26829b102f440c540dc7e3f1f0e0d969a25f498ec8a83042cddb35cab37b0ab3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e81910e79bb87f0b46a0d2aa6ab0730eb92717d10eebce124863a9bc14f71612
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.5,
   "eval_steps": 500,
-  "global_step": 37500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2648,6 +2648,41 @@
       "learning_rate": 3.3395555555555558e-06,
       "loss": 0.0517,
       "step": 37500
     }
   ],
   "logging_steps": 100,
@@ -2667,7 +2702,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.1343683584e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.533333333333333,
   "eval_steps": 500,
+  "global_step": 38000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.3395555555555558e-06,
       "loss": 0.0517,
       "step": 37500
+    },
+    {
+      "epoch": 2.506666666666667,
+      "grad_norm": 0.11736246943473816,
+      "learning_rate": 3.295111111111111e-06,
+      "loss": 0.0521,
+      "step": 37600
+    },
+    {
+      "epoch": 2.513333333333333,
+      "grad_norm": 0.33586665987968445,
+      "learning_rate": 3.250666666666667e-06,
+      "loss": 0.0549,
+      "step": 37700
+    },
+    {
+      "epoch": 2.52,
+      "grad_norm": 0.15800270438194275,
+      "learning_rate": 3.2062222222222223e-06,
+      "loss": 0.056,
+      "step": 37800
+    },
+    {
+      "epoch": 2.5266666666666664,
+      "grad_norm": 0.14952941238880157,
+      "learning_rate": 3.161777777777778e-06,
+      "loss": 0.055,
+      "step": 37900
+    },
+    {
+      "epoch": 2.533333333333333,
+      "grad_norm": 0.08778905123472214,
+      "learning_rate": 3.117333333333333e-06,
+      "loss": 0.0477,
+      "step": 38000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 9.256159936512e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null