masatochi committed on
Commit 9372655 · verified · 1 Parent(s): f9f6fb7

Training in progress, step 125, checkpoint

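These periodic "Training in progress" commits are what the Hugging Face `transformers` Trainer pushes to the Hub when checkpoint uploading is enabled; the `last-checkpoint/` prefix in the files below matches `hub_strategy="checkpoint"`, which mirrors only the most recent checkpoint into that folder on every save. A minimal configuration sketch follows; the output directory and `save_steps` value are illustrative assumptions, while the batch size and logging cadence are taken from `trainer_state.json` below.

from transformers import TrainingArguments

# Sketch only: an assumed configuration that would produce commits like this one.
args = TrainingArguments(
    output_dir="outputs",            # assumption: local working directory
    per_device_train_batch_size=3,   # matches "train_batch_size": 3 recorded below (single-device assumption)
    logging_steps=1,                 # matches "logging_steps": 1
    save_steps=5,                    # assumption: a save every 5 steps fits checkpoints at step 120 and 125
    push_to_hub=True,                # upload checkpoints to the Hub as training progresses
    hub_strategy="checkpoint",       # keep the latest checkpoint under last-checkpoint/ on the Hub
)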
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dfa8c53e5e820044af6c50ee1224154243471f291bb2ea626ff5f3dbb284aa50
+ oid sha256:1628b2bb2347dfb854cd6be60adf8d3e2e41848076d02d48eb2cbf0c189fdd5d
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f0381c9373d63945244b6821e06cbd4bd417ba7642a79644335119bda023a7c7
+ oid sha256:9f42869777e41217dd1ac60269c980858e1e77e66d0f40c379fc76a41f09e260
  size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5b37c228faac27493d5dc2700eeaf67fe98afdbd0e55a68eaf314f21f0aea103
+ oid sha256:78ae4de6cae258994b2e610a17502d0e0897089893ff3ea440cdda7e7a8e9774
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ba16893b2c4735e8eaf86592331a8dda9b3bcccecd302e828000513277487239
+ oid sha256:b577939e8ae09a93269bdd1ffbcc4ef41ec4027476aa914ab19034c5a6ebf492
  size 1064
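Each of the four diffs above touches only a Git LFS pointer file: the reported size stays the same while the `oid sha256` moves to the blob written at step 125. As a minimal sketch (the local path and helper name are assumptions, not part of this commit), a downloaded file can be checked against its pointer like this:

import hashlib
from pathlib import Path

def lfs_oid(path, chunk_size=1 << 20):
    """Return the sha256 hex digest that Git LFS records as a pointer's oid."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

local = Path("last-checkpoint/adapter_model.safetensors")  # hypothetical local copy
if local.exists():
    # For this commit the digest should be 1628b2bb2347dfb854cd6be60adf8d3e2e41848076d02d48eb2cbf0c189fdd5d
    print(lfs_oid(local))
    print(local.stat().st_size)  # should equal the pointer's size, 83945296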
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.05868329359985329,
+ "epoch": 0.061128430833180515,
  "eval_steps": 34,
- "global_step": 120,
+ "global_step": 125,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -879,6 +879,41 @@
  "learning_rate": 9.077316405366981e-05,
  "loss": 0.9481,
  "step": 120
+ },
+ {
+ "epoch": 0.05917232104651873,
+ "grad_norm": 1.3437144756317139,
+ "learning_rate": 8.893473181084994e-05,
+ "loss": 1.1713,
+ "step": 121
+ },
+ {
+ "epoch": 0.05966134849318418,
+ "grad_norm": 1.3384225368499756,
+ "learning_rate": 8.710007834697969e-05,
+ "loss": 1.0073,
+ "step": 122
+ },
+ {
+ "epoch": 0.06015037593984962,
+ "grad_norm": 1.2030497789382935,
+ "learning_rate": 8.526983019453623e-05,
+ "loss": 1.2027,
+ "step": 123
+ },
+ {
+ "epoch": 0.06063940338651507,
+ "grad_norm": 1.4308159351348877,
+ "learning_rate": 8.344461238158699e-05,
+ "loss": 1.0608,
+ "step": 124
+ },
+ {
+ "epoch": 0.061128430833180515,
+ "grad_norm": 1.2718294858932495,
+ "learning_rate": 8.162504821834295e-05,
+ "loss": 1.0317,
+ "step": 125
  }
  ],
  "logging_steps": 1,
@@ -898,7 +933,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.3267450346602496e+17,
+ "total_flos": 5.54869274443776e+17,
  "train_batch_size": 3,
  "trial_name": null,
  "trial_params": null