Training in progress, step 161, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ea69b31cc9be8154118254b8a204c60bea6a96c6be5bb197bcc001d21fc2e26
 size 60599872

 version https://git-lfs.github.com/spec/v1
+oid sha256:c891645ebec7226fb58f60cc9bc84859bd92404c7c794b145abd5eadfd20ea1d
 size 60599872

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57ee2bfe298f7fe2289501d7d86dfc1f9fccf80f15cd7596f1b1d7bf67083f35
 size 31144020

 version https://git-lfs.github.com/spec/v1
+oid sha256:82bc5d7822c56800bb01475839609901167d8a82fae6953d924a9ca579c78172
 size 31144020

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34f08cff4d948c97a361a810ec377def8103899bfe47741c4cdcccd8935f9bf2
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ff1d3de00ee1559643931a25b9f668decd335e6062bb7acd66400a6c7c534c4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a77e597b891202a729ec52794cfdebc3ea9b956ac1feb481e250623989171618
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:221f8523c4b34fd94355c57eb77ac8c13b895d28c9cec30c964370546489d660
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.4840974807739258,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 1.8691588785046729,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -93,6 +93,48 @@
       "eval_samples_per_second": 56.494,
       "eval_steps_per_second": 14.437,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -116,12 +158,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6357159424032768.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.4840974807739258,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.0093457943925235,
   "eval_steps": 100,
+  "global_step": 161,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 56.494,
       "eval_steps_per_second": 14.437,
       "step": 100
+    },
+    {
+      "epoch": 2.05607476635514,
+      "grad_norm": 1.464299201965332,
+      "learning_rate": 5.1205962578487155e-05,
+      "loss": 1.1104,
+      "step": 110
+    },
+    {
+      "epoch": 2.2429906542056073,
+      "grad_norm": 2.325424909591675,
+      "learning_rate": 3.422851293981676e-05,
+      "loss": 0.8096,
+      "step": 120
+    },
+    {
+      "epoch": 2.4299065420560746,
+      "grad_norm": 1.7495105266571045,
+      "learning_rate": 2.008778270707944e-05,
+      "loss": 0.7514,
+      "step": 130
+    },
+    {
+      "epoch": 2.616822429906542,
+      "grad_norm": 1.7600091695785522,
+      "learning_rate": 9.393660536564408e-06,
+      "loss": 0.7495,
+      "step": 140
+    },
+    {
+      "epoch": 2.803738317757009,
+      "grad_norm": 1.6766407489776611,
+      "learning_rate": 2.607383131993424e-06,
+      "loss": 0.7997,
+      "step": 150
+    },
+    {
+      "epoch": 2.9906542056074765,
+      "grad_norm": 1.9082269668579102,
+      "learning_rate": 2.164213936770576e-08,
+      "loss": 0.8264,
+      "step": 160
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0229500436545536e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null