Training in progress, step 450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +48 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b129000dd86a0c0cf261116868532177becde0a1bc466dad0a59e45bdeb95e21
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:51d5d8002a67737950eb0f093c1490cbaab74a909ce93467f47f4a8cbee3ea72
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a540084705db1c6a1393dadb86844cf571366b2822cc36a6cb6d33299dbd7c0
 size 341314644

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9d893da0aaa20dec6d65b4d1e1fe8ef4f8d8015d82810957b3c96b484a56af6
 size 341314644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a034bfcb4f831038d9fb69a5d21e52294abb63cd539fd66b0232786deafb094
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:838098534274084aacdb2c3a85c52b42144ac8c570ede5167943969e82ec5617
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:303e83c678e93581d63a3650d04963a2cf914298a9658799e3e1f49cf7c8604f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f03fffc232f1b8a6305d6fa9076c1e1bca28e4d7666df088caa1812431cf9135
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.2352096140384674,
-  "best_model_checkpoint": "miner_id_24/checkpoint-400",
-  "epoch": 0.24524831391784183,
   "eval_steps": 50,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,49 @@
       "eval_samples_per_second": 16.163,
       "eval_steps_per_second": 4.047,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +430,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.551996103596442e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.21818479895591736,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.27590435315757206,
   "eval_steps": 50,
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 16.163,
       "eval_steps_per_second": 4.047,
       "step": 400
+    },
+    {
+      "epoch": 0.25137952176578787,
+      "grad_norm": 0.6863835453987122,
+      "learning_rate": 1.9384775070942844e-05,
+      "loss": 0.2289,
+      "step": 410
+    },
+    {
+      "epoch": 0.2575107296137339,
+      "grad_norm": 0.7815085053443909,
+      "learning_rate": 1.5423118240122765e-05,
+      "loss": 0.219,
+      "step": 420
+    },
+    {
+      "epoch": 0.26364193746167996,
+      "grad_norm": 0.8494090437889099,
+      "learning_rate": 1.188081932481891e-05,
+      "loss": 0.2179,
+      "step": 430
+    },
+    {
+      "epoch": 0.269773145309626,
+      "grad_norm": 1.2644402980804443,
+      "learning_rate": 8.775136049276001e-06,
+      "loss": 0.2367,
+      "step": 440
+    },
+    {
+      "epoch": 0.27590435315757206,
+      "grad_norm": 2.510753870010376,
+      "learning_rate": 6.121198990230306e-06,
+      "loss": 0.3381,
+      "step": 450
+    },
+    {
+      "epoch": 0.27590435315757206,
+      "eval_loss": 0.21818479895591736,
+      "eval_runtime": 42.5076,
+      "eval_samples_per_second": 16.162,
+      "eval_steps_per_second": 4.046,
+      "step": 450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.494817090312602e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null