Training in progress, step 300, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +82 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6c613dd358d68cc0c9e41ad85857aa2518633775e85afbb6db14a8d6bb12506
 size 80792096

 version https://git-lfs.github.com/spec/v1
+oid sha256:7271b7abd71ff28a6d8862329ed25f52ef9bdaed0cd5f9426db9af7d154d0162
 size 80792096

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9aacaf8a7a5d773f8dcab0641038797aefdb68541420ea3d2e729f9d15e02c9f
-size 41459700

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c97053614ad9d594d3c931b703e869bd2ed545d776abf4c92958c8accb20b2d
+size 41460084

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a306d7fee431a85f12f46cace07d5a9b24375c79ffade2191c9121fbb02b66e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdf57361bea28743679736bace185586f54b838c0b6e89f824be140ba835deb8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b38a0153e3fcc5f09dd2a02445f6a79e4da9e519f9e1df79817a58a22b378c5c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c14f228dcfe997eb31918193f198918e61dc44a4118e1ce6d0f02c3f7f0fb85b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.010375738143921,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 1.797752808988764,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -171,6 +171,84 @@
       "eval_samples_per_second": 13.446,
       "eval_steps_per_second": 3.379,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -185,7 +263,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -199,7 +277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.8827556792447795e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.010375738143921,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 2.696629213483146,
   "eval_steps": 100,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.446,
       "eval_steps_per_second": 3.379,
       "step": 200
+    },
+    {
+      "epoch": 1.8876404494382022,
+      "grad_norm": 0.5966526865959167,
+      "learning_rate": 6.398222751952899e-05,
+      "loss": 0.9513,
+      "step": 210
+    },
+    {
+      "epoch": 1.9775280898876404,
+      "grad_norm": 0.6702543497085571,
+      "learning_rate": 5.5120081979953785e-05,
+      "loss": 0.8003,
+      "step": 220
+    },
+    {
+      "epoch": 2.067415730337079,
+      "grad_norm": 0.577067494392395,
+      "learning_rate": 4.66795567198309e-05,
+      "loss": 0.9227,
+      "step": 230
+    },
+    {
+      "epoch": 2.157303370786517,
+      "grad_norm": 0.6616420149803162,
+      "learning_rate": 3.873994548067972e-05,
+      "loss": 0.8428,
+      "step": 240
+    },
+    {
+      "epoch": 2.247191011235955,
+      "grad_norm": 0.7785276770591736,
+      "learning_rate": 3.137583621312665e-05,
+      "loss": 0.6197,
+      "step": 250
+    },
+    {
+      "epoch": 2.337078651685393,
+      "grad_norm": 0.8036301136016846,
+      "learning_rate": 2.465641036723393e-05,
+      "loss": 0.8266,
+      "step": 260
+    },
+    {
+      "epoch": 2.4269662921348316,
+      "grad_norm": 0.8799476027488708,
+      "learning_rate": 1.864479297370325e-05,
+      "loss": 0.7985,
+      "step": 270
+    },
+    {
+      "epoch": 2.5168539325842696,
+      "grad_norm": 0.7558830380439758,
+      "learning_rate": 1.339745962155613e-05,
+      "loss": 0.6886,
+      "step": 280
+    },
+    {
+      "epoch": 2.606741573033708,
+      "grad_norm": 0.8995586633682251,
+      "learning_rate": 8.963705903385345e-06,
+      "loss": 0.8577,
+      "step": 290
+    },
+    {
+      "epoch": 2.696629213483146,
+      "grad_norm": 0.8656787276268005,
+      "learning_rate": 5.385184312424974e-06,
+      "loss": 0.7566,
+      "step": 300
+    },
+    {
+      "epoch": 2.696629213483146,
+      "eval_loss": 1.0554652214050293,
+      "eval_runtime": 13.9222,
+      "eval_samples_per_second": 13.432,
+      "eval_steps_per_second": 3.376,
+      "step": 300
     }
   ],
   "logging_steps": 10,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 4.312152896050299e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null