Training in progress, step 137, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +263 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:154c87b8473e80aeb5b89159f45f19c8b794138abb255f2e28e1e2c70fea9381
 size 138995824

 version https://git-lfs.github.com/spec/v1
+oid sha256:5721b50238bda1610129cd85c1b675957641dc7971a4f050b7ea679189538399
 size 138995824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e30d1c6d72de1919df731779b5d6811439aa040ac1b8c11a79086875410b345
 size 71077780

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5d43ba32f23601abad793ddf7b3462d760eb8fb53b286ffd4d02efbd6f748c3
 size 71077780

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e82113e33e7a1e4ce347c0eada1da615b742f2bfabd373277829b5cea5f1fa7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c8a5b70642a2b6a2f5da463c24cc5576b689519c228d81584cc51dbe0c5f176
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2508c772ba86371f3a590745841cba23b527d151f21432017e9ef067a9957090
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2a46a95433c990f0fc7ea2279a4d7ef872632e22a0cddf9f06d248d79811bce
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d42c6bf63e218f5406c44d31b8aa11834fed1b2398dfeca30eca5e3e990274ab
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:5966402bb91072dfb39aeac267afdd314764f0f8a39f61ebacab0abaf520852a
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12eaa1223d2b1b1f63e28e29e3ced91d41c8c0605be080f162f1f7aad46797da
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:340e23f29e49ae0de9d24c424d0e75711375af6c07eb1018270eace124c394c4
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6ef2c9584769d9fdc685b2e0efcec32c1cfc2561e3629afdf144643e3e40022
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae58c0015ae3245509678fc95cc964de9b9689008a1e332cb36ae0d8cfc8af92
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feeb2d69f79376ea1fa617a71804f5c0f5284f43cebf97ac527777d4e340e7ad
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e2e4c6723359328055b006b34af6fa7832379c5eec7cb9ff2047e4013cee364
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb35cae220a392aa22473b8899a64994de7fb2ace81da19fa3fa5a3ad9b7a6f7
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4a3a7fb8d5bb28d511fd5dc9c053d921143d5f629d55be1dceeac7789f655dc
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e431bf77724e0e87f69f8f82501b4bab4c9b45e607b3048cdcd6ff4a009e6483
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c8c608b87806f49f94a44169cdd9e975774b16f6a9921419257cfb2be1c83c2
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9039f0f0a84afc54b351471b92047e36f111a9a61347c05d7d907f45fef718b2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:39f31cdc7b9d265f3b70743c4752aa49e538ef342a12e3962eb9a559085eeb7c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.041056513786316,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 2.2076502732240435,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,265 @@
       "eval_samples_per_second": 308.415,
       "eval_steps_per_second": 10.03,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -754,12 +1013,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.647936171225907e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.041056513786316,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 3.0273224043715845,
   "eval_steps": 50,
+  "global_step": 137,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 308.415,
       "eval_steps_per_second": 10.03,
       "step": 100
+    },
+    {
+      "epoch": 2.2295081967213113,
+      "grad_norm": 0.04856366664171219,
+      "learning_rate": 2.1518003834162954e-05,
+      "loss": 1.0995,
+      "step": 101
+    },
+    {
+      "epoch": 2.251366120218579,
+      "grad_norm": 0.0619901567697525,
+      "learning_rate": 2.0413902018106895e-05,
+      "loss": 1.0795,
+      "step": 102
+    },
+    {
+      "epoch": 2.273224043715847,
+      "grad_norm": 0.053943440318107605,
+      "learning_rate": 1.9332798590175797e-05,
+      "loss": 0.5584,
+      "step": 103
+    },
+    {
+      "epoch": 2.2950819672131146,
+      "grad_norm": 0.06046655401587486,
+      "learning_rate": 1.8275355061874515e-05,
+      "loss": 1.3933,
+      "step": 104
+    },
+    {
+      "epoch": 2.3169398907103824,
+      "grad_norm": 0.04138614237308502,
+      "learning_rate": 1.724221846755858e-05,
+      "loss": 0.9474,
+      "step": 105
+    },
+    {
+      "epoch": 2.33879781420765,
+      "grad_norm": 0.0419883206486702,
+      "learning_rate": 1.623402096852318e-05,
+      "loss": 1.0178,
+      "step": 106
+    },
+    {
+      "epoch": 2.360655737704918,
+      "grad_norm": 0.04966486990451813,
+      "learning_rate": 1.5251379466192902e-05,
+      "loss": 1.1369,
+      "step": 107
+    },
+    {
+      "epoch": 2.3825136612021858,
+      "grad_norm": 0.05595370754599571,
+      "learning_rate": 1.4294895224648664e-05,
+      "loss": 1.1341,
+      "step": 108
+    },
+    {
+      "epoch": 2.4043715846994536,
+      "grad_norm": 0.05111997202038765,
+      "learning_rate": 1.3365153502722967e-05,
+      "loss": 0.6285,
+      "step": 109
+    },
+    {
+      "epoch": 2.4262295081967213,
+      "grad_norm": 0.052137341350317,
+      "learning_rate": 1.2462723195888415e-05,
+      "loss": 1.2566,
+      "step": 110
+    },
+    {
+      "epoch": 2.448087431693989,
+      "grad_norm": 0.03865412250161171,
+      "learning_rate": 1.1588156488159008e-05,
+      "loss": 0.9759,
+      "step": 111
+    },
+    {
+      "epoch": 2.469945355191257,
+      "grad_norm": 0.03843948617577553,
+      "learning_rate": 1.074198851421659e-05,
+      "loss": 1.0035,
+      "step": 112
+    },
+    {
+      "epoch": 2.4918032786885247,
+      "grad_norm": 0.04497023671865463,
+      "learning_rate": 9.924737031969744e-06,
+      "loss": 1.0914,
+      "step": 113
+    },
+    {
+      "epoch": 2.5136612021857925,
+      "grad_norm": 0.05429847911000252,
+      "learning_rate": 9.136902105745273e-06,
+      "loss": 1.1209,
+      "step": 114
+    },
+    {
+      "epoch": 2.5355191256830603,
+      "grad_norm": 0.08493578433990479,
+      "learning_rate": 8.378965800306078e-06,
+      "loss": 1.1134,
+      "step": 115
+    },
+    {
+      "epoch": 2.557377049180328,
+      "grad_norm": 0.06231605261564255,
+      "learning_rate": 7.651391885882701e-06,
+      "loss": 0.965,
+      "step": 116
+    },
+    {
+      "epoch": 2.579234972677596,
+      "grad_norm": 0.03692341595888138,
+      "learning_rate": 6.954625554399086e-06,
+      "loss": 0.8894,
+      "step": 117
+    },
+    {
+      "epoch": 2.6010928961748636,
+      "grad_norm": 0.04275006055831909,
+      "learning_rate": 6.289093147066023e-06,
+      "loss": 1.0013,
+      "step": 118
+    },
+    {
+      "epoch": 2.6229508196721314,
+      "grad_norm": 0.04334869980812073,
+      "learning_rate": 5.655201893509272e-06,
+      "loss": 1.0516,
+      "step": 119
+    },
+    {
+      "epoch": 2.644808743169399,
+      "grad_norm": 0.052981842309236526,
+      "learning_rate": 5.053339662591549e-06,
+      "loss": 1.0457,
+      "step": 120
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.0736251100897789,
+      "learning_rate": 4.483874725081219e-06,
+      "loss": 1.1736,
+      "step": 121
+    },
+    {
+      "epoch": 2.6885245901639343,
+      "grad_norm": 0.057101909071207047,
+      "learning_rate": 3.9471555283128005e-06,
+      "loss": 0.8181,
+      "step": 122
+    },
+    {
+      "epoch": 2.710382513661202,
+      "grad_norm": 0.04115651920437813,
+      "learning_rate": 3.4435104829770587e-06,
+      "loss": 1.0691,
+      "step": 123
+    },
+    {
+      "epoch": 2.73224043715847,
+      "grad_norm": 0.038360998034477234,
+      "learning_rate": 2.9732477621712853e-06,
+      "loss": 0.986,
+      "step": 124
+    },
+    {
+      "epoch": 2.7540983606557377,
+      "grad_norm": 0.0409964919090271,
+      "learning_rate": 2.53665511283261e-06,
+      "loss": 1.0381,
+      "step": 125
+    },
+    {
+      "epoch": 2.7759562841530054,
+      "grad_norm": 0.04703905060887337,
+      "learning_rate": 2.1339996796698887e-06,
+      "loss": 1.0692,
+      "step": 126
+    },
+    {
+      "epoch": 2.797814207650273,
+      "grad_norm": 0.06132422015070915,
+      "learning_rate": 1.7655278417016956e-06,
+      "loss": 1.117,
+      "step": 127
+    },
+    {
+      "epoch": 2.819672131147541,
+      "grad_norm": 0.05300451070070267,
+      "learning_rate": 1.4314650615005687e-06,
+      "loss": 0.543,
+      "step": 128
+    },
+    {
+      "epoch": 2.841530054644809,
+      "grad_norm": 0.055577926337718964,
+      "learning_rate": 1.1320157472357307e-06,
+      "loss": 1.3244,
+      "step": 129
+    },
+    {
+      "epoch": 2.8633879781420766,
+      "grad_norm": 0.04094787687063217,
+      "learning_rate": 8.673631275987297e-07,
+      "loss": 0.9801,
+      "step": 130
+    },
+    {
+      "epoch": 2.8852459016393444,
+      "grad_norm": 0.0408557653427124,
+      "learning_rate": 6.376691396884168e-07,
+      "loss": 1.0152,
+      "step": 131
+    },
+    {
+      "epoch": 2.907103825136612,
+      "grad_norm": 0.047284748405218124,
+      "learning_rate": 4.430743299240307e-07,
+      "loss": 1.0816,
+      "step": 132
+    },
+    {
+      "epoch": 2.92896174863388,
+      "grad_norm": 0.055055923759937286,
+      "learning_rate": 2.836977680468222e-07,
+      "loss": 1.0597,
+      "step": 133
+    },
+    {
+      "epoch": 2.9508196721311473,
+      "grad_norm": 0.05081977695226669,
+      "learning_rate": 1.5963697426306723e-07,
+      "loss": 0.6892,
+      "step": 134
+    },
+    {
+      "epoch": 2.972677595628415,
+      "grad_norm": 0.055216483771800995,
+      "learning_rate": 7.096785957284602e-08,
+      "loss": 1.341,
+      "step": 135
+    },
+    {
+      "epoch": 3.0054644808743167,
+      "grad_norm": 0.10709080845117569,
+      "learning_rate": 1.774467932117818e-08,
+      "loss": 1.761,
+      "step": 136
+    },
+    {
+      "epoch": 3.0273224043715845,
+      "grad_norm": 0.03700735419988632,
+      "learning_rate": 0.0,
+      "loss": 0.8896,
+      "step": 137
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.74466163399721e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null