Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d524bf2136781d5528f5aea3d8a01e798e36c0d2ba4d0ebc35ce212e823cff9
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a390ab90c983b502232f6dd978c1d10bd073cbd71369a7a0c14a61fba37ebca
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88d23e730fb275105225fd43ccdf2f6b04de210bae8980989fba0545af4c033c
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4b3bc4c954ea220c7ba8aecc83e5b5ceea33371e9415d87cb5ab5603a4f2566
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd6b7d0b5bd3ebe3fb5a1d6ddd6a28facb1cf25d5362c637a7af23ee92e8ab4d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b25b5f3b462fb3e29221ae0588e7591e3e9adfa54f981e07b5869f484f6b0ecc
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcbe1ba74b58330afa49026b3467516d09bde409f813ca3b749b16b9edb36df4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ab3d2b0401484126213dd055d9044edf00d7b06db4fe9dbad6027ee8b5d34b1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.8269708752632141,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0023223409196470044,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 55.688,
       "eval_steps_per_second": 13.925,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3119728734240768.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7575440406799316,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.004644681839294009,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 55.688,
       "eval_steps_per_second": 13.925,
       "step": 50
+    },
+    {
+      "epoch": 0.002368787738039944,
+      "grad_norm": 2.7304725646972656,
+      "learning_rate": 7.983263157894736e-05,
+      "loss": 0.7953,
+      "step": 51
+    },
+    {
+      "epoch": 0.002415234556432884,
+      "grad_norm": 2.282052516937256,
+      "learning_rate": 7.929684210526315e-05,
+      "loss": 0.8493,
+      "step": 52
+    },
+    {
+      "epoch": 0.0024616813748258243,
+      "grad_norm": 2.524898052215576,
+      "learning_rate": 7.876105263157895e-05,
+      "loss": 1.0108,
+      "step": 53
+    },
+    {
+      "epoch": 0.0025081281932187644,
+      "grad_norm": 2.6745500564575195,
+      "learning_rate": 7.822526315789473e-05,
+      "loss": 1.0531,
+      "step": 54
+    },
+    {
+      "epoch": 0.0025545750116117046,
+      "grad_norm": 3.1213998794555664,
+      "learning_rate": 7.768947368421053e-05,
+      "loss": 1.1068,
+      "step": 55
+    },
+    {
+      "epoch": 0.0026010218300046447,
+      "grad_norm": 3.499636173248291,
+      "learning_rate": 7.715368421052631e-05,
+      "loss": 1.0808,
+      "step": 56
+    },
+    {
+      "epoch": 0.002647468648397585,
+      "grad_norm": 3.125180721282959,
+      "learning_rate": 7.66178947368421e-05,
+      "loss": 0.8485,
+      "step": 57
+    },
+    {
+      "epoch": 0.002693915466790525,
+      "grad_norm": 2.547490119934082,
+      "learning_rate": 7.608210526315788e-05,
+      "loss": 0.7604,
+      "step": 58
+    },
+    {
+      "epoch": 0.002740362285183465,
+      "grad_norm": 2.058576822280884,
+      "learning_rate": 7.554631578947368e-05,
+      "loss": 0.665,
+      "step": 59
+    },
+    {
+      "epoch": 0.002786809103576405,
+      "grad_norm": 2.624077320098877,
+      "learning_rate": 7.501052631578947e-05,
+      "loss": 0.6822,
+      "step": 60
+    },
+    {
+      "epoch": 0.0028332559219693453,
+      "grad_norm": 1.4456878900527954,
+      "learning_rate": 7.447473684210527e-05,
+      "loss": 0.554,
+      "step": 61
+    },
+    {
+      "epoch": 0.002879702740362285,
+      "grad_norm": 1.9477028846740723,
+      "learning_rate": 7.393894736842105e-05,
+      "loss": 0.712,
+      "step": 62
+    },
+    {
+      "epoch": 0.002926149558755225,
+      "grad_norm": 2.0057406425476074,
+      "learning_rate": 7.340315789473684e-05,
+      "loss": 0.7368,
+      "step": 63
+    },
+    {
+      "epoch": 0.0029725963771481652,
+      "grad_norm": 2.041309356689453,
+      "learning_rate": 7.286736842105262e-05,
+      "loss": 0.9478,
+      "step": 64
+    },
+    {
+      "epoch": 0.0030190431955411053,
+      "grad_norm": 1.8585742712020874,
+      "learning_rate": 7.233157894736842e-05,
+      "loss": 0.5883,
+      "step": 65
+    },
+    {
+      "epoch": 0.0030654900139340455,
+      "grad_norm": 2.3726940155029297,
+      "learning_rate": 7.179578947368421e-05,
+      "loss": 0.8448,
+      "step": 66
+    },
+    {
+      "epoch": 0.0031119368323269856,
+      "grad_norm": 4.274697303771973,
+      "learning_rate": 7.125999999999999e-05,
+      "loss": 0.693,
+      "step": 67
+    },
+    {
+      "epoch": 0.0031583836507199257,
+      "grad_norm": 1.7456036806106567,
+      "learning_rate": 7.072421052631579e-05,
+      "loss": 0.6139,
+      "step": 68
+    },
+    {
+      "epoch": 0.003204830469112866,
+      "grad_norm": 2.077462673187256,
+      "learning_rate": 7.018842105263158e-05,
+      "loss": 0.6687,
+      "step": 69
+    },
+    {
+      "epoch": 0.003251277287505806,
+      "grad_norm": 1.6591328382492065,
+      "learning_rate": 6.965263157894736e-05,
+      "loss": 0.6602,
+      "step": 70
+    },
+    {
+      "epoch": 0.003297724105898746,
+      "grad_norm": 2.040104866027832,
+      "learning_rate": 6.911684210526316e-05,
+      "loss": 0.6718,
+      "step": 71
+    },
+    {
+      "epoch": 0.003344170924291686,
+      "grad_norm": 2.031517505645752,
+      "learning_rate": 6.858105263157894e-05,
+      "loss": 0.6965,
+      "step": 72
+    },
+    {
+      "epoch": 0.003390617742684626,
+      "grad_norm": 1.677396297454834,
+      "learning_rate": 6.804526315789473e-05,
+      "loss": 0.6022,
+      "step": 73
+    },
+    {
+      "epoch": 0.003437064561077566,
+      "grad_norm": 2.6971142292022705,
+      "learning_rate": 6.750947368421052e-05,
+      "loss": 0.7803,
+      "step": 74
+    },
+    {
+      "epoch": 0.003483511379470506,
+      "grad_norm": 1.2442923784255981,
+      "learning_rate": 6.697368421052631e-05,
+      "loss": 0.4904,
+      "step": 75
+    },
+    {
+      "epoch": 0.0035299581978634463,
+      "grad_norm": 1.520882487297058,
+      "learning_rate": 6.64378947368421e-05,
+      "loss": 0.5449,
+      "step": 76
+    },
+    {
+      "epoch": 0.0035764050162563864,
+      "grad_norm": 2.945136070251465,
+      "learning_rate": 6.59021052631579e-05,
+      "loss": 0.5945,
+      "step": 77
+    },
+    {
+      "epoch": 0.0036228518346493265,
+      "grad_norm": 2.225796937942505,
+      "learning_rate": 6.536631578947368e-05,
+      "loss": 0.8414,
+      "step": 78
+    },
+    {
+      "epoch": 0.0036692986530422666,
+      "grad_norm": 3.5419042110443115,
+      "learning_rate": 6.483052631578947e-05,
+      "loss": 0.951,
+      "step": 79
+    },
+    {
+      "epoch": 0.0037157454714352067,
+      "grad_norm": 2.4470789432525635,
+      "learning_rate": 6.429473684210525e-05,
+      "loss": 0.6766,
+      "step": 80
+    },
+    {
+      "epoch": 0.003762192289828147,
+      "grad_norm": 1.8150739669799805,
+      "learning_rate": 6.375894736842104e-05,
+      "loss": 0.6212,
+      "step": 81
+    },
+    {
+      "epoch": 0.003808639108221087,
+      "grad_norm": 2.2378828525543213,
+      "learning_rate": 6.322315789473684e-05,
+      "loss": 0.912,
+      "step": 82
+    },
+    {
+      "epoch": 0.003855085926614027,
+      "grad_norm": 2.66448974609375,
+      "learning_rate": 6.268736842105264e-05,
+      "loss": 0.7284,
+      "step": 83
+    },
+    {
+      "epoch": 0.0039015327450069672,
+      "grad_norm": 2.0171289443969727,
+      "learning_rate": 6.215157894736842e-05,
+      "loss": 0.5339,
+      "step": 84
+    },
+    {
+      "epoch": 0.003947979563399907,
+      "grad_norm": 1.829827070236206,
+      "learning_rate": 6.16157894736842e-05,
+      "loss": 0.6982,
+      "step": 85
+    },
+    {
+      "epoch": 0.003994426381792847,
+      "grad_norm": 1.3786966800689697,
+      "learning_rate": 6.107999999999999e-05,
+      "loss": 0.4433,
+      "step": 86
+    },
+    {
+      "epoch": 0.004040873200185788,
+      "grad_norm": 2.0562403202056885,
+      "learning_rate": 6.054421052631578e-05,
+      "loss": 0.791,
+      "step": 87
+    },
+    {
+      "epoch": 0.004087320018578727,
+      "grad_norm": 1.8710417747497559,
+      "learning_rate": 6.000842105263157e-05,
+      "loss": 0.5487,
+      "step": 88
+    },
+    {
+      "epoch": 0.004133766836971668,
+      "grad_norm": 2.46244215965271,
+      "learning_rate": 5.947263157894737e-05,
+      "loss": 0.9045,
+      "step": 89
+    },
+    {
+      "epoch": 0.0041802136553646075,
+      "grad_norm": 1.9283982515335083,
+      "learning_rate": 5.893684210526316e-05,
+      "loss": 0.6472,
+      "step": 90
+    },
+    {
+      "epoch": 0.004226660473757547,
+      "grad_norm": 1.794073462486267,
+      "learning_rate": 5.8401052631578944e-05,
+      "loss": 0.7332,
+      "step": 91
+    },
+    {
+      "epoch": 0.004273107292150488,
+      "grad_norm": 2.4211764335632324,
+      "learning_rate": 5.7865263157894736e-05,
+      "loss": 0.9175,
+      "step": 92
+    },
+    {
+      "epoch": 0.0043195541105434275,
+      "grad_norm": 2.131087064743042,
+      "learning_rate": 5.732947368421052e-05,
+      "loss": 0.846,
+      "step": 93
+    },
+    {
+      "epoch": 0.004366000928936368,
+      "grad_norm": 3.606595993041992,
+      "learning_rate": 5.6793684210526306e-05,
+      "loss": 1.023,
+      "step": 94
+    },
+    {
+      "epoch": 0.004412447747329308,
+      "grad_norm": 2.0817458629608154,
+      "learning_rate": 5.6257894736842105e-05,
+      "loss": 0.7585,
+      "step": 95
+    },
+    {
+      "epoch": 0.004458894565722248,
+      "grad_norm": 2.736661672592163,
+      "learning_rate": 5.57221052631579e-05,
+      "loss": 0.9443,
+      "step": 96
+    },
+    {
+      "epoch": 0.004505341384115188,
+      "grad_norm": 1.7814656496047974,
+      "learning_rate": 5.518631578947368e-05,
+      "loss": 0.9056,
+      "step": 97
+    },
+    {
+      "epoch": 0.0045517882025081285,
+      "grad_norm": 2.098845958709717,
+      "learning_rate": 5.4650526315789474e-05,
+      "loss": 0.7014,
+      "step": 98
+    },
+    {
+      "epoch": 0.004598235020901068,
+      "grad_norm": 2.316159963607788,
+      "learning_rate": 5.411473684210526e-05,
+      "loss": 0.7147,
+      "step": 99
+    },
+    {
+      "epoch": 0.004644681839294009,
+      "grad_norm": 2.182925224304199,
+      "learning_rate": 5.3578947368421044e-05,
+      "loss": 0.7637,
+      "step": 100
+    },
+    {
+      "epoch": 0.004644681839294009,
+      "eval_loss": 0.7575440406799316,
+      "eval_runtime": 162.5731,
+      "eval_samples_per_second": 55.766,
+      "eval_steps_per_second": 13.944,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 6106135727702016.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null