Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a671110c1409a67987e3cc6f6badddbd1a79e37f43258e913645e31fd2f9f98b
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:7bc3de9d922b59fcb8756ca860d217458b20f218445cd2416ccda5ba04738885
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5dc121e5785116a9e4eab21d5734b169aab51b84bc921fca000c9108f5e1685
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b16f5d61831a45764516dee6d3adf8f3a3543920bb2c5a6721e41420cd395cc
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd42d77a0d1deb56b88fea8a242532a0d5da06db4f00129a68e2f2d9e34bc44a
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f291da43a3a16005783fd2e2f718242f727f95f73001f3a2cfb96d96e661c79d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d703f9adb617aa1ec13556a7b7482c741f765121a5a04f3cafdcfbce6ed485ee
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7381066679954529,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.006967022758941012,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 55.42,
       "eval_steps_per_second": 13.858,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9119207069319168.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7234218120574951,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.009289363678588018,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 55.42,
       "eval_steps_per_second": 13.858,
       "step": 150
+    },
+    {
+      "epoch": 0.007013469577333953,
+      "grad_norm": 1.321113109588623,
+      "learning_rate": 2.6253684210526317e-05,
+      "loss": 0.6275,
+      "step": 151
+    },
+    {
+      "epoch": 0.0070599163957268925,
+      "grad_norm": 1.9313557147979736,
+      "learning_rate": 2.5717894736842106e-05,
+      "loss": 0.8083,
+      "step": 152
+    },
+    {
+      "epoch": 0.007106363214119833,
+      "grad_norm": 2.39707350730896,
+      "learning_rate": 2.518210526315789e-05,
+      "loss": 1.1125,
+      "step": 153
+    },
+    {
+      "epoch": 0.007152810032512773,
+      "grad_norm": 2.2258388996124268,
+      "learning_rate": 2.4646315789473683e-05,
+      "loss": 0.9885,
+      "step": 154
+    },
+    {
+      "epoch": 0.007199256850905713,
+      "grad_norm": 2.207796096801758,
+      "learning_rate": 2.411052631578947e-05,
+      "loss": 0.8165,
+      "step": 155
+    },
+    {
+      "epoch": 0.007245703669298653,
+      "grad_norm": 2.068021774291992,
+      "learning_rate": 2.357473684210526e-05,
+      "loss": 0.9621,
+      "step": 156
+    },
+    {
+      "epoch": 0.0072921504876915936,
+      "grad_norm": 3.123298168182373,
+      "learning_rate": 2.3038947368421052e-05,
+      "loss": 0.9623,
+      "step": 157
+    },
+    {
+      "epoch": 0.007338597306084533,
+      "grad_norm": 1.7516857385635376,
+      "learning_rate": 2.250315789473684e-05,
+      "loss": 0.7126,
+      "step": 158
+    },
+    {
+      "epoch": 0.007385044124477473,
+      "grad_norm": 1.756352424621582,
+      "learning_rate": 2.196736842105263e-05,
+      "loss": 0.6112,
+      "step": 159
+    },
+    {
+      "epoch": 0.0074314909428704135,
+      "grad_norm": 1.324313998222351,
+      "learning_rate": 2.1431578947368418e-05,
+      "loss": 0.4837,
+      "step": 160
+    },
+    {
+      "epoch": 0.007477937761263353,
+      "grad_norm": 1.6090558767318726,
+      "learning_rate": 2.089578947368421e-05,
+      "loss": 0.5255,
+      "step": 161
+    },
+    {
+      "epoch": 0.007524384579656294,
+      "grad_norm": 1.3804148435592651,
+      "learning_rate": 2.036e-05,
+      "loss": 0.4674,
+      "step": 162
+    },
+    {
+      "epoch": 0.007570831398049233,
+      "grad_norm": 1.3651041984558105,
+      "learning_rate": 1.9824210526315787e-05,
+      "loss": 0.7306,
+      "step": 163
+    },
+    {
+      "epoch": 0.007617278216442174,
+      "grad_norm": 1.8530007600784302,
+      "learning_rate": 1.928842105263158e-05,
+      "loss": 0.7491,
+      "step": 164
+    },
+    {
+      "epoch": 0.007663725034835114,
+      "grad_norm": 1.493820309638977,
+      "learning_rate": 1.8752631578947367e-05,
+      "loss": 0.6576,
+      "step": 165
+    },
+    {
+      "epoch": 0.007710171853228054,
+      "grad_norm": 1.199458360671997,
+      "learning_rate": 1.8216842105263156e-05,
+      "loss": 0.3963,
+      "step": 166
+    },
+    {
+      "epoch": 0.007756618671620994,
+      "grad_norm": 1.6788829565048218,
+      "learning_rate": 1.7681052631578948e-05,
+      "loss": 0.7574,
+      "step": 167
+    },
+    {
+      "epoch": 0.0078030654900139345,
+      "grad_norm": 1.2864102125167847,
+      "learning_rate": 1.7145263157894736e-05,
+      "loss": 0.5522,
+      "step": 168
+    },
+    {
+      "epoch": 0.007849512308406874,
+      "grad_norm": 1.8316515684127808,
+      "learning_rate": 1.6609473684210525e-05,
+      "loss": 0.5285,
+      "step": 169
+    },
+    {
+      "epoch": 0.007895959126799815,
+      "grad_norm": 1.116195559501648,
+      "learning_rate": 1.6073684210526313e-05,
+      "loss": 0.5598,
+      "step": 170
+    },
+    {
+      "epoch": 0.007942405945192754,
+      "grad_norm": 1.7328448295593262,
+      "learning_rate": 1.5537894736842105e-05,
+      "loss": 0.5905,
+      "step": 171
+    },
+    {
+      "epoch": 0.007988852763585694,
+      "grad_norm": 1.5131913423538208,
+      "learning_rate": 1.5002105263157892e-05,
+      "loss": 0.5292,
+      "step": 172
+    },
+    {
+      "epoch": 0.008035299581978635,
+      "grad_norm": 1.8316717147827148,
+      "learning_rate": 1.4466315789473684e-05,
+      "loss": 0.547,
+      "step": 173
+    },
+    {
+      "epoch": 0.008081746400371575,
+      "grad_norm": 1.4963319301605225,
+      "learning_rate": 1.3930526315789474e-05,
+      "loss": 0.6936,
+      "step": 174
+    },
+    {
+      "epoch": 0.008128193218764514,
+      "grad_norm": 1.6800758838653564,
+      "learning_rate": 1.3394736842105261e-05,
+      "loss": 0.6586,
+      "step": 175
+    },
+    {
+      "epoch": 0.008174640037157455,
+      "grad_norm": 1.0843589305877686,
+      "learning_rate": 1.2858947368421053e-05,
+      "loss": 0.4544,
+      "step": 176
+    },
+    {
+      "epoch": 0.008221086855550395,
+      "grad_norm": 1.6353403329849243,
+      "learning_rate": 1.2323157894736842e-05,
+      "loss": 0.6454,
+      "step": 177
+    },
+    {
+      "epoch": 0.008267533673943336,
+      "grad_norm": 1.6226987838745117,
+      "learning_rate": 1.178736842105263e-05,
+      "loss": 0.7083,
+      "step": 178
+    },
+    {
+      "epoch": 0.008313980492336275,
+      "grad_norm": 1.2014755010604858,
+      "learning_rate": 1.125157894736842e-05,
+      "loss": 0.518,
+      "step": 179
+    },
+    {
+      "epoch": 0.008360427310729215,
+      "grad_norm": 0.9961537718772888,
+      "learning_rate": 1.0715789473684209e-05,
+      "loss": 0.5034,
+      "step": 180
+    },
+    {
+      "epoch": 0.008406874129122156,
+      "grad_norm": 1.2505362033843994,
+      "learning_rate": 1.018e-05,
+      "loss": 0.3744,
+      "step": 181
+    },
+    {
+      "epoch": 0.008453320947515094,
+      "grad_norm": 1.209218144416809,
+      "learning_rate": 9.64421052631579e-06,
+      "loss": 0.5659,
+      "step": 182
+    },
+    {
+      "epoch": 0.008499767765908035,
+      "grad_norm": 1.5287011861801147,
+      "learning_rate": 9.108421052631578e-06,
+      "loss": 0.6864,
+      "step": 183
+    },
+    {
+      "epoch": 0.008546214584300976,
+      "grad_norm": 1.5412635803222656,
+      "learning_rate": 8.572631578947368e-06,
+      "loss": 0.784,
+      "step": 184
+    },
+    {
+      "epoch": 0.008592661402693916,
+      "grad_norm": 1.2702871561050415,
+      "learning_rate": 8.036842105263157e-06,
+      "loss": 0.6373,
+      "step": 185
+    },
+    {
+      "epoch": 0.008639108221086855,
+      "grad_norm": 1.2671583890914917,
+      "learning_rate": 7.501052631578946e-06,
+      "loss": 0.4503,
+      "step": 186
+    },
+    {
+      "epoch": 0.008685555039479795,
+      "grad_norm": 1.6440976858139038,
+      "learning_rate": 6.965263157894737e-06,
+      "loss": 0.6182,
+      "step": 187
+    },
+    {
+      "epoch": 0.008732001857872736,
+      "grad_norm": 1.6860370635986328,
+      "learning_rate": 6.4294736842105265e-06,
+      "loss": 0.6629,
+      "step": 188
+    },
+    {
+      "epoch": 0.008778448676265677,
+      "grad_norm": 1.778744101524353,
+      "learning_rate": 5.893684210526315e-06,
+      "loss": 0.707,
+      "step": 189
+    },
+    {
+      "epoch": 0.008824895494658615,
+      "grad_norm": 2.259239673614502,
+      "learning_rate": 5.3578947368421044e-06,
+      "loss": 0.8088,
+      "step": 190
+    },
+    {
+      "epoch": 0.008871342313051556,
+      "grad_norm": 1.5541491508483887,
+      "learning_rate": 4.822105263157895e-06,
+      "loss": 0.6756,
+      "step": 191
+    },
+    {
+      "epoch": 0.008917789131444497,
+      "grad_norm": 1.634876012802124,
+      "learning_rate": 4.286315789473684e-06,
+      "loss": 0.8733,
+      "step": 192
+    },
+    {
+      "epoch": 0.008964235949837435,
+      "grad_norm": 1.7315068244934082,
+      "learning_rate": 3.750526315789473e-06,
+      "loss": 0.6555,
+      "step": 193
+    },
+    {
+      "epoch": 0.009010682768230376,
+      "grad_norm": 1.7454522848129272,
+      "learning_rate": 3.2147368421052633e-06,
+      "loss": 0.754,
+      "step": 194
+    },
+    {
+      "epoch": 0.009057129586623316,
+      "grad_norm": 1.7474086284637451,
+      "learning_rate": 2.6789473684210522e-06,
+      "loss": 0.8278,
+      "step": 195
+    },
+    {
+      "epoch": 0.009103576405016257,
+      "grad_norm": 1.9185843467712402,
+      "learning_rate": 2.143157894736842e-06,
+      "loss": 0.7852,
+      "step": 196
+    },
+    {
+      "epoch": 0.009150023223409196,
+      "grad_norm": 1.923701286315918,
+      "learning_rate": 1.6073684210526316e-06,
+      "loss": 0.6713,
+      "step": 197
+    },
+    {
+      "epoch": 0.009196470041802136,
+      "grad_norm": 1.524601697921753,
+      "learning_rate": 1.071578947368421e-06,
+      "loss": 0.7535,
+      "step": 198
+    },
+    {
+      "epoch": 0.009242916860195077,
+      "grad_norm": 2.139697313308716,
+      "learning_rate": 5.357894736842105e-07,
+      "loss": 0.9147,
+      "step": 199
+    },
+    {
+      "epoch": 0.009289363678588018,
+      "grad_norm": 1.9575085639953613,
+      "learning_rate": 0.0,
+      "loss": 0.8268,
+      "step": 200
+    },
+    {
+      "epoch": 0.009289363678588018,
+      "eval_loss": 0.7234218120574951,
+      "eval_runtime": 163.2305,
+      "eval_samples_per_second": 55.541,
+      "eval_steps_per_second": 13.888,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2158942759092224e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null