Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a390ab90c983b502232f6dd978c1d10bd073cbd71369a7a0c14a61fba37ebca
 size 201892112

 version https://git-lfs.github.com/spec/v1
+oid sha256:a671110c1409a67987e3cc6f6badddbd1a79e37f43258e913645e31fd2f9f98b
 size 201892112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4b3bc4c954ea220c7ba8aecc83e5b5ceea33371e9415d87cb5ab5603a4f2566
 size 102864548

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5dc121e5785116a9e4eab21d5734b169aab51b84bc921fca000c9108f5e1685
 size 102864548

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b25b5f3b462fb3e29221ae0588e7591e3e9adfa54f981e07b5869f484f6b0ecc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd42d77a0d1deb56b88fea8a242532a0d5da06db4f00129a68e2f2d9e34bc44a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ab3d2b0401484126213dd055d9044edf00d7b06db4fe9dbad6027ee8b5d34b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:123ecf29cfd4fe3b008c987ce1ef9f63c2ad00365e06a3691aa36827aaded381
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7575440406799316,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.004644681839294009,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -731,6 +731,364 @@
       "eval_samples_per_second": 55.766,
       "eval_steps_per_second": 13.944,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -759,7 +1117,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6106135727702016.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7381066679954529,
+  "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.006967022758941012,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 55.766,
       "eval_steps_per_second": 13.944,
       "step": 100
+    },
+    {
+      "epoch": 0.0046911286576869484,
+      "grad_norm": 1.547819972038269,
+      "learning_rate": 5.3043157894736836e-05,
+      "loss": 0.7768,
+      "step": 101
+    },
+    {
+      "epoch": 0.004737575476079888,
+      "grad_norm": 2.0550365447998047,
+      "learning_rate": 5.2507368421052635e-05,
+      "loss": 0.8542,
+      "step": 102
+    },
+    {
+      "epoch": 0.004784022294472829,
+      "grad_norm": 1.7644928693771362,
+      "learning_rate": 5.197157894736842e-05,
+      "loss": 0.8406,
+      "step": 103
+    },
+    {
+      "epoch": 0.004830469112865768,
+      "grad_norm": 2.784821033477783,
+      "learning_rate": 5.143578947368421e-05,
+      "loss": 1.0809,
+      "step": 104
+    },
+    {
+      "epoch": 0.004876915931258709,
+      "grad_norm": 2.643968105316162,
+      "learning_rate": 5.09e-05,
+      "loss": 1.1358,
+      "step": 105
+    },
+    {
+      "epoch": 0.004923362749651649,
+      "grad_norm": 2.6479332447052,
+      "learning_rate": 5.036421052631578e-05,
+      "loss": 0.9534,
+      "step": 106
+    },
+    {
+      "epoch": 0.004969809568044589,
+      "grad_norm": 1.5139284133911133,
+      "learning_rate": 4.982842105263158e-05,
+      "loss": 0.6037,
+      "step": 107
+    },
+    {
+      "epoch": 0.005016256386437529,
+      "grad_norm": 2.2001686096191406,
+      "learning_rate": 4.9292631578947366e-05,
+      "loss": 1.0875,
+      "step": 108
+    },
+    {
+      "epoch": 0.005062703204830469,
+      "grad_norm": 1.906663417816162,
+      "learning_rate": 4.875684210526315e-05,
+      "loss": 0.8251,
+      "step": 109
+    },
+    {
+      "epoch": 0.005109150023223409,
+      "grad_norm": 1.6133707761764526,
+      "learning_rate": 4.822105263157894e-05,
+      "loss": 0.7804,
+      "step": 110
+    },
+    {
+      "epoch": 0.00515559684161635,
+      "grad_norm": 1.6872289180755615,
+      "learning_rate": 4.7685263157894735e-05,
+      "loss": 0.5731,
+      "step": 111
+    },
+    {
+      "epoch": 0.005202043660009289,
+      "grad_norm": 1.2829549312591553,
+      "learning_rate": 4.714947368421052e-05,
+      "loss": 0.4865,
+      "step": 112
+    },
+    {
+      "epoch": 0.005248490478402229,
+      "grad_norm": 1.8299009799957275,
+      "learning_rate": 4.661368421052631e-05,
+      "loss": 0.7806,
+      "step": 113
+    },
+    {
+      "epoch": 0.00529493729679517,
+      "grad_norm": 1.3792545795440674,
+      "learning_rate": 4.6077894736842104e-05,
+      "loss": 0.5824,
+      "step": 114
+    },
+    {
+      "epoch": 0.005341384115188109,
+      "grad_norm": 1.554002046585083,
+      "learning_rate": 4.554210526315789e-05,
+      "loss": 0.7399,
+      "step": 115
+    },
+    {
+      "epoch": 0.00538783093358105,
+      "grad_norm": 1.8911974430084229,
+      "learning_rate": 4.500631578947368e-05,
+      "loss": 0.8756,
+      "step": 116
+    },
+    {
+      "epoch": 0.0054342777519739895,
+      "grad_norm": 2.071706771850586,
+      "learning_rate": 4.447052631578947e-05,
+      "loss": 0.8007,
+      "step": 117
+    },
+    {
+      "epoch": 0.00548072457036693,
+      "grad_norm": 2.202437162399292,
+      "learning_rate": 4.393473684210526e-05,
+      "loss": 0.8207,
+      "step": 118
+    },
+    {
+      "epoch": 0.00552717138875987,
+      "grad_norm": 1.33773672580719,
+      "learning_rate": 4.339894736842105e-05,
+      "loss": 0.5947,
+      "step": 119
+    },
+    {
+      "epoch": 0.00557361820715281,
+      "grad_norm": 1.8306225538253784,
+      "learning_rate": 4.2863157894736835e-05,
+      "loss": 0.7513,
+      "step": 120
+    },
+    {
+      "epoch": 0.00562006502554575,
+      "grad_norm": 1.6813061237335205,
+      "learning_rate": 4.2327368421052634e-05,
+      "loss": 0.6929,
+      "step": 121
+    },
+    {
+      "epoch": 0.005666511843938691,
+      "grad_norm": 1.5658451318740845,
+      "learning_rate": 4.179157894736842e-05,
+      "loss": 0.5594,
+      "step": 122
+    },
+    {
+      "epoch": 0.00571295866233163,
+      "grad_norm": 1.4536268711090088,
+      "learning_rate": 4.1255789473684204e-05,
+      "loss": 0.6208,
+      "step": 123
+    },
+    {
+      "epoch": 0.00575940548072457,
+      "grad_norm": 1.9043149948120117,
+      "learning_rate": 4.072e-05,
+      "loss": 0.6332,
+      "step": 124
+    },
+    {
+      "epoch": 0.0058058522991175105,
+      "grad_norm": 2.0733814239501953,
+      "learning_rate": 4.018421052631579e-05,
+      "loss": 0.6764,
+      "step": 125
+    },
+    {
+      "epoch": 0.00585229911751045,
+      "grad_norm": 1.7627897262573242,
+      "learning_rate": 3.9648421052631573e-05,
+      "loss": 0.7384,
+      "step": 126
+    },
+    {
+      "epoch": 0.005898745935903391,
+      "grad_norm": 1.6006054878234863,
+      "learning_rate": 3.9112631578947365e-05,
+      "loss": 0.6752,
+      "step": 127
+    },
+    {
+      "epoch": 0.0059451927542963304,
+      "grad_norm": 1.4541168212890625,
+      "learning_rate": 3.857684210526316e-05,
+      "loss": 0.6692,
+      "step": 128
+    },
+    {
+      "epoch": 0.005991639572689271,
+      "grad_norm": 1.3292078971862793,
+      "learning_rate": 3.804105263157894e-05,
+      "loss": 0.5353,
+      "step": 129
+    },
+    {
+      "epoch": 0.006038086391082211,
+      "grad_norm": 1.6884562969207764,
+      "learning_rate": 3.7505263157894734e-05,
+      "loss": 0.7562,
+      "step": 130
+    },
+    {
+      "epoch": 0.006084533209475151,
+      "grad_norm": 1.0477324724197388,
+      "learning_rate": 3.6969473684210526e-05,
+      "loss": 0.3243,
+      "step": 131
+    },
+    {
+      "epoch": 0.006130980027868091,
+      "grad_norm": 1.4753937721252441,
+      "learning_rate": 3.643368421052631e-05,
+      "loss": 0.5291,
+      "step": 132
+    },
+    {
+      "epoch": 0.0061774268462610315,
+      "grad_norm": 1.7509891986846924,
+      "learning_rate": 3.5897894736842103e-05,
+      "loss": 0.6364,
+      "step": 133
+    },
+    {
+      "epoch": 0.006223873664653971,
+      "grad_norm": 2.055713653564453,
+      "learning_rate": 3.5362105263157895e-05,
+      "loss": 0.782,
+      "step": 134
+    },
+    {
+      "epoch": 0.006270320483046911,
+      "grad_norm": 2.0711967945098877,
+      "learning_rate": 3.482631578947368e-05,
+      "loss": 0.7677,
+      "step": 135
+    },
+    {
+      "epoch": 0.006316767301439851,
+      "grad_norm": 1.3271763324737549,
+      "learning_rate": 3.429052631578947e-05,
+      "loss": 0.5314,
+      "step": 136
+    },
+    {
+      "epoch": 0.006363214119832791,
+      "grad_norm": 1.7668476104736328,
+      "learning_rate": 3.375473684210526e-05,
+      "loss": 0.8441,
+      "step": 137
+    },
+    {
+      "epoch": 0.006409660938225732,
+      "grad_norm": 1.773807168006897,
+      "learning_rate": 3.321894736842105e-05,
+      "loss": 0.7551,
+      "step": 138
+    },
+    {
+      "epoch": 0.006456107756618671,
+      "grad_norm": 1.6312812566757202,
+      "learning_rate": 3.268315789473684e-05,
+      "loss": 0.8111,
+      "step": 139
+    },
+    {
+      "epoch": 0.006502554575011612,
+      "grad_norm": 1.6187984943389893,
+      "learning_rate": 3.2147368421052627e-05,
+      "loss": 0.6781,
+      "step": 140
+    },
+    {
+      "epoch": 0.006549001393404552,
+      "grad_norm": 1.6448986530303955,
+      "learning_rate": 3.161157894736842e-05,
+      "loss": 0.5815,
+      "step": 141
+    },
+    {
+      "epoch": 0.006595448211797492,
+      "grad_norm": 1.9651342630386353,
+      "learning_rate": 3.107578947368421e-05,
+      "loss": 0.7199,
+      "step": 142
+    },
+    {
+      "epoch": 0.006641895030190432,
+      "grad_norm": 2.4397366046905518,
+      "learning_rate": 3.0539999999999996e-05,
+      "loss": 0.7959,
+      "step": 143
+    },
+    {
+      "epoch": 0.006688341848583372,
+      "grad_norm": 1.7463246583938599,
+      "learning_rate": 3.0004210526315784e-05,
+      "loss": 0.7066,
+      "step": 144
+    },
+    {
+      "epoch": 0.006734788666976312,
+      "grad_norm": 1.6383179426193237,
+      "learning_rate": 2.946842105263158e-05,
+      "loss": 0.5844,
+      "step": 145
+    },
+    {
+      "epoch": 0.006781235485369252,
+      "grad_norm": 2.03802752494812,
+      "learning_rate": 2.8932631578947368e-05,
+      "loss": 0.7851,
+      "step": 146
+    },
+    {
+      "epoch": 0.006827682303762192,
+      "grad_norm": 1.5965886116027832,
+      "learning_rate": 2.8396842105263153e-05,
+      "loss": 0.7421,
+      "step": 147
+    },
+    {
+      "epoch": 0.006874129122155132,
+      "grad_norm": 1.6589584350585938,
+      "learning_rate": 2.786105263157895e-05,
+      "loss": 0.7362,
+      "step": 148
+    },
+    {
+      "epoch": 0.006920575940548073,
+      "grad_norm": 1.904215693473816,
+      "learning_rate": 2.7325263157894737e-05,
+      "loss": 0.842,
+      "step": 149
+    },
+    {
+      "epoch": 0.006967022758941012,
+      "grad_norm": 1.954518437385559,
+      "learning_rate": 2.6789473684210522e-05,
+      "loss": 0.8687,
+      "step": 150
+    },
+    {
+      "epoch": 0.006967022758941012,
+      "eval_loss": 0.7381066679954529,
+      "eval_runtime": 163.587,
+      "eval_samples_per_second": 55.42,
+      "eval_steps_per_second": 13.858,
+      "step": 150
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 9119207069319168.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null