Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20fdfdc6ec3cf10c1a3caf7a22b73a71461a7e8dd34df5c4aeab64bdbd59f23c
 size 191968

 version https://git-lfs.github.com/spec/v1
+oid sha256:11614e394a08ccc9053c89578dd1a0613131fe505f5977da71a80e0c000178f3
 size 191968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f0e3a345a361dc4a322e6a1c21b22265d797b62146057c0dd4afe690bd96286
 size 400714

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ceed1bfe7c38d8aa6b52f024b7d1599c484bfbfbcad03c0e2f86f16c90fc9c7
 size 400714

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f78e9308c8d3508d5c045449eace7534a48516d9c29e41b6c50d5222faad2b9d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:268a5a3bbaabffa7a145fdaa2e0a15cc5c85ab48c574327e4aa64a7314620256
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d90116c540b4ff0066495fbccc9c914a568905fb44c6564f227952cc4231b00
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:03ad66011cfc1fc727a51190602a41adc332b48eeef62a5ee87c2ca9f9b90b2b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.353822708129883,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.008128098837681867,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 281.514,
       "eval_steps_per_second": 70.406,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1868473565184.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.339950561523438,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.016256197675363734,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 281.514,
       "eval_steps_per_second": 70.406,
       "step": 50
+    },
+    {
+      "epoch": 0.008290660814435503,
+      "grad_norm": 0.44935211539268494,
+      "learning_rate": 2.3816778784387097e-05,
+      "loss": 10.3529,
+      "step": 51
+    },
+    {
+      "epoch": 0.00845322279118914,
+      "grad_norm": 0.46845120191574097,
+      "learning_rate": 2.3263454721781537e-05,
+      "loss": 10.3529,
+      "step": 52
+    },
+    {
+      "epoch": 0.008615784767942778,
+      "grad_norm": 0.4442523121833801,
+      "learning_rate": 2.2693489161088592e-05,
+      "loss": 10.353,
+      "step": 53
+    },
+    {
+      "epoch": 0.008778346744696415,
+      "grad_norm": 0.4755557179450989,
+      "learning_rate": 2.210802993709498e-05,
+      "loss": 10.3532,
+      "step": 54
+    },
+    {
+      "epoch": 0.008940908721450053,
+      "grad_norm": 0.4838990569114685,
+      "learning_rate": 2.1508256086763372e-05,
+      "loss": 10.3497,
+      "step": 55
+    },
+    {
+      "epoch": 0.00910347069820369,
+      "grad_norm": 0.48417747020721436,
+      "learning_rate": 2.0895375474808857e-05,
+      "loss": 10.3493,
+      "step": 56
+    },
+    {
+      "epoch": 0.009266032674957328,
+      "grad_norm": 0.4877360761165619,
+      "learning_rate": 2.0270622361220143e-05,
+      "loss": 10.3479,
+      "step": 57
+    },
+    {
+      "epoch": 0.009428594651710965,
+      "grad_norm": 0.5005224943161011,
+      "learning_rate": 1.963525491562421e-05,
+      "loss": 10.3512,
+      "step": 58
+    },
+    {
+      "epoch": 0.009591156628464602,
+      "grad_norm": 0.5387058258056641,
+      "learning_rate": 1.8990552683500128e-05,
+      "loss": 10.347,
+      "step": 59
+    },
+    {
+      "epoch": 0.00975371860521824,
+      "grad_norm": 0.5001525282859802,
+      "learning_rate": 1.8337814009344716e-05,
+      "loss": 10.3473,
+      "step": 60
+    },
+    {
+      "epoch": 0.009916280581971876,
+      "grad_norm": 0.4993062913417816,
+      "learning_rate": 1.767835342197955e-05,
+      "loss": 10.3476,
+      "step": 61
+    },
+    {
+      "epoch": 0.010078842558725515,
+      "grad_norm": 0.4824479818344116,
+      "learning_rate": 1.7013498987264832e-05,
+      "loss": 10.3469,
+      "step": 62
+    },
+    {
+      "epoch": 0.010241404535479151,
+      "grad_norm": 0.5112585425376892,
+      "learning_rate": 1.6344589633551502e-05,
+      "loss": 10.3464,
+      "step": 63
+    },
+    {
+      "epoch": 0.010403966512232788,
+      "grad_norm": 0.5074177384376526,
+      "learning_rate": 1.5672972455257726e-05,
+      "loss": 10.3423,
+      "step": 64
+    },
+    {
+      "epoch": 0.010566528488986426,
+      "grad_norm": 0.532660722732544,
+      "learning_rate": 1.5e-05,
+      "loss": 10.3437,
+      "step": 65
+    },
+    {
+      "epoch": 0.010729090465740063,
+      "grad_norm": 0.5157567858695984,
+      "learning_rate": 1.4327027544742281e-05,
+      "loss": 10.346,
+      "step": 66
+    },
+    {
+      "epoch": 0.010891652442493701,
+      "grad_norm": 0.48638421297073364,
+      "learning_rate": 1.36554103664485e-05,
+      "loss": 10.343,
+      "step": 67
+    },
+    {
+      "epoch": 0.011054214419247338,
+      "grad_norm": 0.5116586685180664,
+      "learning_rate": 1.2986501012735174e-05,
+      "loss": 10.3435,
+      "step": 68
+    },
+    {
+      "epoch": 0.011216776396000975,
+      "grad_norm": 0.5411083698272705,
+      "learning_rate": 1.2321646578020452e-05,
+      "loss": 10.3411,
+      "step": 69
+    },
+    {
+      "epoch": 0.011379338372754613,
+      "grad_norm": 0.5084531903266907,
+      "learning_rate": 1.1662185990655285e-05,
+      "loss": 10.3431,
+      "step": 70
+    },
+    {
+      "epoch": 0.01154190034950825,
+      "grad_norm": 0.5400840044021606,
+      "learning_rate": 1.1009447316499875e-05,
+      "loss": 10.3424,
+      "step": 71
+    },
+    {
+      "epoch": 0.011704462326261888,
+      "grad_norm": 0.5120368003845215,
+      "learning_rate": 1.036474508437579e-05,
+      "loss": 10.3401,
+      "step": 72
+    },
+    {
+      "epoch": 0.011867024303015524,
+      "grad_norm": 0.5191106200218201,
+      "learning_rate": 9.729377638779859e-06,
+      "loss": 10.344,
+      "step": 73
+    },
+    {
+      "epoch": 0.012029586279769163,
+      "grad_norm": 0.4906412959098816,
+      "learning_rate": 9.104624525191147e-06,
+      "loss": 10.3465,
+      "step": 74
+    },
+    {
+      "epoch": 0.0121921482565228,
+      "grad_norm": 0.5198729038238525,
+      "learning_rate": 8.491743913236629e-06,
+      "loss": 10.3449,
+      "step": 75
+    },
+    {
+      "epoch": 0.012354710233276436,
+      "grad_norm": 0.4855211675167084,
+      "learning_rate": 7.89197006290502e-06,
+      "loss": 10.3454,
+      "step": 76
+    },
+    {
+      "epoch": 0.012517272210030074,
+      "grad_norm": 0.5253652334213257,
+      "learning_rate": 7.30651083891141e-06,
+      "loss": 10.341,
+      "step": 77
+    },
+    {
+      "epoch": 0.012679834186783711,
+      "grad_norm": 0.5067545771598816,
+      "learning_rate": 6.736545278218464e-06,
+      "loss": 10.3401,
+      "step": 78
+    },
+    {
+      "epoch": 0.01284239616353735,
+      "grad_norm": 0.4979783296585083,
+      "learning_rate": 6.1832212156129045e-06,
+      "loss": 10.3386,
+      "step": 79
+    },
+    {
+      "epoch": 0.013004958140290986,
+      "grad_norm": 0.44555556774139404,
+      "learning_rate": 5.647652972118998e-06,
+      "loss": 10.341,
+      "step": 80
+    },
+    {
+      "epoch": 0.013167520117044623,
+      "grad_norm": 0.49356335401535034,
+      "learning_rate": 5.130919110904311e-06,
+      "loss": 10.3434,
+      "step": 81
+    },
+    {
+      "epoch": 0.01333008209379826,
+      "grad_norm": 0.5091402530670166,
+      "learning_rate": 4.6340602651970304e-06,
+      "loss": 10.3445,
+      "step": 82
+    },
+    {
+      "epoch": 0.013492644070551897,
+      "grad_norm": 0.4779127836227417,
+      "learning_rate": 4.158077042589129e-06,
+      "loss": 10.3411,
+      "step": 83
+    },
+    {
+      "epoch": 0.013655206047305536,
+      "grad_norm": 0.5306726694107056,
+      "learning_rate": 3.7039280099458373e-06,
+      "loss": 10.3444,
+      "step": 84
+    },
+    {
+      "epoch": 0.013817768024059172,
+      "grad_norm": 0.5025277137756348,
+      "learning_rate": 3.272527762979553e-06,
+      "loss": 10.3444,
+      "step": 85
+    },
+    {
+      "epoch": 0.013980330000812809,
+      "grad_norm": 0.48713839054107666,
+      "learning_rate": 2.86474508437579e-06,
+      "loss": 10.344,
+      "step": 86
+    },
+    {
+      "epoch": 0.014142891977566447,
+      "grad_norm": 0.4580845534801483,
+      "learning_rate": 2.4814011941804603e-06,
+      "loss": 10.3414,
+      "step": 87
+    },
+    {
+      "epoch": 0.014305453954320084,
+      "grad_norm": 0.5302749276161194,
+      "learning_rate": 2.1232680959720085e-06,
+      "loss": 10.3359,
+      "step": 88
+    },
+    {
+      "epoch": 0.014468015931073722,
+      "grad_norm": 0.47324472665786743,
+      "learning_rate": 1.79106702214893e-06,
+      "loss": 10.3363,
+      "step": 89
+    },
+    {
+      "epoch": 0.014630577907827359,
+      "grad_norm": 0.445722758769989,
+      "learning_rate": 1.4854669814637145e-06,
+      "loss": 10.3473,
+      "step": 90
+    },
+    {
+      "epoch": 0.014793139884580997,
+      "grad_norm": 0.49450841546058655,
+      "learning_rate": 1.2070834117282414e-06,
+      "loss": 10.3435,
+      "step": 91
+    },
+    {
+      "epoch": 0.014955701861334634,
+      "grad_norm": 0.5182689428329468,
+      "learning_rate": 9.56476940403942e-07,
+      "loss": 10.3395,
+      "step": 92
+    },
+    {
+      "epoch": 0.01511826383808827,
+      "grad_norm": 0.46022310853004456,
+      "learning_rate": 7.341522555726971e-07,
+      "loss": 10.3441,
+      "step": 93
+    },
+    {
+      "epoch": 0.015280825814841909,
+      "grad_norm": 0.42653602361679077,
+      "learning_rate": 5.405570895622014e-07,
+      "loss": 10.3433,
+      "step": 94
+    },
+    {
+      "epoch": 0.015443387791595545,
+      "grad_norm": 0.4647497832775116,
+      "learning_rate": 3.760813172726457e-07,
+      "loss": 10.3421,
+      "step": 95
+    },
+    {
+      "epoch": 0.015605949768349184,
+      "grad_norm": 0.4479852020740509,
+      "learning_rate": 2.41056171020555e-07,
+      "loss": 10.3412,
+      "step": 96
+    },
+    {
+      "epoch": 0.015768511745102822,
+      "grad_norm": 0.3695429563522339,
+      "learning_rate": 1.357535734809795e-07,
+      "loss": 10.3386,
+      "step": 97
+    },
+    {
+      "epoch": 0.015931073721856457,
+      "grad_norm": 0.4698352515697479,
+      "learning_rate": 6.038559007141397e-08,
+      "loss": 10.3484,
+      "step": 98
+    },
+    {
+      "epoch": 0.016093635698610095,
+      "grad_norm": 0.41739100217819214,
+      "learning_rate": 1.510400188028116e-08,
+      "loss": 10.35,
+      "step": 99
+    },
+    {
+      "epoch": 0.016256197675363734,
+      "grad_norm": 0.4362470507621765,
+      "learning_rate": 0.0,
+      "loss": 10.3327,
+      "step": 100
+    },
+    {
+      "epoch": 0.016256197675363734,
+      "eval_loss": 10.339950561523438,
+      "eval_runtime": 9.2038,
+      "eval_samples_per_second": 281.516,
+      "eval_steps_per_second": 70.406,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3695115632640.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null