Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +363 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fcd9e6bad6b0d5100e958de60006d40cde40375f40e16bff61f6e2950d22d9c
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb610103a64f11ffddf59071f6e7b48e73e9f4f7b6076bbceb75adebad3bf54c
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7338e43b67d5f7163731880b1705b777ce1f99bec8daecec58b67571683b54b
 size 341314196

 version https://git-lfs.github.com/spec/v1
+oid sha256:31aad0dbac4224bd9e0e5c6a62650086ee40fe2677dd32d46948ef63ec1d3a70
 size 341314196

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20fc2b9b5a5d9506299f7be1c884e7adafa18ecae9f416ca1baac4cc66b671f1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:045b75fd050c10a4fe14a1c98e12907f7f4ce5a7257e00e93adf6da74be770bb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1479f554a7a167fd87e9ce99235e9d065370465268e2f8373a74df4bd50982ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fe6f0a2f7a14e66c6816df9a38574183cedfa2cc544d1c8e8e8cea1139c2f76
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4635015726089478,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.05595970900951315,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 7.895,
       "eval_steps_per_second": 1.989,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -401,7 +759,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.093062590509875e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4105467796325684,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.1119194180190263,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.895,
       "eval_steps_per_second": 1.989,
       "step": 50
+    },
+    {
+      "epoch": 0.05707890318970341,
+      "grad_norm": 0.8516172170639038,
+      "learning_rate": 7.889157894736843e-05,
+      "loss": 1.9615,
+      "step": 51
+    },
+    {
+      "epoch": 0.058198097369893675,
+      "grad_norm": 0.7036626935005188,
+      "learning_rate": 7.83621052631579e-05,
+      "loss": 2.1477,
+      "step": 52
+    },
+    {
+      "epoch": 0.05931729155008394,
+      "grad_norm": 0.7194252014160156,
+      "learning_rate": 7.783263157894737e-05,
+      "loss": 2.3045,
+      "step": 53
+    },
+    {
+      "epoch": 0.0604364857302742,
+      "grad_norm": 0.7690565586090088,
+      "learning_rate": 7.730315789473684e-05,
+      "loss": 2.2782,
+      "step": 54
+    },
+    {
+      "epoch": 0.06155567991046446,
+      "grad_norm": 0.6265071630477905,
+      "learning_rate": 7.677368421052632e-05,
+      "loss": 1.7766,
+      "step": 55
+    },
+    {
+      "epoch": 0.06267487409065473,
+      "grad_norm": 0.6072700619697571,
+      "learning_rate": 7.624421052631579e-05,
+      "loss": 1.8207,
+      "step": 56
+    },
+    {
+      "epoch": 0.063794068270845,
+      "grad_norm": 0.5296346545219421,
+      "learning_rate": 7.571473684210526e-05,
+      "loss": 1.2979,
+      "step": 57
+    },
+    {
+      "epoch": 0.06491326245103525,
+      "grad_norm": 0.6143375635147095,
+      "learning_rate": 7.518526315789475e-05,
+      "loss": 1.7014,
+      "step": 58
+    },
+    {
+      "epoch": 0.06603245663122552,
+      "grad_norm": 0.5131465196609497,
+      "learning_rate": 7.465578947368422e-05,
+      "loss": 1.1046,
+      "step": 59
+    },
+    {
+      "epoch": 0.06715165081141578,
+      "grad_norm": 0.5977643728256226,
+      "learning_rate": 7.412631578947369e-05,
+      "loss": 1.6154,
+      "step": 60
+    },
+    {
+      "epoch": 0.06827084499160604,
+      "grad_norm": 0.5476318001747131,
+      "learning_rate": 7.359684210526317e-05,
+      "loss": 1.3177,
+      "step": 61
+    },
+    {
+      "epoch": 0.0693900391717963,
+      "grad_norm": 0.6704787611961365,
+      "learning_rate": 7.306736842105264e-05,
+      "loss": 1.7062,
+      "step": 62
+    },
+    {
+      "epoch": 0.07050923335198657,
+      "grad_norm": 0.6229788064956665,
+      "learning_rate": 7.253789473684211e-05,
+      "loss": 1.4367,
+      "step": 63
+    },
+    {
+      "epoch": 0.07162842753217684,
+      "grad_norm": 0.6314613819122314,
+      "learning_rate": 7.200842105263158e-05,
+      "loss": 1.5649,
+      "step": 64
+    },
+    {
+      "epoch": 0.0727476217123671,
+      "grad_norm": 0.6403206586837769,
+      "learning_rate": 7.147894736842105e-05,
+      "loss": 1.8579,
+      "step": 65
+    },
+    {
+      "epoch": 0.07386681589255736,
+      "grad_norm": 0.5655908584594727,
+      "learning_rate": 7.094947368421052e-05,
+      "loss": 1.119,
+      "step": 66
+    },
+    {
+      "epoch": 0.07498601007274762,
+      "grad_norm": 0.502298891544342,
+      "learning_rate": 7.042e-05,
+      "loss": 0.8837,
+      "step": 67
+    },
+    {
+      "epoch": 0.07610520425293789,
+      "grad_norm": 0.5558024048805237,
+      "learning_rate": 6.989052631578948e-05,
+      "loss": 1.1857,
+      "step": 68
+    },
+    {
+      "epoch": 0.07722439843312814,
+      "grad_norm": 0.5176172852516174,
+      "learning_rate": 6.936105263157896e-05,
+      "loss": 0.9946,
+      "step": 69
+    },
+    {
+      "epoch": 0.07834359261331841,
+      "grad_norm": 0.576248824596405,
+      "learning_rate": 6.883157894736843e-05,
+      "loss": 0.9529,
+      "step": 70
+    },
+    {
+      "epoch": 0.07946278679350867,
+      "grad_norm": 0.5664923787117004,
+      "learning_rate": 6.83021052631579e-05,
+      "loss": 1.1525,
+      "step": 71
+    },
+    {
+      "epoch": 0.08058198097369894,
+      "grad_norm": 0.6141046285629272,
+      "learning_rate": 6.777263157894737e-05,
+      "loss": 1.2382,
+      "step": 72
+    },
+    {
+      "epoch": 0.0817011751538892,
+      "grad_norm": 0.5944051146507263,
+      "learning_rate": 6.724315789473684e-05,
+      "loss": 1.1227,
+      "step": 73
+    },
+    {
+      "epoch": 0.08282036933407946,
+      "grad_norm": 0.6328909397125244,
+      "learning_rate": 6.671368421052631e-05,
+      "loss": 1.1311,
+      "step": 74
+    },
+    {
+      "epoch": 0.08393956351426973,
+      "grad_norm": 0.5099555850028992,
+      "learning_rate": 6.61842105263158e-05,
+      "loss": 1.0304,
+      "step": 75
+    },
+    {
+      "epoch": 0.08505875769445999,
+      "grad_norm": 0.6328510642051697,
+      "learning_rate": 6.565473684210527e-05,
+      "loss": 0.997,
+      "step": 76
+    },
+    {
+      "epoch": 0.08617795187465026,
+      "grad_norm": 0.5725546479225159,
+      "learning_rate": 6.512526315789474e-05,
+      "loss": 0.9623,
+      "step": 77
+    },
+    {
+      "epoch": 0.08729714605484051,
+      "grad_norm": 0.6395103931427002,
+      "learning_rate": 6.459578947368421e-05,
+      "loss": 1.5526,
+      "step": 78
+    },
+    {
+      "epoch": 0.08841634023503078,
+      "grad_norm": 0.6277320981025696,
+      "learning_rate": 6.406631578947369e-05,
+      "loss": 1.1281,
+      "step": 79
+    },
+    {
+      "epoch": 0.08953553441522104,
+      "grad_norm": 0.589504063129425,
+      "learning_rate": 6.353684210526316e-05,
+      "loss": 1.0345,
+      "step": 80
+    },
+    {
+      "epoch": 0.09065472859541131,
+      "grad_norm": 0.6192626357078552,
+      "learning_rate": 6.300736842105263e-05,
+      "loss": 0.9688,
+      "step": 81
+    },
+    {
+      "epoch": 0.09177392277560156,
+      "grad_norm": 0.6085880994796753,
+      "learning_rate": 6.247789473684212e-05,
+      "loss": 0.8814,
+      "step": 82
+    },
+    {
+      "epoch": 0.09289311695579183,
+      "grad_norm": 0.6605837345123291,
+      "learning_rate": 6.194842105263159e-05,
+      "loss": 1.4382,
+      "step": 83
+    },
+    {
+      "epoch": 0.09401231113598209,
+      "grad_norm": 0.6139897108078003,
+      "learning_rate": 6.141894736842106e-05,
+      "loss": 1.091,
+      "step": 84
+    },
+    {
+      "epoch": 0.09513150531617236,
+      "grad_norm": 0.6851410269737244,
+      "learning_rate": 6.088947368421053e-05,
+      "loss": 1.1183,
+      "step": 85
+    },
+    {
+      "epoch": 0.09625069949636261,
+      "grad_norm": 0.6170366406440735,
+      "learning_rate": 6.036e-05,
+      "loss": 1.105,
+      "step": 86
+    },
+    {
+      "epoch": 0.09736989367655288,
+      "grad_norm": 0.84287029504776,
+      "learning_rate": 5.9830526315789475e-05,
+      "loss": 1.0071,
+      "step": 87
+    },
+    {
+      "epoch": 0.09848908785674315,
+      "grad_norm": 0.6654536128044128,
+      "learning_rate": 5.9301052631578946e-05,
+      "loss": 1.1307,
+      "step": 88
+    },
+    {
+      "epoch": 0.09960828203693341,
+      "grad_norm": 0.6223897933959961,
+      "learning_rate": 5.877157894736843e-05,
+      "loss": 0.885,
+      "step": 89
+    },
+    {
+      "epoch": 0.10072747621712368,
+      "grad_norm": 0.7293018102645874,
+      "learning_rate": 5.82421052631579e-05,
+      "loss": 1.1569,
+      "step": 90
+    },
+    {
+      "epoch": 0.10184667039731393,
+      "grad_norm": 0.654303789138794,
+      "learning_rate": 5.771263157894737e-05,
+      "loss": 0.9901,
+      "step": 91
+    },
+    {
+      "epoch": 0.1029658645775042,
+      "grad_norm": 0.7520757913589478,
+      "learning_rate": 5.718315789473685e-05,
+      "loss": 1.4022,
+      "step": 92
+    },
+    {
+      "epoch": 0.10408505875769446,
+      "grad_norm": 0.7011393904685974,
+      "learning_rate": 5.665368421052632e-05,
+      "loss": 1.18,
+      "step": 93
+    },
+    {
+      "epoch": 0.10520425293788473,
+      "grad_norm": 0.7474265098571777,
+      "learning_rate": 5.612421052631579e-05,
+      "loss": 1.3109,
+      "step": 94
+    },
+    {
+      "epoch": 0.10632344711807498,
+      "grad_norm": 0.8840892910957336,
+      "learning_rate": 5.559473684210527e-05,
+      "loss": 1.4722,
+      "step": 95
+    },
+    {
+      "epoch": 0.10744264129826525,
+      "grad_norm": 0.831760048866272,
+      "learning_rate": 5.506526315789474e-05,
+      "loss": 1.3972,
+      "step": 96
+    },
+    {
+      "epoch": 0.1085618354784555,
+      "grad_norm": 1.0217140913009644,
+      "learning_rate": 5.453578947368421e-05,
+      "loss": 1.6376,
+      "step": 97
+    },
+    {
+      "epoch": 0.10968102965864578,
+      "grad_norm": 2.1897904872894287,
+      "learning_rate": 5.400631578947369e-05,
+      "loss": 1.3989,
+      "step": 98
+    },
+    {
+      "epoch": 0.11080022383883603,
+      "grad_norm": 1.0960580110549927,
+      "learning_rate": 5.347684210526316e-05,
+      "loss": 1.8274,
+      "step": 99
+    },
+    {
+      "epoch": 0.1119194180190263,
+      "grad_norm": 1.469871163368225,
+      "learning_rate": 5.294736842105263e-05,
+      "loss": 2.0208,
+      "step": 100
+    },
+    {
+      "epoch": 0.1119194180190263,
+      "eval_loss": 1.4105467796325684,
+      "eval_runtime": 48.5917,
+      "eval_samples_per_second": 7.759,
+      "eval_steps_per_second": 1.955,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 4.167268761285427e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null