lesso06 committed on
Commit
f6348fc
·
verified ·
1 Parent(s): f021059

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41fcd0c26e77ca64dc4c931a0bb512b8291dcb30e2f73a5c6beae64862e5da1c
3
  size 138995824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d590de7f58f586a410e01a95b72b3c7847a432ae56a447c23b7d77357247709d
3
  size 138995824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c6f126d97a69a4fea24d075862c914bf51b76bb888427f5a3e8768fa5d6933b
3
  size 71077780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c6436fa2ab683d908c820e2392344b4dacb64100e19bf529f2252848e8b70b
3
  size 71077780
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8a4a0024556e1b863a457f3632c7a699f1e182e747c1be3fc9aaf54cae8b54d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e70bc91a0683ac5fcfb898099615306e491eb4ab424ab7a6e6d834c1c8ccb01
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9821b687b0301a05cac9bd37b28a5cb1ef278a0580c58589f7a45a55534bddb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68bcbc5d331080b541bdffea9f92896fc830fbbca1b10945d8446fdc72f5b07e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.5498733520507812,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.6507592190889371,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 41.853,
145
  "eval_steps_per_second": 10.677,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 1395241156608000.0,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.5005075931549072,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.8676789587852495,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 41.853,
145
  "eval_steps_per_second": 10.677,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.6941431670281996,
150
+ "grad_norm": 0.42309829592704773,
151
+ "learning_rate": 6.880051902916351e-05,
152
+ "loss": 1.5078,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.737527114967462,
157
+ "grad_norm": 0.4995267391204834,
158
+ "learning_rate": 5.25355867597608e-05,
159
+ "loss": 1.4928,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.7809110629067245,
164
+ "grad_norm": 0.5781512260437012,
165
+ "learning_rate": 3.778713645853078e-05,
166
+ "loss": 1.4352,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.824295010845987,
171
+ "grad_norm": 0.728786289691925,
172
+ "learning_rate": 2.4998366758019425e-05,
173
+ "loss": 1.4567,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.8676789587852495,
178
+ "grad_norm": 2.4118869304656982,
179
+ "learning_rate": 1.4553586865461783e-05,
180
+ "loss": 1.8346,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.8676789587852495,
185
+ "eval_loss": 1.5005075931549072,
186
+ "eval_runtime": 2.4042,
187
+ "eval_samples_per_second": 40.762,
188
+ "eval_steps_per_second": 10.398,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 1876062724423680.0,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null