dixedus commited on
Commit
9a195d8
·
verified ·
1 Parent(s): 4c93c1e

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ced0aae4575f32da894f069a0689f8adfc695305a4161c3476b41484cbac0743
3
- size 103716100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dabda8c6629206f6d1d2d0957fb34e4f64a7ee92a861fd7c1d43cb7821a307d7
3
+ size 103716484
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4874bfff8f48f58dbeacd6424c17544ca4074af0f4864ca33e34f39221c537ef
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d9f88e578ac3bccf1cd5f62332106ab94e8c84aa4a3493cc04688b8cb59d50
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d10d0fa96665f6b4af4824faec3d1d9f4e8b4343723a14d86cab932da6ce3225
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": NaN,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.07216308858019123,
5
  "eval_steps": 100,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -171,6 +171,84 @@
171
  "eval_samples_per_second": 23.686,
172
  "eval_steps_per_second": 5.921,
173
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
  "logging_steps": 10,
@@ -185,7 +263,7 @@
185
  "early_stopping_threshold": 0.0
186
  },
187
  "attributes": {
188
- "early_stopping_patience_counter": 1
189
  }
190
  },
191
  "TrainerControl": {
@@ -199,7 +277,7 @@
199
  "attributes": {}
200
  }
201
  },
202
- "total_flos": 1.32707468181504e+17,
203
  "train_batch_size": 8,
204
  "trial_name": null,
205
  "trial_params": null
 
1
  {
2
  "best_metric": NaN,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.10824463287028685,
5
  "eval_steps": 100,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
171
  "eval_samples_per_second": 23.686,
172
  "eval_steps_per_second": 5.921,
173
  "step": 200
174
+ },
175
+ {
176
+ "epoch": 0.0757712430092008,
177
+ "grad_norm": 0.0,
178
+ "learning_rate": 0.00014845508703326504,
179
+ "loss": 0.0,
180
+ "step": 210
181
+ },
182
+ {
183
+ "epoch": 0.07937939743821036,
184
+ "grad_norm": 0.0,
185
+ "learning_rate": 0.00014373073204588556,
186
+ "loss": 0.0,
187
+ "step": 220
188
+ },
189
+ {
190
+ "epoch": 0.08298755186721991,
191
+ "grad_norm": 0.0,
192
+ "learning_rate": 0.00013888241754733208,
193
+ "loss": 0.0,
194
+ "step": 230
195
+ },
196
+ {
197
+ "epoch": 0.08659570629622948,
198
+ "grad_norm": 0.0,
199
+ "learning_rate": 0.00013392388661180303,
200
+ "loss": 0.0,
201
+ "step": 240
202
+ },
203
+ {
204
+ "epoch": 0.09020386072523905,
205
+ "grad_norm": 0.0,
206
+ "learning_rate": 0.0001288691947339621,
207
+ "loss": 0.0,
208
+ "step": 250
209
+ },
210
+ {
211
+ "epoch": 0.0938120151542486,
212
+ "grad_norm": 0.0,
213
+ "learning_rate": 0.0001237326699871115,
214
+ "loss": 0.0,
215
+ "step": 260
216
+ },
217
+ {
218
+ "epoch": 0.09742016958325816,
219
+ "grad_norm": 0.0,
220
+ "learning_rate": 0.00011852887240871145,
221
+ "loss": 0.0,
222
+ "step": 270
223
+ },
224
+ {
225
+ "epoch": 0.10102832401226773,
226
+ "grad_norm": 0.0,
227
+ "learning_rate": 0.00011327255272837221,
228
+ "loss": 0.0,
229
+ "step": 280
230
+ },
231
+ {
232
+ "epoch": 0.10463647844127728,
233
+ "grad_norm": 0.0,
234
+ "learning_rate": 0.00010797861055530831,
235
+ "loss": 0.0,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 0.10824463287028685,
240
+ "grad_norm": 0.0,
241
+ "learning_rate": 0.00010266205214377748,
242
+ "loss": 0.0,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 0.10824463287028685,
247
+ "eval_loss": NaN,
248
+ "eval_runtime": 197.2517,
249
+ "eval_samples_per_second": 23.665,
250
+ "eval_steps_per_second": 5.916,
251
+ "step": 300
252
  }
253
  ],
254
  "logging_steps": 10,
 
263
  "early_stopping_threshold": 0.0
264
  },
265
  "attributes": {
266
+ "early_stopping_patience_counter": 2
267
  }
268
  },
269
  "TrainerControl": {
 
277
  "attributes": {}
278
  }
279
  },
280
+ "total_flos": 1.99061202272256e+17,
281
  "train_batch_size": 8,
282
  "trial_name": null,
283
  "trial_params": null