Nexspear committed
Commit f14a51f · verified · 1 parent: d3747eb

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96fad8af9b1a684c0a1513ff3972ec6ec341b4677b2e63a1a4a751278d70d8da
+oid sha256:d29fede0f9aa4ec65bb8a24612f2f54e6794fb83b8339aaee9f7a05b707b2d59
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d91c9621b9d46e49ffa01fb304bd2395a33fb94f8818fd3f6f99b57078be1df6
-size 170920084
+oid sha256:2e55b5366313248d2716c4642760cc12e61afa21d9f780915b37f512e24a19d0
+size 170920532
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e222c759bd3e8885d01f80f8ae9354be3426b1a8f5b586fab68f27a11091665f
+oid sha256:20ffebf7b659fadf5edcf5590d56cf6357bd324720bac92824401509b738cb39
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fce54f61503edf00b56f4cd65020ab3a18bbd43c84561899444870908675e793
+oid sha256:74485e67705dc36efbfb69b1e54f842e1ff07894d01bb0e36d6d2526a318b300
 size 1064
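
Each of the four files above is stored as a Git LFS pointer rather than as the binary itself: the repository tracks only the version line, the sha256 oid, and the byte size, and this commit swaps the oids (and, for optimizer.pt, the size) to point at the new step-300 artifacts. As a minimal sketch of what those fields mean, assuming the payload has already been downloaded to the path shown, a file can be checked against its pointer like this (the helper name and local path are assumptions for illustration):

import hashlib
from pathlib import Path

def matches_lfs_pointer(payload_path: str, expected_oid: str, expected_size: int) -> bool:
    # An LFS pointer records the payload's SHA-256 digest (oid) and byte size;
    # a correctly downloaded file should reproduce both exactly.
    data = Path(payload_path).read_bytes()
    return len(data) == expected_size and hashlib.sha256(data).hexdigest() == expected_oid

# Values copied from the new adapter_model.safetensors pointer above;
# the local path is an assumed download location.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "d29fede0f9aa4ec65bb8a24612f2f54e6794fb83b8339aaee9f7a05b707b2d59",
    335604696,
))
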
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1232517957687378,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 2.487562189054726,
+  "epoch": 2.9850746268656714,
   "eval_steps": 50,
-  "global_step": 250,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 7.374,
       "eval_steps_per_second": 1.876,
       "step": 250
+    },
+    {
+      "epoch": 2.587064676616915,
+      "grad_norm": 4.624186992645264,
+      "learning_rate": 9.242458032904311e-06,
+      "loss": 2.8715,
+      "step": 260
+    },
+    {
+      "epoch": 2.6865671641791042,
+      "grad_norm": 6.473374366760254,
+      "learning_rate": 5.2346828817197655e-06,
+      "loss": 1.3872,
+      "step": 270
+    },
+    {
+      "epoch": 2.7860696517412933,
+      "grad_norm": 4.681886196136475,
+      "learning_rate": 2.3379444289913342e-06,
+      "loss": 1.8138,
+      "step": 280
+    },
+    {
+      "epoch": 2.8855721393034823,
+      "grad_norm": 5.433272361755371,
+      "learning_rate": 5.862042845640403e-07,
+      "loss": 1.9018,
+      "step": 290
+    },
+    {
+      "epoch": 2.9850746268656714,
+      "grad_norm": 3.1172711849212646,
+      "learning_rate": 0.0,
+      "loss": 1.2011,
+      "step": 300
+    },
+    {
+      "epoch": 2.9850746268656714,
+      "eval_loss": 1.1773170232772827,
+      "eval_runtime": 22.8856,
+      "eval_samples_per_second": 7.385,
+      "eval_steps_per_second": 1.879,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -244,7 +287,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -253,12 +296,12 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop": false
+      "should_training_stop": true
     },
     "attributes": {}
   }
 },
-  "total_flos": 6.574355575335813e+17,
+  "total_flos": 7.893381799823278e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null