oldiday commited on
Commit
0665c8c
·
verified ·
1 Parent(s): e0bcd42

Training in progress, step 334, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7271b7abd71ff28a6d8862329ed25f52ef9bdaed0cd5f9426db9af7d154d0162
3
  size 80792096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a40893551892869cef6185900dc5cd060ec50c15a4dedd57c0148d6c49941d00
3
  size 80792096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c97053614ad9d594d3c931b703e869bd2ed545d776abf4c92958c8accb20b2d
3
  size 41460084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f47766cd7c29a3133b6ec6a8c13f2d4751a2d59c70bb6070522812124244ec3
3
  size 41460084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdf57361bea28743679736bace185586f54b838c0b6e89f824be140ba835deb8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23baeff9d8f6e26f2714adedc68547feeee95d2a00fc7ab711e496286b75c485
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c14f228dcfe997eb31918193f198918e61dc44a4118e1ce6d0f02c3f7f0fb85b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3decfd55b176ddf4d3636844920f3f2a82e97670ba9e40977f0230eaf3d1c0f8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.010375738143921,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 2.696629213483146,
5
  "eval_steps": 100,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -249,6 +249,27 @@
249
  "eval_samples_per_second": 13.432,
250
  "eval_steps_per_second": 3.376,
251
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  }
253
  ],
254
  "logging_steps": 10,
@@ -272,12 +293,12 @@
272
  "should_evaluate": false,
273
  "should_log": false,
274
  "should_save": true,
275
- "should_training_stop": false
276
  },
277
  "attributes": {}
278
  }
279
  },
280
- "total_flos": 4.312152896050299e+17,
281
  "train_batch_size": 8,
282
  "trial_name": null,
283
  "trial_params": null
 
1
  {
2
  "best_metric": 1.010375738143921,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 3.002247191011236,
5
  "eval_steps": 100,
6
+ "global_step": 334,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
249
  "eval_samples_per_second": 13.432,
250
  "eval_steps_per_second": 3.376,
251
  "step": 300
252
+ },
253
+ {
254
+ "epoch": 2.7865168539325844,
255
+ "grad_norm": 0.8065077066421509,
256
+ "learning_rate": 2.6955129420176196e-06,
257
+ "loss": 0.7093,
258
+ "step": 310
259
+ },
260
+ {
261
+ "epoch": 2.8764044943820224,
262
+ "grad_norm": 0.8511288166046143,
263
+ "learning_rate": 9.199596635154683e-07,
264
+ "loss": 0.8656,
265
+ "step": 320
266
+ },
267
+ {
268
+ "epoch": 2.966292134831461,
269
+ "grad_norm": 0.7981316447257996,
270
+ "learning_rate": 7.520474957699586e-08,
271
+ "loss": 0.6292,
272
+ "step": 330
273
  }
274
  ],
275
  "logging_steps": 10,
 
293
  "should_evaluate": false,
294
  "should_log": false,
295
  "should_save": true,
296
+ "should_training_stop": true
297
  },
298
  "attributes": {}
299
  }
300
  },
301
+ "total_flos": 4.8005266479670886e+17,
302
  "train_batch_size": 8,
303
  "trial_name": null,
304
  "trial_params": null