rakhman-llm committed on
Commit
97a58b9
·
verified ·
1 Parent(s): aeb1c49

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37eb7a5e4bceab7d32011b5d17f5f7c727abd642937f2f5113f334b54b67ecce
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:935231e882baac445be09e5015300cee4d31f5e5feef6fcef690c14e5e55b597
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9719ee3b3f8442d6fc5e8f63a83f241cd994c81df3f6f1e293214b163473444
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f96ab152b2e84116d344289867a73a5bcff139ded8e28fcfbd0dfa6edfa1330
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eca803041d1c71084d00ad4722811ed67342df4d01c4b5a158f3fbc70c751c86
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:076751cbfa70935d8290a9eb7adf2c98a5954f528f9899c6c9bad4b99c90c7a1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c79865c40bc524fb8d08f87fe96773f5e5e19275573e12c21b3c870bc7e6d8a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78a0f82725f5cba903915c8114309109800227c56b0ea02410918cc9d05f8aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.36666666666666664,
5
  "eval_steps": 500,
6
- "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -392,6 +392,41 @@
392
  "learning_rate": 1.755688888888889e-05,
393
  "loss": 0.0708,
394
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  }
396
  ],
397
  "logging_steps": 100,
@@ -411,7 +446,7 @@
411
  "attributes": {}
412
  }
413
  },
414
- "total_flos": 1.339707359232e+16,
415
  "train_batch_size": 4,
416
  "trial_name": null,
417
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4,
5
  "eval_steps": 500,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
392
  "learning_rate": 1.755688888888889e-05,
393
  "loss": 0.0708,
394
  "step": 5500
395
+ },
396
+ {
397
+ "epoch": 0.37333333333333335,
398
+ "grad_norm": 0.2491353303194046,
399
+ "learning_rate": 1.7512444444444444e-05,
400
+ "loss": 0.0727,
401
+ "step": 5600
402
+ },
403
+ {
404
+ "epoch": 0.38,
405
+ "grad_norm": 0.20241300761699677,
406
+ "learning_rate": 1.7468e-05,
407
+ "loss": 0.0682,
408
+ "step": 5700
409
+ },
410
+ {
411
+ "epoch": 0.38666666666666666,
412
+ "grad_norm": 0.20645934343338013,
413
+ "learning_rate": 1.7423555555555558e-05,
414
+ "loss": 0.0711,
415
+ "step": 5800
416
+ },
417
+ {
418
+ "epoch": 0.3933333333333333,
419
+ "grad_norm": 0.280748575925827,
420
+ "learning_rate": 1.7379111111111113e-05,
421
+ "loss": 0.0682,
422
+ "step": 5900
423
+ },
424
+ {
425
+ "epoch": 0.4,
426
+ "grad_norm": 0.1867647022008896,
427
+ "learning_rate": 1.733466666666667e-05,
428
+ "loss": 0.0701,
429
+ "step": 6000
430
  }
431
  ],
432
  "logging_steps": 100,
 
446
  "attributes": {}
447
  }
448
  },
449
+ "total_flos": 1.461498937344e+16,
450
  "train_batch_size": 4,
451
  "trial_name": null,
452
  "trial_params": null