rakhman-llm commited on
Commit
ebf6551
·
verified ·
1 Parent(s): b5fa56f

Training in progress, step 7000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6c581988e145fe28231a5b363482e4cc6aa1c009b42a7cafcee97ebd43f75b8
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0338daac15cfc77c983eea6a6122874f95852031cec6efb33fb6843974f909
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0334bff194e682ae1d5a62ead6d631a75270fccfa182f5901d35a528b0fb29db
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eff7218517ca4458c3d29b7230cefb18dfe8c171f0a8309d5494771317c7b329
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84aef586a811d916d5e08d0a9103c7f2c4e17550364204602e5b8c751b16f429
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8c94fe50942676a4b800b8807f5b832311548d8f2c40eff1078995f3e3f0ab5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c31374bac28c19f30a1c44dbbe9200685351926b09acbd3fd6b855ef898dbb0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e5e69cb9645219d028edd3b028ebf30583f1263680ed0ab467551ed740d562
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.43333333333333335,
5
  "eval_steps": 500,
6
- "global_step": 6500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -462,6 +462,41 @@
462
  "learning_rate": 1.7112444444444445e-05,
463
  "loss": 0.0652,
464
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
465
  }
466
  ],
467
  "logging_steps": 100,
@@ -481,7 +516,7 @@
481
  "attributes": {}
482
  }
483
  },
484
- "total_flos": 1.583290515456e+16,
485
  "train_batch_size": 4,
486
  "trial_name": null,
487
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4666666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
462
  "learning_rate": 1.7112444444444445e-05,
463
  "loss": 0.0652,
464
  "step": 6500
465
+ },
466
+ {
467
+ "epoch": 0.44,
468
+ "grad_norm": 0.45798158645629883,
469
+ "learning_rate": 1.7068000000000003e-05,
470
+ "loss": 0.0778,
471
+ "step": 6600
472
+ },
473
+ {
474
+ "epoch": 0.44666666666666666,
475
+ "grad_norm": 0.24436144530773163,
476
+ "learning_rate": 1.702355555555556e-05,
477
+ "loss": 0.0725,
478
+ "step": 6700
479
+ },
480
+ {
481
+ "epoch": 0.4533333333333333,
482
+ "grad_norm": 0.2054828256368637,
483
+ "learning_rate": 1.6979111111111114e-05,
484
+ "loss": 0.0676,
485
+ "step": 6800
486
+ },
487
+ {
488
+ "epoch": 0.46,
489
+ "grad_norm": 0.12867185473442078,
490
+ "learning_rate": 1.693466666666667e-05,
491
+ "loss": 0.0689,
492
+ "step": 6900
493
+ },
494
+ {
495
+ "epoch": 0.4666666666666667,
496
+ "grad_norm": 0.17768052220344543,
497
+ "learning_rate": 1.6890222222222224e-05,
498
+ "loss": 0.067,
499
+ "step": 7000
500
  }
501
  ],
502
  "logging_steps": 100,
 
516
  "attributes": {}
517
  }
518
  },
519
+ "total_flos": 1.705082093568e+16,
520
  "train_batch_size": 4,
521
  "trial_name": null,
522
  "trial_params": null