rakhman-llm commited on
Commit
36488f8
·
verified ·
1 Parent(s): 7158899

Training in progress, step 23500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1cf4c3d10cfbf74e0b37f9cbc786fae12e5bbc6e3706973bfac0175c00ecce
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8780b961dcbf09881cce6a4ec4f3ce77d0ac1ee0f91af800fa2edc12b4f6187
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bca9f5a43e8c43cfe44e64364c4d417027de463f09d30955f0ca8d99d5517c8a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef9bac5420d614e9bac93f0dd5fea2e0183354a0f7d5a23ad1f55f6681e9e683
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29008ac370065a7f89ca5e36ad7037a88abffeace17ded8b029713d8007c00c5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:243e2ec3e6c9bc99b16b0183c86988f987cd95808c30f0890be20ce20d1d5d66
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95f8453b710c139b3558b89069d0f1dbb29ca8e0859ba342fa8818ead3f6d844
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06c57df543e7b3168b7bf3184803db11f7fb923e889cb830127c49143334357b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.5333333333333332,
5
  "eval_steps": 500,
6
- "global_step": 23000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1625,6 +1625,41 @@
1625
  "learning_rate": 9.78088888888889e-06,
1626
  "loss": 0.0545,
1627
  "step": 23000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1628
  }
1629
  ],
1630
  "logging_steps": 100,
@@ -1644,7 +1679,7 @@
1644
  "attributes": {}
1645
  }
1646
  },
1647
- "total_flos": 5.602412593152e+16,
1648
  "train_batch_size": 4,
1649
  "trial_name": null,
1650
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5666666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 23500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1625
  "learning_rate": 9.78088888888889e-06,
1626
  "loss": 0.0545,
1627
  "step": 23000
1628
+ },
1629
+ {
1630
+ "epoch": 1.54,
1631
+ "grad_norm": 0.11006025224924088,
1632
+ "learning_rate": 9.736444444444444e-06,
1633
+ "loss": 0.0612,
1634
+ "step": 23100
1635
+ },
1636
+ {
1637
+ "epoch": 1.5466666666666666,
1638
+ "grad_norm": 0.14539223909378052,
1639
+ "learning_rate": 9.692e-06,
1640
+ "loss": 0.0582,
1641
+ "step": 23200
1642
+ },
1643
+ {
1644
+ "epoch": 1.5533333333333332,
1645
+ "grad_norm": 0.23037117719650269,
1646
+ "learning_rate": 9.647555555555557e-06,
1647
+ "loss": 0.0599,
1648
+ "step": 23300
1649
+ },
1650
+ {
1651
+ "epoch": 1.56,
1652
+ "grad_norm": 0.2668769657611847,
1653
+ "learning_rate": 9.603111111111112e-06,
1654
+ "loss": 0.0545,
1655
+ "step": 23400
1656
+ },
1657
+ {
1658
+ "epoch": 1.5666666666666667,
1659
+ "grad_norm": 0.3005671501159668,
1660
+ "learning_rate": 9.558666666666667e-06,
1661
+ "loss": 0.0621,
1662
+ "step": 23500
1663
  }
1664
  ],
1665
  "logging_steps": 100,
 
1679
  "attributes": {}
1680
  }
1681
  },
1682
+ "total_flos": 5.724204171264e+16,
1683
  "train_batch_size": 4,
1684
  "trial_name": null,
1685
  "trial_params": null