rakhman-llm commited on
Commit
48abe1d
·
verified ·
1 Parent(s): 2ed45bb

Training in progress, step 24000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8780b961dcbf09881cce6a4ec4f3ce77d0ac1ee0f91af800fa2edc12b4f6187
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34a5bf7d80afb43e227b324a1b6cda7e106c37feb23b0824b268c18e7b3a3d1a
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef9bac5420d614e9bac93f0dd5fea2e0183354a0f7d5a23ad1f55f6681e9e683
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd314d98a4869313e973a437be445c914aa71bb8b03a7af5f6f9a32ad34d63bf
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:243e2ec3e6c9bc99b16b0183c86988f987cd95808c30f0890be20ce20d1d5d66
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727aa5fd6a222ba79327243b6bf0f36bc88c1b723f0949eac077fe52d4306974
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06c57df543e7b3168b7bf3184803db11f7fb923e889cb830127c49143334357b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:751ad724592c1d67b441302a9338d9d899df51042c6edaf69d5d08369bc3c9df
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.5666666666666667,
5
  "eval_steps": 500,
6
- "global_step": 23500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1660,6 +1660,41 @@
1660
  "learning_rate": 9.558666666666667e-06,
1661
  "loss": 0.0621,
1662
  "step": 23500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1663
  }
1664
  ],
1665
  "logging_steps": 100,
@@ -1679,7 +1714,7 @@
1679
  "attributes": {}
1680
  }
1681
  },
1682
- "total_flos": 5.724204171264e+16,
1683
  "train_batch_size": 4,
1684
  "trial_name": null,
1685
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6,
5
  "eval_steps": 500,
6
+ "global_step": 24000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1660
  "learning_rate": 9.558666666666667e-06,
1661
  "loss": 0.0621,
1662
  "step": 23500
1663
+ },
1664
+ {
1665
+ "epoch": 1.5733333333333333,
1666
+ "grad_norm": 0.2637277841567993,
1667
+ "learning_rate": 9.514666666666667e-06,
1668
+ "loss": 0.0505,
1669
+ "step": 23600
1670
+ },
1671
+ {
1672
+ "epoch": 1.58,
1673
+ "grad_norm": 0.17785485088825226,
1674
+ "learning_rate": 9.470222222222222e-06,
1675
+ "loss": 0.0587,
1676
+ "step": 23700
1677
+ },
1678
+ {
1679
+ "epoch": 1.5866666666666667,
1680
+ "grad_norm": 0.11615557968616486,
1681
+ "learning_rate": 9.425777777777778e-06,
1682
+ "loss": 0.0545,
1683
+ "step": 23800
1684
+ },
1685
+ {
1686
+ "epoch": 1.5933333333333333,
1687
+ "grad_norm": 0.2337283492088318,
1688
+ "learning_rate": 9.381333333333335e-06,
1689
+ "loss": 0.0574,
1690
+ "step": 23900
1691
+ },
1692
+ {
1693
+ "epoch": 1.6,
1694
+ "grad_norm": 0.21848595142364502,
1695
+ "learning_rate": 9.33688888888889e-06,
1696
+ "loss": 0.0581,
1697
+ "step": 24000
1698
  }
1699
  ],
1700
  "logging_steps": 100,
 
1714
  "attributes": {}
1715
  }
1716
  },
1717
+ "total_flos": 5.845995749376e+16,
1718
  "train_batch_size": 4,
1719
  "trial_name": null,
1720
  "trial_params": null