rakhman-llm commited on
Commit
b05b888
·
verified ·
1 Parent(s): 43aa29c

Training in progress, step 11500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e095b06f5c65d9c3c8a3debcf3e6201bbcf41c468b57fd9fe8830b5d51ac4f48
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dfb1303c43437d0a5d16c718f1a0be0355a6bc6198d78fcee515c503884a9a2
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b9380bb79afa61cdc27180520af86aef9f4d10f83ed2161a4655c0531d4686d
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9169b770e0ac0200417a6d68811804c1fb73c586cfc8a20550b0caa6beef0a3
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fb8b27c24f221351a66f6c36da6c9928049823c3f1134500eb9faea09def09e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68393a5eefa6936320457e661ed8769751e258badc1e65d137b49f873ca59e29
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:403e883e32409c0a8b6b9b4a178312db263273af6b84b67a9175809fcce1d74a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a1695a275cff99a2c62fc4abe2a84b274a1c4f4a080a28dd6e841bca4417f1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7333333333333333,
5
  "eval_steps": 500,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -777,6 +777,41 @@
777
  "learning_rate": 1.5112444444444445e-05,
778
  "loss": 0.068,
779
  "step": 11000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
780
  }
781
  ],
782
  "logging_steps": 100,
@@ -796,7 +831,7 @@
796
  "attributes": {}
797
  }
798
  },
799
- "total_flos": 2.679414718464e+16,
800
  "train_batch_size": 4,
801
  "trial_name": null,
802
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7666666666666667,
5
  "eval_steps": 500,
6
+ "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
777
  "learning_rate": 1.5112444444444445e-05,
778
  "loss": 0.068,
779
  "step": 11000
780
+ },
781
+ {
782
+ "epoch": 0.74,
783
+ "grad_norm": 0.19551828503608704,
784
+ "learning_rate": 1.5068e-05,
785
+ "loss": 0.0693,
786
+ "step": 11100
787
+ },
788
+ {
789
+ "epoch": 0.7466666666666667,
790
+ "grad_norm": 0.1611281782388687,
791
+ "learning_rate": 1.5023555555555557e-05,
792
+ "loss": 0.0677,
793
+ "step": 11200
794
+ },
795
+ {
796
+ "epoch": 0.7533333333333333,
797
+ "grad_norm": 0.18722090125083923,
798
+ "learning_rate": 1.4979111111111113e-05,
799
+ "loss": 0.0711,
800
+ "step": 11300
801
+ },
802
+ {
803
+ "epoch": 0.76,
804
+ "grad_norm": 0.1402270644903183,
805
+ "learning_rate": 1.4934666666666668e-05,
806
+ "loss": 0.0649,
807
+ "step": 11400
808
+ },
809
+ {
810
+ "epoch": 0.7666666666666667,
811
+ "grad_norm": 0.09549852460622787,
812
+ "learning_rate": 1.4890222222222223e-05,
813
+ "loss": 0.0624,
814
+ "step": 11500
815
  }
816
  ],
817
  "logging_steps": 100,
 
831
  "attributes": {}
832
  }
833
  },
834
+ "total_flos": 2.801206296576e+16,
835
  "train_batch_size": 4,
836
  "trial_name": null,
837
  "trial_params": null