rakhman-llm commited on
Commit
bf441a0
·
verified ·
1 Parent(s): d374b35

Training in progress, step 14000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5eab5cf423687e71bb09a2192d390f2707db089b85d712fe4ba933f4e60e68f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e948c8d9a7ad56ccbd6700926716442cecc28d16ec12db2c3ed5e7e3f659db9b
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:807f9477c6257e1784434ba0760f1f95f0fdf6abcdcb9183ee441309d58f6f04
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af58a191c850477c58ee5c9fd6d80b4f79d202f3c260b244c0bb72f25e9c46bf
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35473c59fad66809b7d88f5a452c0f7e0ed4b7e0e9e5416152035290cb60f273
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba8071d282c3cb60c40a4d3269fc6a11f1d4d5aa00c59a4ed8e5ce671a0abf0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f3a75abd7e2a468c4e910b9da4dc47554f88ea4d9cfaefa84fb6258d08648a5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aecb0190c300aa10c453b1ef86e11d1993afb13996c225cb9c0fcb417f89ff36
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9,
5
  "eval_steps": 500,
6
- "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -952,6 +952,41 @@
952
  "learning_rate": 1.4001333333333333e-05,
953
  "loss": 0.0669,
954
  "step": 13500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955
  }
956
  ],
957
  "logging_steps": 100,
@@ -971,7 +1006,7 @@
971
  "attributes": {}
972
  }
973
  },
974
- "total_flos": 3.288372609024e+16,
975
  "train_batch_size": 4,
976
  "trial_name": null,
977
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9333333333333333,
5
  "eval_steps": 500,
6
+ "global_step": 14000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
952
  "learning_rate": 1.4001333333333333e-05,
953
  "loss": 0.0669,
954
  "step": 13500
955
+ },
956
+ {
957
+ "epoch": 0.9066666666666666,
958
+ "grad_norm": 0.26795893907546997,
959
+ "learning_rate": 1.3956888888888891e-05,
960
+ "loss": 0.0698,
961
+ "step": 13600
962
+ },
963
+ {
964
+ "epoch": 0.9133333333333333,
965
+ "grad_norm": 0.20815995335578918,
966
+ "learning_rate": 1.3912444444444447e-05,
967
+ "loss": 0.0655,
968
+ "step": 13700
969
+ },
970
+ {
971
+ "epoch": 0.92,
972
+ "grad_norm": 0.1046639233827591,
973
+ "learning_rate": 1.3868444444444447e-05,
974
+ "loss": 0.0685,
975
+ "step": 13800
976
+ },
977
+ {
978
+ "epoch": 0.9266666666666666,
979
+ "grad_norm": 0.26044830679893494,
980
+ "learning_rate": 1.3824000000000002e-05,
981
+ "loss": 0.0717,
982
+ "step": 13900
983
+ },
984
+ {
985
+ "epoch": 0.9333333333333333,
986
+ "grad_norm": 0.14924395084381104,
987
+ "learning_rate": 1.3779555555555557e-05,
988
+ "loss": 0.0666,
989
+ "step": 14000
990
  }
991
  ],
992
  "logging_steps": 100,
 
1006
  "attributes": {}
1007
  }
1008
  },
1009
+ "total_flos": 3.410164187136e+16,
1010
  "train_batch_size": 4,
1011
  "trial_name": null,
1012
  "trial_params": null