rakhman-llm committed on
Commit
730cb2c
·
verified ·
1 Parent(s): c9669df

Training in progress, step 13500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:890ad2873a2bbd9defa42e9be53c396630c1fa479c804ecbcda9c1bef5c9a5b7
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5eab5cf423687e71bb09a2192d390f2707db089b85d712fe4ba933f4e60e68f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7df779224de4aac7dbc2c481b18f226ef75df55b33434fd2a24aaa00199ae182
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:807f9477c6257e1784434ba0760f1f95f0fdf6abcdcb9183ee441309d58f6f04
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392a7eb84bc6c0f11073a38c2f0980cc2fcea2ab711f0ee9d31a3b2c47437b70
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35473c59fad66809b7d88f5a452c0f7e0ed4b7e0e9e5416152035290cb60f273
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8f863bfc80aea1aeb546487ead9ef6092bc37fe14387401d70bc8b324d152f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3a75abd7e2a468c4e910b9da4dc47554f88ea4d9cfaefa84fb6258d08648a5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8666666666666667,
5
  "eval_steps": 500,
6
- "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -917,6 +917,41 @@
917
  "learning_rate": 1.4223555555555557e-05,
918
  "loss": 0.0723,
919
  "step": 13000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
920
  }
921
  ],
922
  "logging_steps": 100,
@@ -936,7 +971,7 @@
936
  "attributes": {}
937
  }
938
  },
939
- "total_flos": 3.166581030912e+16,
940
  "train_batch_size": 4,
941
  "trial_name": null,
942
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9,
5
  "eval_steps": 500,
6
+ "global_step": 13500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
917
  "learning_rate": 1.4223555555555557e-05,
918
  "loss": 0.0723,
919
  "step": 13000
920
+ },
921
+ {
922
+ "epoch": 0.8733333333333333,
923
+ "grad_norm": 0.2538166642189026,
924
+ "learning_rate": 1.4179111111111112e-05,
925
+ "loss": 0.065,
926
+ "step": 13100
927
+ },
928
+ {
929
+ "epoch": 0.88,
930
+ "grad_norm": 0.3290730118751526,
931
+ "learning_rate": 1.4134666666666667e-05,
932
+ "loss": 0.0712,
933
+ "step": 13200
934
+ },
935
+ {
936
+ "epoch": 0.8866666666666667,
937
+ "grad_norm": 0.1109674721956253,
938
+ "learning_rate": 1.4090222222222222e-05,
939
+ "loss": 0.0658,
940
+ "step": 13300
941
+ },
942
+ {
943
+ "epoch": 0.8933333333333333,
944
+ "grad_norm": 0.30605512857437134,
945
+ "learning_rate": 1.4045777777777777e-05,
946
+ "loss": 0.0696,
947
+ "step": 13400
948
+ },
949
+ {
950
+ "epoch": 0.9,
951
+ "grad_norm": 0.32679420709609985,
952
+ "learning_rate": 1.4001333333333333e-05,
953
+ "loss": 0.0669,
954
+ "step": 13500
955
  }
956
  ],
957
  "logging_steps": 100,
 
971
  "attributes": {}
972
  }
973
  },
974
+ "total_flos": 3.288372609024e+16,
975
  "train_batch_size": 4,
976
  "trial_name": null,
977
  "trial_params": null