rakhman-llm commited on
Commit
2b026ad
·
verified ·
1 Parent(s): 0e37413

Training in progress, step 27500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aee5c58e4fc85927dc4013d0fe750e56e604920994d516f324190a1a8d7a05f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fbf8423ff110283327b1f8b90f9081b1058c9376cd2711b7771ceec771519e1
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d58926cc53cc96e7baec8d580220f0a290e47d6bc25d45f7fd7e61bf35d05d9a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef9caf61392dd46c560c432d5d06b4ae3b7fe098c753ce284032c427faa732e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8469fa4abc9740f9af7a5dacb10ea8c5107fb6c8cbc91f6fcad8c8b05132d7d0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92aea66aa69545e4cc667c6773e4db275b023af14af95463bf526993dcaef01
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dc2ab4847205143252d3ebb3c6929a744853487e6aadfb5f1c78d5dcdea141c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d7857c6094c610d98d2faa905fca5f4d7aec2a9e3d0f066c66f0a11bdc1413e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.8,
5
  "eval_steps": 500,
6
- "global_step": 27000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1905,6 +1905,41 @@
1905
  "learning_rate": 8.004e-06,
1906
  "loss": 0.0572,
1907
  "step": 27000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1908
  }
1909
  ],
1910
  "logging_steps": 100,
@@ -1924,7 +1959,7 @@
1924
  "attributes": {}
1925
  }
1926
  },
1927
- "total_flos": 6.576745218048e+16,
1928
  "train_batch_size": 4,
1929
  "trial_name": null,
1930
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8333333333333335,
5
  "eval_steps": 500,
6
+ "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1905
  "learning_rate": 8.004e-06,
1906
  "loss": 0.0572,
1907
  "step": 27000
1908
+ },
1909
+ {
1910
+ "epoch": 1.8066666666666666,
1911
+ "grad_norm": 0.10920804738998413,
1912
+ "learning_rate": 7.959555555555555e-06,
1913
+ "loss": 0.0538,
1914
+ "step": 27100
1915
+ },
1916
+ {
1917
+ "epoch": 1.8133333333333335,
1918
+ "grad_norm": 0.10139674693346024,
1919
+ "learning_rate": 7.915111111111112e-06,
1920
+ "loss": 0.0536,
1921
+ "step": 27200
1922
+ },
1923
+ {
1924
+ "epoch": 1.8199999999999998,
1925
+ "grad_norm": 0.17728354036808014,
1926
+ "learning_rate": 7.870666666666667e-06,
1927
+ "loss": 0.0582,
1928
+ "step": 27300
1929
+ },
1930
+ {
1931
+ "epoch": 1.8266666666666667,
1932
+ "grad_norm": 0.19340912997722626,
1933
+ "learning_rate": 7.826222222222223e-06,
1934
+ "loss": 0.0594,
1935
+ "step": 27400
1936
+ },
1937
+ {
1938
+ "epoch": 1.8333333333333335,
1939
+ "grad_norm": 0.2556203603744507,
1940
+ "learning_rate": 7.781777777777778e-06,
1941
+ "loss": 0.0593,
1942
+ "step": 27500
1943
  }
1944
  ],
1945
  "logging_steps": 100,
 
1959
  "attributes": {}
1960
  }
1961
  },
1962
+ "total_flos": 6.69853679616e+16,
1963
  "train_batch_size": 4,
1964
  "trial_name": null,
1965
  "trial_params": null