SystemAdmin123 committed on
Commit
429dbb7
·
verified ·
1 Parent(s): df3f137

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8a051a97436cbb9ced7d9e533e00b6fc981dc733924f80205517ea188706c54
3
  size 28130688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8d53c9b548e090317fded4ec69e6a50e79f7abd1f4e1ece6e1c034858cf7070
3
  size 28130688
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba9ca81d0e648b9f245b287b59cac8a934e081397b8c14fb05b16f75f4007821
3
  size 28684730
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d62c75d0600bae678b1f273eb2794e7fdd1c7257fad922fb6b903851d4ef365
3
  size 28684730
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca96cd8cea5e89c26f322ab0bc4569dd89f58599184456a2a043cfe41d0a9016
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b25501508b4f34ea1a73010114c43b1739477011f6f24a5e1766c3a43b5bb5
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e1c8b411336f8c21c042cf34f54cd83fe460e6146fa041149703c71f00d1e7b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5532b29acfbb21e7d70bbcb7b93a6c6bd479aa36c62bea63ef7a900d782c98dc
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c588e7a6251848ca72003d225e2e8cf344e2b4645d7ce1e51f191dc1db88063
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257e790feaf3afbabfdd893ed6079c69aeb0f2fda34b2ddc62f567dd45e58200
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5b050aa4a26eb700ebc404ec1ff62750644e1e3fa94786597f4b7eda4b17bfa
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383db1212f88ccb59b67f456f008a4b37947154cd7ae5385dd6e0aad41cfdda3
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e397ffcf130e7acafde3d20cb6e16fdb641f8e06485f1c15d8ff4eb284addba3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be38faf20c324f0c202122a2755cf64fb4ed38213202bb53c9736c5192d6b577
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63e0a7403419853c906dc2a28ee5a75408f4aa4439daaf673dc94372a55a22d4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cbc29a7d4feac0baba7f44a42fde1ea828f0d85a94f6bf063c9835d05f30d5
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f9663d63b5d724c82dcf4a32718ed3fc7be625d342141f353e8daafed1a8ab
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d5bee6c8413e58538976b7898c63aa9a1245d3dbd7ecf2734d5b29bd6b5dd6
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e34e10848d2dbdc394eecb9997530b5e1ea02407c4d3d521c60cc28f7a872d18
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd648e71fafd1f1bff0a8c567280b45a6e3e1c3559d777e208a4f8a364f53fde
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f0c7ff54b85ffef1dcbffa5d8d256d7b7e02cf6f2a611b338e53d605c1ee098
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25d9368287315b2afbde3ac53b1bfc6e6c60e14723f23ec40564523c8a5be5ec
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1893491124260355,
5
  "eval_steps": 20,
6
- "global_step": 160,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -191,6 +191,50 @@
191
  "eval_samples_per_second": 639.824,
192
  "eval_steps_per_second": 40.042,
193
  "step": 160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "logging_steps": 10,
@@ -210,7 +254,7 @@
210
  "attributes": {}
211
  }
212
  },
213
- "total_flos": 240251415035904.0,
214
  "train_batch_size": 2,
215
  "trial_name": null,
216
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23668639053254437,
5
  "eval_steps": 20,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
191
  "eval_samples_per_second": 639.824,
192
  "eval_steps_per_second": 40.042,
193
  "step": 160
194
+ },
195
+ {
196
+ "epoch": 0.20118343195266272,
197
+ "grad_norm": 576.0,
198
+ "learning_rate": 0.00019982289153773646,
199
+ "loss": 8.9456,
200
+ "step": 170
201
+ },
202
+ {
203
+ "epoch": 0.21301775147928995,
204
+ "grad_norm": 3888.0,
205
+ "learning_rate": 0.00019973546914596623,
206
+ "loss": 9.1233,
207
+ "step": 180
208
+ },
209
+ {
210
+ "epoch": 0.21301775147928995,
211
+ "eval_loss": 9.207473754882812,
212
+ "eval_runtime": 2.8413,
213
+ "eval_samples_per_second": 528.635,
214
+ "eval_steps_per_second": 33.084,
215
+ "step": 180
216
+ },
217
+ {
218
+ "epoch": 0.22485207100591717,
219
+ "grad_norm": 884.0,
220
+ "learning_rate": 0.00019963059593496268,
221
+ "loss": 9.3116,
222
+ "step": 190
223
+ },
224
+ {
225
+ "epoch": 0.23668639053254437,
226
+ "grad_norm": 410.0,
227
+ "learning_rate": 0.00019950829025450114,
228
+ "loss": 9.2642,
229
+ "step": 200
230
+ },
231
+ {
232
+ "epoch": 0.23668639053254437,
233
+ "eval_loss": 9.775616645812988,
234
+ "eval_runtime": 2.3679,
235
+ "eval_samples_per_second": 634.329,
236
+ "eval_steps_per_second": 39.698,
237
+ "step": 200
238
  }
239
  ],
240
  "logging_steps": 10,
 
254
  "attributes": {}
255
  }
256
  },
257
+ "total_flos": 300220566994944.0,
258
  "train_batch_size": 2,
259
  "trial_name": null,
260
  "trial_params": null