rakhman-llm commited on
Commit
d40bb85
·
verified ·
1 Parent(s): 3a7a5ec

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:995dadf0d3d095797b5dbc557b2d1541ca751ef767140b2eac486d1f7802f08c
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8153813283c4389ef5a9863dfbf03bd90de9af06723c2ed7469560d2f6cb9016
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7361a2fab55f925d42ccfef981caf4fa3db8ce7635371819cdbf2b69ea2a0076
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e3c6d028cc61eb16c10cfeb96ae472b416bf61c2430d616409a7e075447463c
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eefe6d642f2fec79a1485caf3f0bf5664dd3a0b7470a19b36fef717b4ce4330f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b21a8df05d6c8e0e6fd608ca76fd60fcfc2fde098551b903447e393817425942
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920ac20f3da0aa073b783e5c39cbf10201482c9a198f2029422a6d2d7dd4763e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3da069d23e3fc76f1f7013dd495a2b0f4544633e580b179d33500b4d99b2575
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -217,6 +217,41 @@
217
  "learning_rate": 1.8667555555555555e-05,
218
  "loss": 0.073,
219
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  }
221
  ],
222
  "logging_steps": 100,
@@ -236,7 +271,7 @@
236
  "attributes": {}
237
  }
238
  },
239
- "total_flos": 7307494686720000.0,
240
  "train_batch_size": 4,
241
  "trial_name": null,
242
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23333333333333334,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
217
  "learning_rate": 1.8667555555555555e-05,
218
  "loss": 0.073,
219
  "step": 3000
220
+ },
221
+ {
222
+ "epoch": 0.20666666666666667,
223
+ "grad_norm": 0.28241923451423645,
224
+ "learning_rate": 1.862311111111111e-05,
225
+ "loss": 0.0682,
226
+ "step": 3100
227
+ },
228
+ {
229
+ "epoch": 0.21333333333333335,
230
+ "grad_norm": 0.2821277976036072,
231
+ "learning_rate": 1.857866666666667e-05,
232
+ "loss": 0.0813,
233
+ "step": 3200
234
+ },
235
+ {
236
+ "epoch": 0.22,
237
+ "grad_norm": 0.2306368201971054,
238
+ "learning_rate": 1.8534222222222224e-05,
239
+ "loss": 0.0741,
240
+ "step": 3300
241
+ },
242
+ {
243
+ "epoch": 0.22666666666666666,
244
+ "grad_norm": 0.29834499955177307,
245
+ "learning_rate": 1.848977777777778e-05,
246
+ "loss": 0.0801,
247
+ "step": 3400
248
+ },
249
+ {
250
+ "epoch": 0.23333333333333334,
251
+ "grad_norm": 1.8099658489227295,
252
+ "learning_rate": 1.8445333333333334e-05,
253
+ "loss": 0.0726,
254
+ "step": 3500
255
  }
256
  ],
257
  "logging_steps": 100,
 
271
  "attributes": {}
272
  }
273
  },
274
+ "total_flos": 8525410467840000.0,
275
  "train_batch_size": 4,
276
  "trial_name": null,
277
  "trial_params": null