Rakhman16 commited on
Commit
0547431
·
verified ·
1 Parent(s): 29317d4

Training in progress, step 11000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a27d48d34b8ccad96e402eb4f94ec667bf372ecd65cd4746cd18d89a3ee31ae
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:720eceeb2b357ad023a64f886e05d484eb7d9c0097f7eb7db6168cd2d52a3b5b
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96bc579cba2ba220a43642dd36ae216045b6c492a0b978a80ccd6cf9f5f30491
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c19224722ae663a153f5b72e8e4abd5762e71691698100f741aa39099196b03f
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d0ff9a7184dfef904a073aa76234f185ca283c0a479c1a8c903692aec73c4aa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6eb93bead6d30b6932224ec7fe6ef202eebeaf7927b1e6638dfda624b533562
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68bbf668b8239771e72cdecbaae5a40e09f6e926879b6dffd5fd09ed3de5395e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723c1afc9409a31cc02de5b4cfab645904dc7aaa6048019bba9e3ae17f717c52
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.364089775561097,
5
  "eval_steps": 500,
6
- "global_step": 10500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -186,6 +186,13 @@
186
  "learning_rate": 2.550290939318371e-06,
187
  "loss": 0.3129,
188
  "step": 10500
 
 
 
 
 
 
 
189
  }
190
  ],
191
  "logging_steps": 500,
@@ -205,7 +212,7 @@
205
  "attributes": {}
206
  }
207
  },
208
- "total_flos": 2.557379557195776e+16,
209
  "train_batch_size": 4,
210
  "trial_name": null,
211
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.57190357439734,
5
  "eval_steps": 500,
6
+ "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
186
  "learning_rate": 2.550290939318371e-06,
187
  "loss": 0.3129,
188
  "step": 10500
189
+ },
190
+ {
191
+ "epoch": 4.57190357439734,
192
+ "grad_norm": 0.956358015537262,
193
+ "learning_rate": 1.7190357439733998e-06,
194
+ "loss": 0.3166,
195
+ "step": 11000
196
  }
197
  ],
198
  "logging_steps": 500,
 
212
  "attributes": {}
213
  }
214
  },
215
+ "total_flos": 2.679171135307776e+16,
216
  "train_batch_size": 4,
217
  "trial_name": null,
218
  "trial_params": null