rakhman-llm commited on
Commit
88bb912
·
verified ·
1 Parent(s): a0936a5

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e82e347b2238a2124e52fa6bbcb52a581a8376f642371fa693a2b74ccf64bbc
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ce56d6e1b12fc08d34fa085b7ecb7582ed9bc689e265e45b39c951a16023b3a
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59483524ed5516cde7ebadc084d889b4039cb25a2ec01036cddccb6c62e4b263
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4efc04a4c9090911fd512babc5f1cb29b366fd9d6332e2ed33a01202d2743d4c
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f3dfe1e2f7f2fd5854887c0faffeef0e864d418e8da96df789b7b060dfeefbe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e9bd9c0c5f3829618d66a8ccc40a2ee0bf94db351e00ed3ee919d3ea07ee90c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:581f26c042b017b35232c26ec1300c4f70bffc2cf5f6d147680616274578d17d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589ca601b575cfc7f004c136eb91b382b9a1be92a7e3c7f68f79df4414805284
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.597672485453034,
5
  "eval_steps": 500,
6
- "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,13 @@
198
  "learning_rate": 2.6891105569409807e-06,
199
  "loss": 0.3176,
200
  "step": 12500
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 500,
@@ -217,7 +224,7 @@
217
  "attributes": {}
218
  }
219
  },
220
- "total_flos": 1.522272934821888e+16,
221
  "train_batch_size": 2,
222
  "trial_name": null,
223
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7015793848711556,
5
  "eval_steps": 500,
6
+ "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "learning_rate": 2.6891105569409807e-06,
199
  "loss": 0.3176,
200
  "step": 12500
201
+ },
202
+ {
203
+ "epoch": 2.7015793848711556,
204
+ "grad_norm": 1.0440038442611694,
205
+ "learning_rate": 1.99778331947908e-06,
206
+ "loss": 0.3391,
207
+ "step": 13000
208
  }
209
  ],
210
  "logging_steps": 500,
 
224
  "attributes": {}
225
  }
226
  },
227
+ "total_flos": 1.583168723877888e+16,
228
  "train_batch_size": 2,
229
  "trial_name": null,
230
  "trial_params": null