ngwgsang commited on
Commit
75dabab
·
verified ·
1 Parent(s): 8a8ea07

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7381c11af32f450ef90a1f41be45370df688105c59000f73049a6f3d855bf5bf
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61b427c7e90098665041fc0a0a842c2a81a5d94700dec207f9f18e8fa3e68fe
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fa5f76c2cf56d06474142e07bd2538df2e0f93a495907116066ebde69a2488c
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af3007c1d17883475830f0951448eb8859176b25fee74d22b9b7c1cb61d5b54a
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76b3b8471ca0351c811d90c5b574a45dac1c24b25ffcf13ec6b85586685c4c47
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049c6f40a328629846cab1b27e3807d44ea469304a69ff0f3d676cc813cde6b3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8143cf4c2b0cbae224cb6ee44d414097e32f9582a6067851f3fe7a3ab225aca6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7668389514d10a3d53f140c85ff46df71dcd9dc34fbc1ed6530f2d1a175df2a0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 5.645811716715495,
3
  "best_model_checkpoint": "./results/checkpoint-2748",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 2748,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -232,6 +232,81 @@
232
  "eval_samples_per_second": 269.899,
233
  "eval_steps_per_second": 8.435,
234
  "step": 2748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
  }
236
  ],
237
  "logging_steps": 100,
@@ -251,7 +326,7 @@
251
  "attributes": {}
252
  }
253
  },
254
- "total_flos": 5783298700015872.0,
255
  "train_batch_size": 32,
256
  "trial_name": null,
257
  "trial_params": null
 
1
  {
2
  "best_metric": 5.645811716715495,
3
  "best_model_checkpoint": "./results/checkpoint-2748",
4
+ "epoch": 4.0,
5
  "eval_steps": 500,
6
+ "global_step": 3664,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
232
  "eval_samples_per_second": 269.899,
233
  "eval_steps_per_second": 8.435,
234
  "step": 2748
235
+ },
236
+ {
237
+ "epoch": 3.056768558951965,
238
+ "grad_norm": 31.40544319152832,
239
+ "learning_rate": 1.8537117903930135e-05,
240
+ "loss": 5.4937,
241
+ "step": 2800
242
+ },
243
+ {
244
+ "epoch": 3.165938864628821,
245
+ "grad_norm": 28.67197608947754,
246
+ "learning_rate": 1.8127729257641922e-05,
247
+ "loss": 5.5573,
248
+ "step": 2900
249
+ },
250
+ {
251
+ "epoch": 3.2751091703056767,
252
+ "grad_norm": 26.671180725097656,
253
+ "learning_rate": 1.7718340611353713e-05,
254
+ "loss": 5.6147,
255
+ "step": 3000
256
+ },
257
+ {
258
+ "epoch": 3.3842794759825328,
259
+ "grad_norm": 32.73609924316406,
260
+ "learning_rate": 1.7308951965065504e-05,
261
+ "loss": 5.2704,
262
+ "step": 3100
263
+ },
264
+ {
265
+ "epoch": 3.493449781659389,
266
+ "grad_norm": 26.268295288085938,
267
+ "learning_rate": 1.689956331877729e-05,
268
+ "loss": 5.4946,
269
+ "step": 3200
270
+ },
271
+ {
272
+ "epoch": 3.6026200873362444,
273
+ "grad_norm": 24.3873233795166,
274
+ "learning_rate": 1.649017467248908e-05,
275
+ "loss": 5.5757,
276
+ "step": 3300
277
+ },
278
+ {
279
+ "epoch": 3.7117903930131004,
280
+ "grad_norm": 26.872316360473633,
281
+ "learning_rate": 1.6080786026200872e-05,
282
+ "loss": 5.3305,
283
+ "step": 3400
284
+ },
285
+ {
286
+ "epoch": 3.8209606986899565,
287
+ "grad_norm": 31.78321647644043,
288
+ "learning_rate": 1.5671397379912666e-05,
289
+ "loss": 5.4091,
290
+ "step": 3500
291
+ },
292
+ {
293
+ "epoch": 3.930131004366812,
294
+ "grad_norm": 37.95060729980469,
295
+ "learning_rate": 1.5262008733624454e-05,
296
+ "loss": 5.464,
297
+ "step": 3600
298
+ },
299
+ {
300
+ "epoch": 4.0,
301
+ "eval_avg_mae": 5.841625213623047,
302
+ "eval_loss": 5.841624736785889,
303
+ "eval_mae_lex": 5.572142124176025,
304
+ "eval_mae_sem": 3.7872631549835205,
305
+ "eval_mae_syn": 8.1654691696167,
306
+ "eval_runtime": 27.1382,
307
+ "eval_samples_per_second": 269.988,
308
+ "eval_steps_per_second": 8.438,
309
+ "step": 3664
310
  }
311
  ],
312
  "logging_steps": 100,
 
326
  "attributes": {}
327
  }
328
  },
329
+ "total_flos": 7711064933354496.0,
330
  "train_batch_size": 32,
331
  "trial_name": null,
332
  "trial_params": null