ngwgsang committed on
Commit a38c63b · verified · 1 Parent(s): 137d603

Training in progress, epoch 6, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1dab21a3309f43892efdf6d32d3eb2a83f87740fc40912ec53fff44ce1639a8b
+oid sha256:a87059de773a647f0f146c9c6d0b08d5535f799d9fa1c34e6aec16f5407fc67c
 size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60ff253a82aaff7be91301625082eac8e395a14f23fc079cb32647b9b6e5388e
+oid sha256:7994e3322be09b5f88a7dbc2a5814c66fcb49eac3f6f61e2db6e5b2dd3f344e8
 size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b5dc815408d1d7abfa9ba6c0dc7793cae2a75c860d693e06433fbcdfbd86b13d
+oid sha256:473ecb09e5f106de8046a76cc9b1107489610a4ca8d22c8acd37629ea6ee333c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d384c207c19b3bb34dc652f0d12803de0a7eba856a942818f2ab5a8834f9006c
+oid sha256:2068a5d955976d9311723e9deb7bf5d0b82ab774ba392808af341e32685de248
 size 1064
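
The four files above are Git LFS pointers: each records only a spec version, a sha256 object id, and the byte size of the actual artifact, so only the oid changes when a checkpoint is overwritten. As a minimal sketch (the local paths are assumptions for illustration, not files laid out by this commit), the oid can be checked against a downloaded copy like this:

```python
# Sketch: verify a downloaded checkpoint against its Git LFS pointer.
# Both paths below are hypothetical; adjust them to the local layout.
import hashlib


def parse_lfs_pointer(pointer_path: str) -> dict:
    """Read a Git LFS pointer file ('key value' per line) into a dict."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields


def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash in chunks so a large checkpoint never has to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


if __name__ == "__main__":
    pointer = parse_lfs_pointer("last-checkpoint/model.safetensors")  # pointer as stored in git
    local = sha256_of("downloads/model.safetensors")                  # hypothetical local copy
    expected = pointer["oid"].removeprefix("sha256:")
    print("match" if local == expected else f"mismatch: {local} != {expected}")
```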
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 5.481770197550456,
-  "best_model_checkpoint": "./results/checkpoint-4580",
-  "epoch": 5.0,
+  "best_metric": 5.272278467814128,
+  "best_model_checkpoint": "./results/checkpoint-5496",
+  "epoch": 6.0,
   "eval_steps": 500,
-  "global_step": 4580,
+  "global_step": 5496,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -382,6 +382,81 @@
       "eval_samples_per_second": 270.375,
       "eval_steps_per_second": 8.45,
       "step": 4580
+    },
+    {
+      "epoch": 5.021834061135372,
+      "grad_norm": 25.387714385986328,
+      "learning_rate": 1.1168122270742358e-05,
+      "loss": 5.0585,
+      "step": 4600
+    },
+    {
+      "epoch": 5.131004366812227,
+      "grad_norm": 33.1529655456543,
+      "learning_rate": 1.0758733624454149e-05,
+      "loss": 4.774,
+      "step": 4700
+    },
+    {
+      "epoch": 5.240174672489083,
+      "grad_norm": 31.700637817382812,
+      "learning_rate": 1.034934497816594e-05,
+      "loss": 4.762,
+      "step": 4800
+    },
+    {
+      "epoch": 5.349344978165939,
+      "grad_norm": 34.32217025756836,
+      "learning_rate": 9.93995633187773e-06,
+      "loss": 4.8645,
+      "step": 4900
+    },
+    {
+      "epoch": 5.458515283842795,
+      "grad_norm": 52.338130950927734,
+      "learning_rate": 9.530567685589519e-06,
+      "loss": 4.9913,
+      "step": 5000
+    },
+    {
+      "epoch": 5.567685589519651,
+      "grad_norm": 27.761211395263672,
+      "learning_rate": 9.12117903930131e-06,
+      "loss": 4.9047,
+      "step": 5100
+    },
+    {
+      "epoch": 5.676855895196507,
+      "grad_norm": 36.54159164428711,
+      "learning_rate": 8.7117903930131e-06,
+      "loss": 4.7824,
+      "step": 5200
+    },
+    {
+      "epoch": 5.786026200873362,
+      "grad_norm": 31.954957962036133,
+      "learning_rate": 8.302401746724891e-06,
+      "loss": 4.7555,
+      "step": 5300
+    },
+    {
+      "epoch": 5.895196506550218,
+      "grad_norm": 33.35627365112305,
+      "learning_rate": 7.89301310043668e-06,
+      "loss": 4.8389,
+      "step": 5400
+    },
+    {
+      "epoch": 6.0,
+      "eval_avg_mae": 5.272278467814128,
+      "eval_loss": 5.272278308868408,
+      "eval_mae_lex": 4.76518440246582,
+      "eval_mae_sem": 3.384120225906372,
+      "eval_mae_syn": 7.6675310134887695,
+      "eval_runtime": 27.0821,
+      "eval_samples_per_second": 270.548,
+      "eval_steps_per_second": 8.456,
+      "step": 5496
     }
   ],
   "logging_steps": 100,
@@ -401,7 +476,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9638831166693120.0,
+  "total_flos": 1.1566597400031744e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null