ngwgsang commited on
Commit
308441a
·
verified ·
1 Parent(s): a23f4e5

Training in progress, epoch 7, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bedc646b0c32b1ad68548c5bacfc0ac7530fd21e564481050cda862faf0d974
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533bcedc70c1ba977edf8f964162e29dac2f2ded0e80d5ac138e3337711e299e
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cea5909b394f56ca428fdba6c8839a075b2ff8da5f2fe574e5efdd06083bea54
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c39e29d6e0217e4ee207e0d4bef98c40b17e4a76692586b3f4efc7b21d165f95
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:473ecb09e5f106de8046a76cc9b1107489610a4ca8d22c8acd37629ea6ee333c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff6cc6d0cbab2c0d1b846e878224dd13dea331cdbd300a43f50d003878abb4b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:094473adb9147cbc574c74fa5cf34500f7166f96e92615702ffcaba4852f1943
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39dce596f597946769554edb972b9f1c6abe7c7f4150e7ed88047695e2bb4c5f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 5.39035161336263,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
- "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 5496,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -457,6 +457,88 @@
457
  "eval_samples_per_second": 269.005,
458
  "eval_steps_per_second": 8.408,
459
  "step": 5496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "logging_steps": 100,
@@ -476,7 +558,7 @@
476
  "attributes": {}
477
  }
478
  },
479
- "total_flos": 1.1566597400031744e+16,
480
  "train_batch_size": 32,
481
  "trial_name": null,
482
  "trial_params": null
 
1
  {
2
  "best_metric": 5.39035161336263,
3
  "best_model_checkpoint": "./results/checkpoint-5496",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 6412,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
457
  "eval_samples_per_second": 269.005,
458
  "eval_steps_per_second": 8.408,
459
  "step": 5496
460
+ },
461
+ {
462
+ "epoch": 6.004366812227074,
463
+ "grad_norm": 30.764888763427734,
464
+ "learning_rate": 1.2472707423580788e-05,
465
+ "loss": 4.7749,
466
+ "step": 5500
467
+ },
468
+ {
469
+ "epoch": 6.11353711790393,
470
+ "grad_norm": 32.09075164794922,
471
+ "learning_rate": 1.1790393013100438e-05,
472
+ "loss": 4.6015,
473
+ "step": 5600
474
+ },
475
+ {
476
+ "epoch": 6.222707423580786,
477
+ "grad_norm": 30.449230194091797,
478
+ "learning_rate": 1.1108078602620089e-05,
479
+ "loss": 4.5258,
480
+ "step": 5700
481
+ },
482
+ {
483
+ "epoch": 6.331877729257642,
484
+ "grad_norm": 26.658090591430664,
485
+ "learning_rate": 1.0425764192139738e-05,
486
+ "loss": 4.5119,
487
+ "step": 5800
488
+ },
489
+ {
490
+ "epoch": 6.441048034934497,
491
+ "grad_norm": 35.33096694946289,
492
+ "learning_rate": 9.74344978165939e-06,
493
+ "loss": 4.3653,
494
+ "step": 5900
495
+ },
496
+ {
497
+ "epoch": 6.550218340611353,
498
+ "grad_norm": 31.823776245117188,
499
+ "learning_rate": 9.06113537117904e-06,
500
+ "loss": 4.4477,
501
+ "step": 6000
502
+ },
503
+ {
504
+ "epoch": 6.6593886462882095,
505
+ "grad_norm": 29.176389694213867,
506
+ "learning_rate": 8.37882096069869e-06,
507
+ "loss": 4.5485,
508
+ "step": 6100
509
+ },
510
+ {
511
+ "epoch": 6.7685589519650655,
512
+ "grad_norm": 39.900264739990234,
513
+ "learning_rate": 7.696506550218342e-06,
514
+ "loss": 4.5233,
515
+ "step": 6200
516
+ },
517
+ {
518
+ "epoch": 6.877729257641922,
519
+ "grad_norm": 28.541597366333008,
520
+ "learning_rate": 7.014192139737992e-06,
521
+ "loss": 4.5032,
522
+ "step": 6300
523
+ },
524
+ {
525
+ "epoch": 6.986899563318778,
526
+ "grad_norm": 37.577293395996094,
527
+ "learning_rate": 6.3318777292576415e-06,
528
+ "loss": 4.507,
529
+ "step": 6400
530
+ },
531
+ {
532
+ "epoch": 7.0,
533
+ "eval_avg_mae": 5.4305070241292315,
534
+ "eval_loss": 5.430506706237793,
535
+ "eval_mae_lex": 4.818454742431641,
536
+ "eval_mae_sem": 3.808347702026367,
537
+ "eval_mae_syn": 7.664718151092529,
538
+ "eval_runtime": 27.2304,
539
+ "eval_samples_per_second": 269.074,
540
+ "eval_steps_per_second": 8.41,
541
+ "step": 6412
542
  }
543
  ],
544
  "logging_steps": 100,
 
558
  "attributes": {}
559
  }
560
  },
561
+ "total_flos": 1.3494363633370368e+16,
562
  "train_batch_size": 32,
563
  "trial_name": null,
564
  "trial_params": null