ngwgsang commited on
Commit
83b8ac2
·
verified ·
1 Parent(s): 0db3d7b

Training in progress, epoch 8, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:533bcedc70c1ba977edf8f964162e29dac2f2ded0e80d5ac138e3337711e299e
3
  size 442668636
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0195e95b07a008ca0adb918c3cf710d2dd35e5922e1076eded57fcb47ec29bc3
3
  size 442668636
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c39e29d6e0217e4ee207e0d4bef98c40b17e4a76692586b3f4efc7b21d165f95
3
  size 885457146
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed50d1f812bf88be9e3a3ef60b44e88993ed9aab461dcd502248ba4e966d769
3
  size 885457146
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff6cc6d0cbab2c0d1b846e878224dd13dea331cdbd300a43f50d003878abb4b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9f1a22dcc6b48df80d64487fe2cb15b598edf3b547bbcd4fb8ed5b43754655
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39dce596f597946769554edb972b9f1c6abe7c7f4150e7ed88047695e2bb4c5f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da94ddd87a3dbe17d3a75825e2b60c2edfc2fd36631a373db6346d564b8b7bd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 5.39035161336263,
3
- "best_model_checkpoint": "./results/checkpoint-5496",
4
- "epoch": 7.0,
5
  "eval_steps": 500,
6
- "global_step": 6412,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -539,6 +539,81 @@
539
  "eval_samples_per_second": 269.074,
540
  "eval_steps_per_second": 8.41,
541
  "step": 6412
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  }
543
  ],
544
  "logging_steps": 100,
@@ -553,12 +628,12 @@
553
  "should_evaluate": false,
554
  "should_log": false,
555
  "should_save": true,
556
- "should_training_stop": false
557
  },
558
  "attributes": {}
559
  }
560
  },
561
- "total_flos": 1.3494363633370368e+16,
562
  "train_batch_size": 32,
563
  "trial_name": null,
564
  "trial_params": null
 
1
  {
2
+ "best_metric": 5.320092519124349,
3
+ "best_model_checkpoint": "./results/checkpoint-7328",
4
+ "epoch": 8.0,
5
  "eval_steps": 500,
6
+ "global_step": 7328,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
539
  "eval_samples_per_second": 269.074,
540
  "eval_steps_per_second": 8.41,
541
  "step": 6412
542
+ },
543
+ {
544
+ "epoch": 7.096069868995633,
545
+ "grad_norm": 30.43601417541504,
546
+ "learning_rate": 5.649563318777293e-06,
547
+ "loss": 4.4245,
548
+ "step": 6500
549
+ },
550
+ {
551
+ "epoch": 7.205240174672489,
552
+ "grad_norm": 32.60799789428711,
553
+ "learning_rate": 4.967248908296943e-06,
554
+ "loss": 4.2516,
555
+ "step": 6600
556
+ },
557
+ {
558
+ "epoch": 7.314410480349345,
559
+ "grad_norm": 32.61433792114258,
560
+ "learning_rate": 4.284934497816594e-06,
561
+ "loss": 4.2598,
562
+ "step": 6700
563
+ },
564
+ {
565
+ "epoch": 7.423580786026201,
566
+ "grad_norm": 28.708969116210938,
567
+ "learning_rate": 3.6026200873362447e-06,
568
+ "loss": 4.1993,
569
+ "step": 6800
570
+ },
571
+ {
572
+ "epoch": 7.532751091703057,
573
+ "grad_norm": 32.070068359375,
574
+ "learning_rate": 2.920305676855895e-06,
575
+ "loss": 4.2718,
576
+ "step": 6900
577
+ },
578
+ {
579
+ "epoch": 7.641921397379913,
580
+ "grad_norm": 32.91472625732422,
581
+ "learning_rate": 2.237991266375546e-06,
582
+ "loss": 4.3288,
583
+ "step": 7000
584
+ },
585
+ {
586
+ "epoch": 7.751091703056769,
587
+ "grad_norm": 26.058467864990234,
588
+ "learning_rate": 1.5556768558951965e-06,
589
+ "loss": 4.2426,
590
+ "step": 7100
591
+ },
592
+ {
593
+ "epoch": 7.860262008733624,
594
+ "grad_norm": 26.022476196289062,
595
+ "learning_rate": 8.733624454148472e-07,
596
+ "loss": 4.3651,
597
+ "step": 7200
598
+ },
599
+ {
600
+ "epoch": 7.96943231441048,
601
+ "grad_norm": 37.77171325683594,
602
+ "learning_rate": 1.910480349344978e-07,
603
+ "loss": 4.1779,
604
+ "step": 7300
605
+ },
606
+ {
607
+ "epoch": 8.0,
608
+ "eval_avg_mae": 5.320092519124349,
609
+ "eval_loss": 5.320092678070068,
610
+ "eval_mae_lex": 4.7238664627075195,
611
+ "eval_mae_sem": 3.55670166015625,
612
+ "eval_mae_syn": 7.6797099113464355,
613
+ "eval_runtime": 27.2585,
614
+ "eval_samples_per_second": 268.797,
615
+ "eval_steps_per_second": 8.401,
616
+ "step": 7328
617
  }
618
  ],
619
  "logging_steps": 100,
 
628
  "should_evaluate": false,
629
  "should_log": false,
630
  "should_save": true,
631
+ "should_training_stop": true
632
  },
633
  "attributes": {}
634
  }
635
  },
636
+ "total_flos": 1.5422129866708992e+16,
637
  "train_batch_size": 32,
638
  "trial_name": null,
639
  "trial_params": null