lesso17 commited on
Commit
9ef7b5d
·
verified ·
1 Parent(s): 2a33775

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c978c1944e0035df8707d50978324cd2fe744bc92a509226099da71e761ef8e
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df3dc42fcc062c9a89ae6e7ea757b5fb2d753a0e13cdc868895cd4c209f98699
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1811fce2182455da25735d34331196cb181f0f779745ee3d7bf2eb09f42b7b4
3
  size 591208618
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c384a780131306337e696be57a2b958a4e3a371f4b77faf1b07cb412a076239e
3
  size 591208618
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89473c9e743c560beae052d379c8b5909df539a4c27ccd7585b10c27a0c916f5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8221ed7296c95f1c75b140fa25630822cdf3c0fa6bf3265b32b3687272289853
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb0e788a332b6bf63ef51bc8c958fe1595ea404b5da922f613e5b5fbb2af155d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26036ade73b34529a59dfe3f217f7cac319696cef41527ebf633fee3de40316d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.6864949464797974,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-3500",
4
- "epoch": 0.33851994245160977,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -561,6 +561,84 @@
561
  "eval_samples_per_second": 24.259,
562
  "eval_steps_per_second": 6.065,
563
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  }
565
  ],
566
  "logging_steps": 50,
@@ -584,12 +662,12 @@
584
  "should_evaluate": false,
585
  "should_log": false,
586
  "should_save": true,
587
- "should_training_stop": false
588
  },
589
  "attributes": {}
590
  }
591
  },
592
- "total_flos": 9.525745935817114e+17,
593
  "train_batch_size": 4,
594
  "trial_name": null,
595
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.6836225986480713,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-4000",
4
+ "epoch": 0.38687993423041117,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
561
  "eval_samples_per_second": 24.259,
562
  "eval_steps_per_second": 6.065,
563
  "step": 3500
564
+ },
565
+ {
566
+ "epoch": 0.3433559416294899,
567
+ "grad_norm": 1.3484987020492554,
568
+ "learning_rate": 7.050737668632502e-06,
569
+ "loss": 1.7132,
570
+ "step": 3550
571
+ },
572
+ {
573
+ "epoch": 0.34819194080737004,
574
+ "grad_norm": 1.4851126670837402,
575
+ "learning_rate": 5.583796048734715e-06,
576
+ "loss": 1.6666,
577
+ "step": 3600
578
+ },
579
+ {
580
+ "epoch": 0.3530279399852502,
581
+ "grad_norm": 1.164219856262207,
582
+ "learning_rate": 4.283784887996112e-06,
583
+ "loss": 1.7128,
584
+ "step": 3650
585
+ },
586
+ {
587
+ "epoch": 0.35786393916313036,
588
+ "grad_norm": 1.160065770149231,
589
+ "learning_rate": 3.1528128092733565e-06,
590
+ "loss": 1.6667,
591
+ "step": 3700
592
+ },
593
+ {
594
+ "epoch": 0.3626999383410105,
595
+ "grad_norm": 1.2768871784210205,
596
+ "learning_rate": 2.1927142534139574e-06,
597
+ "loss": 1.6975,
598
+ "step": 3750
599
+ },
600
+ {
601
+ "epoch": 0.36753593751889063,
602
+ "grad_norm": 1.3569271564483643,
603
+ "learning_rate": 1.4050465037864562e-06,
604
+ "loss": 1.7084,
605
+ "step": 3800
606
+ },
607
+ {
608
+ "epoch": 0.37237193669677077,
609
+ "grad_norm": 1.2699822187423706,
610
+ "learning_rate": 7.910871603611439e-07,
611
+ "loss": 1.6869,
612
+ "step": 3850
613
+ },
614
+ {
615
+ "epoch": 0.3772079358746509,
616
+ "grad_norm": 1.3335933685302734,
617
+ "learning_rate": 3.5183206743822263e-07,
618
+ "loss": 1.6952,
619
+ "step": 3900
620
+ },
621
+ {
622
+ "epoch": 0.38204393505253104,
623
+ "grad_norm": 1.3161271810531616,
624
+ "learning_rate": 8.799369838469983e-08,
625
+ "loss": 1.6841,
626
+ "step": 3950
627
+ },
628
+ {
629
+ "epoch": 0.38687993423041117,
630
+ "grad_norm": 1.3024942874908447,
631
+ "learning_rate": 0.0,
632
+ "loss": 1.6754,
633
+ "step": 4000
634
+ },
635
+ {
636
+ "epoch": 0.38687993423041117,
637
+ "eval_loss": 1.6836225986480713,
638
+ "eval_runtime": 718.0098,
639
+ "eval_samples_per_second": 24.253,
640
+ "eval_steps_per_second": 6.064,
641
+ "step": 4000
642
  }
643
  ],
644
  "logging_steps": 50,
 
662
  "should_evaluate": false,
663
  "should_log": false,
664
  "should_save": true,
665
+ "should_training_stop": true
666
  },
667
  "attributes": {}
668
  }
669
  },
670
+ "total_flos": 1.0886469589244314e+18,
671
  "train_batch_size": 4,
672
  "trial_name": null,
673
  "trial_params": null