aleegis12 committed on
Commit
bf3fb44
·
verified ·
1 Parent(s): 6393f26

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1c9680990467f4f5b27c801ce52e3a2c3ff1262d1f29cd4c622c56aa687fd3
3
  size 289452128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a9a940508f9f6c79c40565b60b743aee8833c200ddf8325493c276db8fdc78
3
  size 289452128
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeaf49429c405b64027193334814d1c268a2931bbf62a168037a701bdf250011
3
  size 147360212
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec974729977bfed9dd55f8e2bb2116a9c4d8d6b04fd9061e20bd9a9bcf3ea1f
3
  size 147360212
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:844d9922fee038233ae22054458295a183977341fb28c224c56fc9e2c4e3ef11
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d71d19f7e56bb076c92fbd2570dc9b4e01a59dacb70d9a3c8d5f5dc629ed65ac
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f76a102ac755d5bd19e6f0e55ad50e49a03a307409b32eeac7fb5a2a13752ba
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b5b8ae682ae9c51a247d395336eab40b1fa2d190384873d8421e390f09af11a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.44117751717567444,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-2700",
4
- "epoch": 0.2881844380403458,
5
  "eval_steps": 150,
6
- "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -566,6 +566,35 @@
566
  "eval_samples_per_second": 38.383,
567
  "eval_steps_per_second": 9.596,
568
  "step": 2850
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
569
  }
570
  ],
571
  "logging_steps": 50,
@@ -580,7 +609,7 @@
580
  "early_stopping_threshold": 0.0
581
  },
582
  "attributes": {
583
- "early_stopping_patience_counter": 1
584
  }
585
  },
586
  "TrainerControl": {
@@ -589,12 +618,12 @@
589
  "should_evaluate": false,
590
  "should_log": false,
591
  "should_save": true,
592
- "should_training_stop": false
593
  },
594
  "attributes": {}
595
  }
596
  },
597
- "total_flos": 4.192760627913032e+17,
598
  "train_batch_size": 4,
599
  "trial_name": null,
600
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.4388670027256012,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-3000",
4
+ "epoch": 0.3033520400424693,
5
  "eval_steps": 150,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
566
  "eval_samples_per_second": 38.383,
567
  "eval_steps_per_second": 9.596,
568
  "step": 2850
569
+ },
570
+ {
571
+ "epoch": 0.293240305374387,
572
+ "grad_norm": 0.2669506371021271,
573
+ "learning_rate": 5.665199789862907e-07,
574
+ "loss": 0.431,
575
+ "step": 2900
576
+ },
577
+ {
578
+ "epoch": 0.2982961727084281,
579
+ "grad_norm": 0.09382705390453339,
580
+ "learning_rate": 1.4173043232380557e-07,
581
+ "loss": 0.4565,
582
+ "step": 2950
583
+ },
584
+ {
585
+ "epoch": 0.3033520400424693,
586
+ "grad_norm": 0.16964586079120636,
587
+ "learning_rate": 0.0,
588
+ "loss": 0.4481,
589
+ "step": 3000
590
+ },
591
+ {
592
+ "epoch": 0.3033520400424693,
593
+ "eval_loss": 0.4388670027256012,
594
+ "eval_runtime": 217.1959,
595
+ "eval_samples_per_second": 38.343,
596
+ "eval_steps_per_second": 9.586,
597
+ "step": 3000
598
  }
599
  ],
600
  "logging_steps": 50,
 
609
  "early_stopping_threshold": 0.0
610
  },
611
  "attributes": {
612
+ "early_stopping_patience_counter": 0
613
  }
614
  },
615
  "TrainerControl": {
 
618
  "should_evaluate": false,
619
  "should_log": false,
620
  "should_save": true,
621
+ "should_training_stop": true
622
  },
623
  "attributes": {}
624
  }
625
  },
626
+ "total_flos": 4.4136390549150106e+17,
627
  "train_batch_size": 4,
628
  "trial_name": null,
629
  "trial_params": null