cilorku commited on
Commit
0a7a87c
·
verified ·
1 Parent(s): b5eed06

Training in progress, step 107, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16c57c3cbfc054b67bcc71f4ca4d264b570f6013309707faa52546243b1fd30b
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abb01dbc490f7b8e8d75522d5a82f8a57b15100a16fabdd744770a12ac2d414e
3
  size 119975656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f9bbacc86066af861da5976f828395efd785209a6ddc97a049e00116a1d73af
3
  size 61297812
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31cee0a9f7a82ece203e937776a61e9c1bd399d91418f14f297d546863d1476
3
  size 61297812
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d478d6efa8cb8ed403991510e313f6aa1bf04e96851137a85bcb4b2764c9d53
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e60e0f7d153d8d862b9482e21c86a792560cb2b436bc698b2d47bccfa43b5b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:388d25972bb1e5c37bd606304b2b7f03e402ddc476d98d532c28ffa8e9b3a17e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4057dad12937a603317934bf171d567427185298a4cbff133d48047ff198bd5b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.6379531621932983,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 3.7383177570093458,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,55 @@
731
  "eval_samples_per_second": 32.139,
732
  "eval_steps_per_second": 8.213,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +803,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 6.294827588753818e+16,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 1.6379531621932983,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 4.0,
5
  "eval_steps": 50,
6
+ "global_step": 107,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 32.139,
732
  "eval_steps_per_second": 8.213,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 3.7757009345794392,
737
+ "grad_norm": 0.7355567216873169,
738
+ "learning_rate": 3.366666666666667e-05,
739
+ "loss": 1.7526,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 3.8130841121495327,
744
+ "grad_norm": 0.7980681657791138,
745
+ "learning_rate": 3.4000000000000007e-05,
746
+ "loss": 1.4984,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 3.850467289719626,
751
+ "grad_norm": 0.8555505275726318,
752
+ "learning_rate": 3.433333333333333e-05,
753
+ "loss": 1.2448,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 3.8878504672897196,
758
+ "grad_norm": 1.4027817249298096,
759
+ "learning_rate": 3.466666666666667e-05,
760
+ "loss": 1.6987,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 3.925233644859813,
765
+ "grad_norm": 0.5864289402961731,
766
+ "learning_rate": 3.5e-05,
767
+ "loss": 1.9383,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 3.9626168224299065,
772
+ "grad_norm": 0.575369656085968,
773
+ "learning_rate": 3.5333333333333336e-05,
774
+ "loss": 1.6603,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 4.0,
779
+ "grad_norm": 1.2596912384033203,
780
+ "learning_rate": 3.566666666666667e-05,
781
+ "loss": 1.8972,
782
+ "step": 107
783
  }
784
  ],
785
  "logging_steps": 1,
 
803
  "should_evaluate": false,
804
  "should_log": false,
805
  "should_save": true,
806
+ "should_training_stop": true
807
  },
808
  "attributes": {}
809
  }
810
  },
811
+ "total_flos": 6.728755879870464e+16,
812
  "train_batch_size": 8,
813
  "trial_name": null,
814
  "trial_params": null