masatochi committed · verified
Commit b07ae85 · 1 Parent(s): 10eaafc

Training in progress, step 110, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f4a9506f73921c53994cf5aa43954f949a4111e7aebd908099675167c3e7aee
+oid sha256:797691a5c1bede0a3013242e12ad082cf5b59954a8a0fc0dd7a75224b4282d66
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bde8169b298ed164c0b878c30c6b39f1068dd9204b9f052724c216b38abd4c6
+oid sha256:1c6da235d2be338d88881dc48554c3fe142c62bdbac18c080cd1bbe05a538c5e
 size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d228656fc4662fbb158acab12ab048938d5ec4032384bd70b245c74cf2162ee1
+oid sha256:3a4992a0a541377e74f550ef72fa9af6f8fee0ce175cbdc61ec06b94024e86a4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8c855c846898181ed358c1ef65b19ad1435172d9025fde7f25f4580bfc48faa
+oid sha256:802e09b6cc63e64e726d0b68ba37b81d6a6fcf54cdf00e4821b3e38426a8a5c4
 size 1064
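
Each of the four binary files above is stored as a Git LFS pointer, so only the sha256 oid changes between checkpoints while the size stays constant. A minimal sketch, in Python, of checking a downloaded blob against its pointer; the local path "last-checkpoint/adapter_model.safetensors" is an assumption about where the file was fetched to:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so large checkpoint blobs never sit fully in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected oid copied from the new adapter_model.safetensors pointer in this commit.
expected = "797691a5c1bede0a3013242e12ad082cf5b59954a8a0fc0dd7a75224b4282d66"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
print("match" if actual == expected else "mismatch: " + actual)

The same check applies to optimizer.pt, rng_state.pth, and scheduler.pt with their respective oids.
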
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.051347881899871634,
+  "epoch": 0.05379301913319885,
   "eval_steps": 34,
-  "global_step": 105,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -774,6 +774,41 @@
       "learning_rate": 0.00011837495178165706,
       "loss": 0.7804,
       "step": 105
+    },
+    {
+      "epoch": 0.05183690934653708,
+      "grad_norm": 1.6085052490234375,
+      "learning_rate": 0.000116555387618413,
+      "loss": 1.093,
+      "step": 106
+    },
+    {
+      "epoch": 0.05232593679320252,
+      "grad_norm": 1.4606610536575317,
+      "learning_rate": 0.00011473016980546377,
+      "loss": 1.0967,
+      "step": 107
+    },
+    {
+      "epoch": 0.052814964239867965,
+      "grad_norm": 1.591978669166565,
+      "learning_rate": 0.00011289992165302035,
+      "loss": 1.1526,
+      "step": 108
+    },
+    {
+      "epoch": 0.05330399168653341,
+      "grad_norm": 1.4087272882461548,
+      "learning_rate": 0.00011106526818915008,
+      "loss": 1.1814,
+      "step": 109
+    },
+    {
+      "epoch": 0.05379301913319885,
+      "grad_norm": 1.3588765859603882,
+      "learning_rate": 0.00010922683594633021,
+      "loss": 1.11,
+      "step": 110
     }
   ],
   "logging_steps": 1,
@@ -793,7 +828,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.6609019053277184e+17,
+  "total_flos": 4.882849615105229e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null
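
The trainer_state.json diff records five new log entries (steps 106 through 110) plus the updated epoch, global_step, and total_flos. A minimal sketch, in Python with only the standard library, of inspecting that state after downloading the checkpoint; the directory name "last-checkpoint" mirrors the paths in this commit:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# After this commit the state reports step 110 and epoch ~0.0538.
print(state["global_step"])
print(state["epoch"])
# The last log_history entry holds the step-110 loss, learning rate, and grad norm.
print(state["log_history"][-1])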