rakhman-llm commited on
Commit
8f79cb0
·
verified ·
1 Parent(s): f53786a

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d940b624e42af265966f792f158b5e6dff6cf498a4c59d70a1629ae2fdc2efc
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336e73efd327d7a1521b5a4d27a9fc5038eee564a716b9140d3d16b364a23a50
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a2bec32040a69e58023b5509b0fb90efe9794b8b9079e790fb6687d3ff55f17
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97c69acfb193391ec999ba15e870e63ab169b8ed9bf00f41fd88c47a66c65b8
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08998c919fed10f9ef4173715a99abc07523cdf2197bb7756b598ec6ecad7017
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:899366455e0955e5cf5c394ff7d4037b7abb9e5bade054905761b5a1a2bd5b8c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a2d7ef4474f599b4e657eaa32ef5df2560c025d5712ce31e544f317cb28ad99
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63facda9ec9ba059fa6f2a71adafa722008934178d5e063ac43b7ad2180ad7f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5666666666666667,
5
  "eval_steps": 500,
6
- "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -602,6 +602,41 @@
602
  "learning_rate": 1.6223555555555556e-05,
603
  "loss": 0.0677,
604
  "step": 8500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
605
  }
606
  ],
607
  "logging_steps": 100,
@@ -621,7 +656,7 @@
621
  "attributes": {}
622
  }
623
  },
624
- "total_flos": 2.070456827904e+16,
625
  "train_batch_size": 4,
626
  "trial_name": null,
627
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6,
5
  "eval_steps": 500,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
602
  "learning_rate": 1.6223555555555556e-05,
603
  "loss": 0.0677,
604
  "step": 8500
605
+ },
606
+ {
607
+ "epoch": 0.5733333333333334,
608
+ "grad_norm": 0.11715523153543472,
609
+ "learning_rate": 1.617911111111111e-05,
610
+ "loss": 0.0657,
611
+ "step": 8600
612
+ },
613
+ {
614
+ "epoch": 0.58,
615
+ "grad_norm": 0.2229638546705246,
616
+ "learning_rate": 1.6134666666666666e-05,
617
+ "loss": 0.0672,
618
+ "step": 8700
619
+ },
620
+ {
621
+ "epoch": 0.5866666666666667,
622
+ "grad_norm": 0.18995241820812225,
623
+ "learning_rate": 1.609022222222222e-05,
624
+ "loss": 0.0742,
625
+ "step": 8800
626
+ },
627
+ {
628
+ "epoch": 0.5933333333333334,
629
+ "grad_norm": 0.23822380602359772,
630
+ "learning_rate": 1.604577777777778e-05,
631
+ "loss": 0.0771,
632
+ "step": 8900
633
+ },
634
+ {
635
+ "epoch": 0.6,
636
+ "grad_norm": 0.21793758869171143,
637
+ "learning_rate": 1.6001333333333336e-05,
638
+ "loss": 0.0635,
639
+ "step": 9000
640
  }
641
  ],
642
  "logging_steps": 100,
 
656
  "attributes": {}
657
  }
658
  },
659
+ "total_flos": 2.192248406016e+16,
660
  "train_batch_size": 4,
661
  "trial_name": null,
662
  "trial_params": null