rakhman-llm commited on
Commit
cd89181
·
verified ·
1 Parent(s): 3504800

Training in progress, step 37500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fd691e92552d902e006c1dbd51666fbe879b2be04760327e9ca887670be688f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eccce9825a6e6348256698faab746bf337537d7559919bf091704418e3ecaaa5
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f14ad18c691195cd15268660ba702ce3fc7337f8480e501b361bd9677af8b07e
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5767ff2839f2befc3509511517661e7ee746fd7345f573d42e39ce30e6d908e8
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:810ed5e7c2ae1b96c2bbc38902777e6779874b7cdcf097af17b2123a3bf9831a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ffd6cc317d4a23512c42e23cd9356ee5c984750b736bc7a53cf0419eccfd496
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f866da3a9c3c847bb3514492ca12f1bdef65b499f47966b6692e65b75559e3e5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26829b102f440c540dc7e3f1f0e0d969a25f498ec8a83042cddb35cab37b0ab3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.466666666666667,
5
  "eval_steps": 500,
6
- "global_step": 37000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2613,6 +2613,41 @@
2613
  "learning_rate": 3.561777777777778e-06,
2614
  "loss": 0.0522,
2615
  "step": 37000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2616
  }
2617
  ],
2618
  "logging_steps": 100,
@@ -2632,7 +2667,7 @@
2632
  "attributes": {}
2633
  }
2634
  },
2635
- "total_flos": 9.012576780288e+16,
2636
  "train_batch_size": 4,
2637
  "trial_name": null,
2638
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.5,
5
  "eval_steps": 500,
6
+ "global_step": 37500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2613
  "learning_rate": 3.561777777777778e-06,
2614
  "loss": 0.0522,
2615
  "step": 37000
2616
+ },
2617
+ {
2618
+ "epoch": 2.473333333333333,
2619
+ "grad_norm": 0.2838660180568695,
2620
+ "learning_rate": 3.5173333333333336e-06,
2621
+ "loss": 0.0485,
2622
+ "step": 37100
2623
+ },
2624
+ {
2625
+ "epoch": 2.48,
2626
+ "grad_norm": 0.26960739493370056,
2627
+ "learning_rate": 3.472888888888889e-06,
2628
+ "loss": 0.0516,
2629
+ "step": 37200
2630
+ },
2631
+ {
2632
+ "epoch": 2.486666666666667,
2633
+ "grad_norm": 0.19472958147525787,
2634
+ "learning_rate": 3.428444444444445e-06,
2635
+ "loss": 0.0551,
2636
+ "step": 37300
2637
+ },
2638
+ {
2639
+ "epoch": 2.493333333333333,
2640
+ "grad_norm": 0.22085241973400116,
2641
+ "learning_rate": 3.384e-06,
2642
+ "loss": 0.0555,
2643
+ "step": 37400
2644
+ },
2645
+ {
2646
+ "epoch": 2.5,
2647
+ "grad_norm": 0.32543325424194336,
2648
+ "learning_rate": 3.3395555555555558e-06,
2649
+ "loss": 0.0517,
2650
+ "step": 37500
2651
  }
2652
  ],
2653
  "logging_steps": 100,
 
2667
  "attributes": {}
2668
  }
2669
  },
2670
+ "total_flos": 9.1343683584e+16,
2671
  "train_batch_size": 4,
2672
  "trial_name": null,
2673
  "trial_params": null