rakhman-llm commited on
Commit
94bbb24
·
verified ·
1 Parent(s): 230223b

Training in progress, step 38000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eccce9825a6e6348256698faab746bf337537d7559919bf091704418e3ecaaa5
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b01c6e320f3b8ce398fbd50bb34cd5deb4150ad4b0e09a91d304d07ef6a1d44
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5767ff2839f2befc3509511517661e7ee746fd7345f573d42e39ce30e6d908e8
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37f54042d6d8e987a2001e8f8b69e12f7b5e1be6322534bf71acae8f44c1d295
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ffd6cc317d4a23512c42e23cd9356ee5c984750b736bc7a53cf0419eccfd496
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ae2baae6f9dfd7ca89f4f0f5818402f18a3e15e4581cd68734c6a76f2a7030
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26829b102f440c540dc7e3f1f0e0d969a25f498ec8a83042cddb35cab37b0ab3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e81910e79bb87f0b46a0d2aa6ab0730eb92717d10eebce124863a9bc14f71612
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5,
5
  "eval_steps": 500,
6
- "global_step": 37500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2648,6 +2648,41 @@
2648
  "learning_rate": 3.3395555555555558e-06,
2649
  "loss": 0.0517,
2650
  "step": 37500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2651
  }
2652
  ],
2653
  "logging_steps": 100,
@@ -2667,7 +2702,7 @@
2667
  "attributes": {}
2668
  }
2669
  },
2670
- "total_flos": 9.1343683584e+16,
2671
  "train_batch_size": 4,
2672
  "trial_name": null,
2673
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.533333333333333,
5
  "eval_steps": 500,
6
+ "global_step": 38000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2648
  "learning_rate": 3.3395555555555558e-06,
2649
  "loss": 0.0517,
2650
  "step": 37500
2651
+ },
2652
+ {
2653
+ "epoch": 2.506666666666667,
2654
+ "grad_norm": 0.11736246943473816,
2655
+ "learning_rate": 3.295111111111111e-06,
2656
+ "loss": 0.0521,
2657
+ "step": 37600
2658
+ },
2659
+ {
2660
+ "epoch": 2.513333333333333,
2661
+ "grad_norm": 0.33586665987968445,
2662
+ "learning_rate": 3.250666666666667e-06,
2663
+ "loss": 0.0549,
2664
+ "step": 37700
2665
+ },
2666
+ {
2667
+ "epoch": 2.52,
2668
+ "grad_norm": 0.15800270438194275,
2669
+ "learning_rate": 3.2062222222222223e-06,
2670
+ "loss": 0.056,
2671
+ "step": 37800
2672
+ },
2673
+ {
2674
+ "epoch": 2.5266666666666664,
2675
+ "grad_norm": 0.14952941238880157,
2676
+ "learning_rate": 3.161777777777778e-06,
2677
+ "loss": 0.055,
2678
+ "step": 37900
2679
+ },
2680
+ {
2681
+ "epoch": 2.533333333333333,
2682
+ "grad_norm": 0.08778905123472214,
2683
+ "learning_rate": 3.117333333333333e-06,
2684
+ "loss": 0.0477,
2685
+ "step": 38000
2686
  }
2687
  ],
2688
  "logging_steps": 100,
 
2702
  "attributes": {}
2703
  }
2704
  },
2705
+ "total_flos": 9.256159936512e+16,
2706
  "train_batch_size": 4,
2707
  "trial_name": null,
2708
  "trial_params": null