rakhman-llm commited on
Commit
cca7687
·
verified ·
1 Parent(s): eab9443

Training in progress, step 39000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61083c956d931fcb4b0a0b08aca90fdf494df1d92e15a552d2a52a3b02538c7a
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cd71b89eb4cf5b54368e95fa13ee6cc3864c3091e759091438dd601d6f1af7
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee11215041eb0186976ef4b18acc8c3b3085b8278e5c482e739c8198da6b7b9a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc0dd7a1f2c9ad84ebf73220aa56cda18faf115b1e79f8dbee1b26db6cddaa7d
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9244e8a4fedebd96fe9477d928481702bf2954643113838dc31cc9779fe5ad3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:823ea6f73dcec531968a187071c6ecff33e9bb6c6d030f15a43dbc4f25e7c39e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bfe4e613c5d63b9b8661f26b2521341654055e778c8bc3f3f8cced7a4c2ef7b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e54113eca63b2259f253dad8dc263308f86084513f881434f4da417da62d75
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5666666666666664,
5
  "eval_steps": 500,
6
- "global_step": 38500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2718,6 +2718,41 @@
2718
  "learning_rate": 2.8951111111111114e-06,
2719
  "loss": 0.0549,
2720
  "step": 38500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2721
  }
2722
  ],
2723
  "logging_steps": 100,
@@ -2737,7 +2772,7 @@
2737
  "attributes": {}
2738
  }
2739
  },
2740
- "total_flos": 9.377951514624e+16,
2741
  "train_batch_size": 4,
2742
  "trial_name": null,
2743
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6,
5
  "eval_steps": 500,
6
+ "global_step": 39000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2718
  "learning_rate": 2.8951111111111114e-06,
2719
  "loss": 0.0549,
2720
  "step": 38500
2721
+ },
2722
+ {
2723
+ "epoch": 2.5733333333333333,
2724
+ "grad_norm": 0.16107220947742462,
2725
+ "learning_rate": 2.850666666666667e-06,
2726
+ "loss": 0.0549,
2727
+ "step": 38600
2728
+ },
2729
+ {
2730
+ "epoch": 2.58,
2731
+ "grad_norm": 0.14319832623004913,
2732
+ "learning_rate": 2.8062222222222223e-06,
2733
+ "loss": 0.0508,
2734
+ "step": 38700
2735
+ },
2736
+ {
2737
+ "epoch": 2.586666666666667,
2738
+ "grad_norm": 0.13144062459468842,
2739
+ "learning_rate": 2.7617777777777784e-06,
2740
+ "loss": 0.0491,
2741
+ "step": 38800
2742
+ },
2743
+ {
2744
+ "epoch": 2.5933333333333333,
2745
+ "grad_norm": 0.202744722366333,
2746
+ "learning_rate": 2.7173333333333336e-06,
2747
+ "loss": 0.0531,
2748
+ "step": 38900
2749
+ },
2750
+ {
2751
+ "epoch": 2.6,
2752
+ "grad_norm": 0.1617712527513504,
2753
+ "learning_rate": 2.6728888888888893e-06,
2754
+ "loss": 0.0516,
2755
+ "step": 39000
2756
  }
2757
  ],
2758
  "logging_steps": 100,
 
2772
  "attributes": {}
2773
  }
2774
  },
2775
+ "total_flos": 9.499743092736e+16,
2776
  "train_batch_size": 4,
2777
  "trial_name": null,
2778
  "trial_params": null