rakhman-llm commited on
Commit
f825855
·
verified ·
1 Parent(s): cdb3cac

Training in progress, step 44000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3f3f60f837bc2d6b892b2d0546640ca46d3f194d47c20345c8767c8ccaa5c07
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440a49b94440cacff83f386734abd67ba7ebf0590110ed4471a084cfe5123375
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6211545bbfeb1c43922e986aa4087c1ebccc0eedc2e220fb0dba4944f73f39c0
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa8b9c57b8597bab044debeab7cc337cd4ec0227fbde8dd82185f8b22bf3a034
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85869ac30f3cee5a03da0dea6f3e99cba1ee27fffe68afedef7c1dce40526f4d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d5486da749b598a61d62adc9f231f7450ebff3f0e201a6edd91a104ce06d0fc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d6701719fda370188bcff5587df70fa5f1ce20ee16a7c8ee75ec8b4d555e1ea
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c87f62d5aed9d7a6d425bf9b7734b81e555049c990c49c2d251a8a04bf9890
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.9,
5
  "eval_steps": 500,
6
- "global_step": 43500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3068,6 +3068,41 @@
3068
  "learning_rate": 6.737777777777778e-07,
3069
  "loss": 0.0521,
3070
  "step": 43500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3071
  }
3072
  ],
3073
  "logging_steps": 100,
@@ -3087,7 +3122,7 @@
3087
  "attributes": {}
3088
  }
3089
  },
3090
- "total_flos": 1.0595867295744e+17,
3091
  "train_batch_size": 4,
3092
  "trial_name": null,
3093
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9333333333333336,
5
  "eval_steps": 500,
6
+ "global_step": 44000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3068
  "learning_rate": 6.737777777777778e-07,
3069
  "loss": 0.0521,
3070
  "step": 43500
3071
+ },
3072
+ {
3073
+ "epoch": 2.9066666666666667,
3074
+ "grad_norm": 0.2009938806295395,
3075
+ "learning_rate": 6.293333333333334e-07,
3076
+ "loss": 0.0576,
3077
+ "step": 43600
3078
+ },
3079
+ {
3080
+ "epoch": 2.913333333333333,
3081
+ "grad_norm": 0.1709711253643036,
3082
+ "learning_rate": 5.848888888888889e-07,
3083
+ "loss": 0.0551,
3084
+ "step": 43700
3085
+ },
3086
+ {
3087
+ "epoch": 2.92,
3088
+ "grad_norm": 0.21005882322788239,
3089
+ "learning_rate": 5.404444444444444e-07,
3090
+ "loss": 0.0525,
3091
+ "step": 43800
3092
+ },
3093
+ {
3094
+ "epoch": 2.9266666666666667,
3095
+ "grad_norm": 0.24121809005737305,
3096
+ "learning_rate": 4.96e-07,
3097
+ "loss": 0.0559,
3098
+ "step": 43900
3099
+ },
3100
+ {
3101
+ "epoch": 2.9333333333333336,
3102
+ "grad_norm": 0.31166791915893555,
3103
+ "learning_rate": 4.5155555555555554e-07,
3104
+ "loss": 0.051,
3105
+ "step": 44000
3106
  }
3107
  ],
3108
  "logging_steps": 100,
 
3122
  "attributes": {}
3123
  }
3124
  },
3125
+ "total_flos": 1.0717658873856e+17,
3126
  "train_batch_size": 4,
3127
  "trial_name": null,
3128
  "trial_params": null