rakhman-llm commited on
Commit
beea6aa
·
verified ·
1 Parent(s): be6fb51

Training in progress, step 16500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75a86cd786a5d3a1e1f2adcb3f9a3150ccda047965e61deb6383da469625f4c5
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:175884cade4e067c9fa3009c5873e488ef4e170548016dcad81294d3815c5e3d
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d52a7d049b044518630bdac24825b9c5aace7c2129b9296a1f4f71f920000ff
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e366c1ad2c23dec12c48d6136cf496f7476337ed3fe80251ddb60c2daadec8e
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c907d42c9636b923c5bcc4fd78594e76582ee7e7c3af9d3ccfeae6b74654c31c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f95e1dd2cd7704ca98aa1bf80c979e7e594652b8372e360c80de457ba4b96d7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cf6ca81cadd867bd2c8ff23d5105fee09afef52498d5c300dfd5de226b319d5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edb1f065dac584748e029b7253ed918a3ede6790f2209ff8eedc0176fafb2e83
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0666666666666667,
5
  "eval_steps": 500,
6
- "global_step": 16000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1135,6 +1135,41 @@
1135
  "learning_rate": 1.2890666666666667e-05,
1136
  "loss": 0.061,
1137
  "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1138
  }
1139
  ],
1140
  "logging_steps": 100,
@@ -1154,7 +1189,7 @@
1154
  "attributes": {}
1155
  }
1156
  },
1157
- "total_flos": 3.897330499584e+16,
1158
  "train_batch_size": 4,
1159
  "trial_name": null,
1160
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1,
5
  "eval_steps": 500,
6
+ "global_step": 16500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1135
  "learning_rate": 1.2890666666666667e-05,
1136
  "loss": 0.061,
1137
  "step": 16000
1138
+ },
1139
+ {
1140
+ "epoch": 1.0733333333333333,
1141
+ "grad_norm": 0.16027814149856567,
1142
+ "learning_rate": 1.2846222222222224e-05,
1143
+ "loss": 0.0559,
1144
+ "step": 16100
1145
+ },
1146
+ {
1147
+ "epoch": 1.08,
1148
+ "grad_norm": 0.21083027124404907,
1149
+ "learning_rate": 1.2801777777777779e-05,
1150
+ "loss": 0.0576,
1151
+ "step": 16200
1152
+ },
1153
+ {
1154
+ "epoch": 1.0866666666666667,
1155
+ "grad_norm": 0.13497234880924225,
1156
+ "learning_rate": 1.2757333333333334e-05,
1157
+ "loss": 0.0599,
1158
+ "step": 16300
1159
+ },
1160
+ {
1161
+ "epoch": 1.0933333333333333,
1162
+ "grad_norm": 0.28260791301727295,
1163
+ "learning_rate": 1.2712888888888891e-05,
1164
+ "loss": 0.0585,
1165
+ "step": 16400
1166
+ },
1167
+ {
1168
+ "epoch": 1.1,
1169
+ "grad_norm": 0.1817014068365097,
1170
+ "learning_rate": 1.2668444444444446e-05,
1171
+ "loss": 0.0544,
1172
+ "step": 16500
1173
  }
1174
  ],
1175
  "logging_steps": 100,
 
1189
  "attributes": {}
1190
  }
1191
  },
1192
+ "total_flos": 4.019122077696e+16,
1193
  "train_batch_size": 4,
1194
  "trial_name": null,
1195
  "trial_params": null