ardaspear committed (verified)
Commit 644ad0d · 1 Parent(s): 16d4056

Training in progress, step 300, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3fd864bbe67d6da90cc5bda1e550d1ac8c1a1f80f4f0f3b1e9b1ab21db78ca9
+oid sha256:928cf2edd71caf351ebc90e657c55e687ea53f55df2d3e0f185d16524bfe7b4c
 size 599689368
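
The adapter_model.safetensors pointer updated above refers to a PEFT adapter checkpoint. A minimal loading sketch, assuming a causal-LM base and that the checkpoint directory also contains its adapter_config.json; the base model id is a hypothetical placeholder, since it is not recorded in this commit:

from transformers import AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "your-base-model-id"   # hypothetical placeholder, not part of this commit
ADAPTER_DIR = "last-checkpoint"     # directory containing adapter_model.safetensors

# Load the frozen base weights, then attach the adapter from this checkpoint.
base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, ADAPTER_DIR)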
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d80781512712077700f2d3a2d0d034448979e9df7b5a5d4e1f1ce5c0f1bfdd4f
-size 404854552
+oid sha256:f13253f2b0fb38ec20b2cefc02c8e84216eedbddd3cde1f83bfd77addf58484e
+size 404854808
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a273ee62316e02b9a4e88216f8672d92839273f7781c4a93389d224775e64c2
+oid sha256:216c0e407dde1861ae8bd56458455e443750821353d5f0f196d4a06ed8661cbd
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e994acda9463ad5e79f11759cee0746e6d525c82215e6ea2f53a57491ac0869b
+oid sha256:0b536b0fa0a634e1c6dfafee7987ec2b47c88eb25b052693577be75945f4ed90
 size 1064
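
Each of the checkpoint files above is stored as a Git LFS pointer: the repository records only a sha256 object id and a byte size, and this commit swaps the old pointer for the new one. A small sketch for checking a downloaded file against its pointer, using only the standard library:

import hashlib
from pathlib import Path

def matches_lfs_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Compare a local file against the oid/size recorded in its LFS pointer."""
    p = Path(path)
    if p.stat().st_size != expected_size:
        return False
    digest = hashlib.sha256()
    with p.open("rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Example: the new optimizer.pt pointer from this commit.
print(matches_lfs_pointer(
    "last-checkpoint/optimizer.pt",
    "f13253f2b0fb38ec20b2cefc02c8e84216eedbddd3cde1f83bfd77addf58484e",
    404854808,
))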
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 11.450271606445312,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.2830188679245283,
+  "epoch": 0.5660377358490566,
   "eval_steps": 50,
-  "global_step": 150,
+  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -144,6 +144,135 @@
       "eval_samples_per_second": 21.569,
       "eval_steps_per_second": 5.41,
       "step": 150
+    },
+    {
+      "epoch": 0.3018867924528302,
+      "grad_norm": 31.868478775024414,
+      "learning_rate": 0.0001697631521134985,
+      "loss": 38.3331,
+      "step": 160
+    },
+    {
+      "epoch": 0.32075471698113206,
+      "grad_norm": 25.038724899291992,
+      "learning_rate": 0.00016585113790650388,
+      "loss": 24.7657,
+      "step": 170
+    },
+    {
+      "epoch": 0.33962264150943394,
+      "grad_norm": 53.333683013916016,
+      "learning_rate": 0.0001617524614946192,
+      "loss": 23.8673,
+      "step": 180
+    },
+    {
+      "epoch": 0.3584905660377358,
+      "grad_norm": 183.1505889892578,
+      "learning_rate": 0.0001574787410214407,
+      "loss": 12.851,
+      "step": 190
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "grad_norm": 199.62698364257812,
+      "learning_rate": 0.00015304209081197425,
+      "loss": 3.7041,
+      "step": 200
+    },
+    {
+      "epoch": 0.37735849056603776,
+      "eval_loss": 10.561553001403809,
+      "eval_runtime": 41.4742,
+      "eval_samples_per_second": 21.531,
+      "eval_steps_per_second": 5.401,
+      "step": 200
+    },
+    {
+      "epoch": 0.39622641509433965,
+      "grad_norm": 30.381412506103516,
+      "learning_rate": 0.00014845508703326504,
+      "loss": 36.4437,
+      "step": 210
+    },
+    {
+      "epoch": 0.41509433962264153,
+      "grad_norm": 25.108572006225586,
+      "learning_rate": 0.00014373073204588556,
+      "loss": 24.811,
+      "step": 220
+    },
+    {
+      "epoch": 0.4339622641509434,
+      "grad_norm": 25.86770248413086,
+      "learning_rate": 0.00013888241754733208,
+      "loss": 22.9303,
+      "step": 230
+    },
+    {
+      "epoch": 0.4528301886792453,
+      "grad_norm": 83.9566650390625,
+      "learning_rate": 0.00013392388661180303,
+      "loss": 14.6497,
+      "step": 240
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "grad_norm": 46.80604934692383,
+      "learning_rate": 0.0001288691947339621,
+      "loss": 3.5489,
+      "step": 250
+    },
+    {
+      "epoch": 0.4716981132075472,
+      "eval_loss": 13.115553855895996,
+      "eval_runtime": 41.4443,
+      "eval_samples_per_second": 21.547,
+      "eval_steps_per_second": 5.405,
+      "step": 250
+    },
+    {
+      "epoch": 0.49056603773584906,
+      "grad_norm": 60.517303466796875,
+      "learning_rate": 0.0001237326699871115,
+      "loss": 52.7818,
+      "step": 260
+    },
+    {
+      "epoch": 0.5094339622641509,
+      "grad_norm": 88.43193817138672,
+      "learning_rate": 0.00011852887240871145,
+      "loss": 28.1595,
+      "step": 270
+    },
+    {
+      "epoch": 0.5283018867924528,
+      "grad_norm": 39.37873458862305,
+      "learning_rate": 0.00011327255272837221,
+      "loss": 23.8529,
+      "step": 280
+    },
+    {
+      "epoch": 0.5471698113207547,
+      "grad_norm": 118.77758026123047,
+      "learning_rate": 0.00010797861055530831,
+      "loss": 14.4378,
+      "step": 290
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "grad_norm": 136.2257537841797,
+      "learning_rate": 0.00010266205214377748,
+      "loss": 4.6485,
+      "step": 300
+    },
+    {
+      "epoch": 0.5660377358490566,
+      "eval_loss": 15.816683769226074,
+      "eval_runtime": 41.4418,
+      "eval_samples_per_second": 21.548,
+      "eval_steps_per_second": 5.405,
+      "step": 300
     }
   ],
   "logging_steps": 10,
@@ -158,7 +287,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 2
      }
     },
     "TrainerControl": {
@@ -172,7 +301,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.156569026789376e+17,
+  "total_flos": 2.313138053578752e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null