oldiday committed on
Commit
896b6bf
·
verified ·
1 Parent(s): 4ee1737

Training in progress, step 300, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6c613dd358d68cc0c9e41ad85857aa2518633775e85afbb6db14a8d6bb12506
3
  size 80792096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7271b7abd71ff28a6d8862329ed25f52ef9bdaed0cd5f9426db9af7d154d0162
3
  size 80792096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aacaf8a7a5d773f8dcab0641038797aefdb68541420ea3d2e729f9d15e02c9f
3
- size 41459700
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c97053614ad9d594d3c931b703e869bd2ed545d776abf4c92958c8accb20b2d
3
+ size 41460084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a306d7fee431a85f12f46cace07d5a9b24375c79ffade2191c9121fbb02b66e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf57361bea28743679736bace185586f54b838c0b6e89f824be140ba835deb8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b38a0153e3fcc5f09dd2a02445f6a79e4da9e519f9e1df79817a58a22b378c5c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c14f228dcfe997eb31918193f198918e61dc44a4118e1ce6d0f02c3f7f0fb85b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.010375738143921,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 1.797752808988764,
5
  "eval_steps": 100,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -171,6 +171,84 @@
171
  "eval_samples_per_second": 13.446,
172
  "eval_steps_per_second": 3.379,
173
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  }
175
  ],
176
  "logging_steps": 10,
@@ -185,7 +263,7 @@
185
  "early_stopping_threshold": 0.0
186
  },
187
  "attributes": {
188
- "early_stopping_patience_counter": 1
189
  }
190
  },
191
  "TrainerControl": {
@@ -199,7 +277,7 @@
199
  "attributes": {}
200
  }
201
  },
202
- "total_flos": 2.8827556792447795e+17,
203
  "train_batch_size": 8,
204
  "trial_name": null,
205
  "trial_params": null
 
1
  {
2
  "best_metric": 1.010375738143921,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 2.696629213483146,
5
  "eval_steps": 100,
6
+ "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
171
  "eval_samples_per_second": 13.446,
172
  "eval_steps_per_second": 3.379,
173
  "step": 200
174
+ },
175
+ {
176
+ "epoch": 1.8876404494382022,
177
+ "grad_norm": 0.5966526865959167,
178
+ "learning_rate": 6.398222751952899e-05,
179
+ "loss": 0.9513,
180
+ "step": 210
181
+ },
182
+ {
183
+ "epoch": 1.9775280898876404,
184
+ "grad_norm": 0.6702543497085571,
185
+ "learning_rate": 5.5120081979953785e-05,
186
+ "loss": 0.8003,
187
+ "step": 220
188
+ },
189
+ {
190
+ "epoch": 2.067415730337079,
191
+ "grad_norm": 0.577067494392395,
192
+ "learning_rate": 4.66795567198309e-05,
193
+ "loss": 0.9227,
194
+ "step": 230
195
+ },
196
+ {
197
+ "epoch": 2.157303370786517,
198
+ "grad_norm": 0.6616420149803162,
199
+ "learning_rate": 3.873994548067972e-05,
200
+ "loss": 0.8428,
201
+ "step": 240
202
+ },
203
+ {
204
+ "epoch": 2.247191011235955,
205
+ "grad_norm": 0.7785276770591736,
206
+ "learning_rate": 3.137583621312665e-05,
207
+ "loss": 0.6197,
208
+ "step": 250
209
+ },
210
+ {
211
+ "epoch": 2.337078651685393,
212
+ "grad_norm": 0.8036301136016846,
213
+ "learning_rate": 2.465641036723393e-05,
214
+ "loss": 0.8266,
215
+ "step": 260
216
+ },
217
+ {
218
+ "epoch": 2.4269662921348316,
219
+ "grad_norm": 0.8799476027488708,
220
+ "learning_rate": 1.864479297370325e-05,
221
+ "loss": 0.7985,
222
+ "step": 270
223
+ },
224
+ {
225
+ "epoch": 2.5168539325842696,
226
+ "grad_norm": 0.7558830380439758,
227
+ "learning_rate": 1.339745962155613e-05,
228
+ "loss": 0.6886,
229
+ "step": 280
230
+ },
231
+ {
232
+ "epoch": 2.606741573033708,
233
+ "grad_norm": 0.8995586633682251,
234
+ "learning_rate": 8.963705903385345e-06,
235
+ "loss": 0.8577,
236
+ "step": 290
237
+ },
238
+ {
239
+ "epoch": 2.696629213483146,
240
+ "grad_norm": 0.8656787276268005,
241
+ "learning_rate": 5.385184312424974e-06,
242
+ "loss": 0.7566,
243
+ "step": 300
244
+ },
245
+ {
246
+ "epoch": 2.696629213483146,
247
+ "eval_loss": 1.0554652214050293,
248
+ "eval_runtime": 13.9222,
249
+ "eval_samples_per_second": 13.432,
250
+ "eval_steps_per_second": 3.376,
251
+ "step": 300
252
  }
253
  ],
254
  "logging_steps": 10,
 
263
  "early_stopping_threshold": 0.0
264
  },
265
  "attributes": {
266
+ "early_stopping_patience_counter": 2
267
  }
268
  },
269
  "TrainerControl": {
 
277
  "attributes": {}
278
  }
279
  },
280
+ "total_flos": 4.312152896050299e+17,
281
  "train_batch_size": 8,
282
  "trial_name": null,
283
  "trial_params": null