brixeus commited on
Commit
57bcdfa
·
verified ·
1 Parent(s): c845415

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7708322f6f2ae0b3ae362c234870b0f46eedd25350ed70c2b5089815e789848d
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ac0dd6470aeaf933119e8adec87506edd50e34ef244bc4be2bc111bafac7fe9
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0707171e8bc8c714f3eb285bf25c8f573339187a9f2dc7fb13ad68bb2bb4e585
3
  size 162933844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9c939c62a6f8d89ae3405aac942cc155a4f3278dc1f16cf465b01365351d91d
3
  size 162933844
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a694053997fb4718f925cbd68a9bfc7b79fc8eef8c18fbf50d63beb14cd55264
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1d9539e09b7c36cf6b054c8fcfced3ce1a861812d8feb865f087854f471e3c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70f0f789b56065211b8c0b1a5e2a97dd0b5b08a816bbbe288fb6f9c677282af9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7407906651496887,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.059769885939134335,
5
  "eval_steps": 50,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -273,6 +273,135 @@
273
  "eval_samples_per_second": 13.964,
274
  "eval_steps_per_second": 3.492,
275
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  }
277
  ],
278
  "logging_steps": 10,
@@ -301,7 +430,7 @@
301
  "attributes": {}
302
  }
303
  },
304
- "total_flos": 3.9640890727622246e+17,
305
  "train_batch_size": 8,
306
  "trial_name": null,
307
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.725193440914154,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 0.0896548289087015,
5
  "eval_steps": 50,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
273
  "eval_samples_per_second": 13.964,
274
  "eval_steps_per_second": 3.492,
275
  "step": 300
276
+ },
277
+ {
278
+ "epoch": 0.06176221547043881,
279
+ "grad_norm": 0.30134573578834534,
280
+ "learning_rate": 9.733794785622253e-05,
281
+ "loss": 0.6773,
282
+ "step": 310
283
+ },
284
+ {
285
+ "epoch": 0.06375454500174328,
286
+ "grad_norm": 0.28048670291900635,
287
+ "learning_rate": 9.202138944469168e-05,
288
+ "loss": 0.6969,
289
+ "step": 320
290
+ },
291
+ {
292
+ "epoch": 0.06574687453304777,
293
+ "grad_norm": 0.32149261236190796,
294
+ "learning_rate": 8.672744727162781e-05,
295
+ "loss": 0.7026,
296
+ "step": 330
297
+ },
298
+ {
299
+ "epoch": 0.06773920406435224,
300
+ "grad_norm": 0.3494367301464081,
301
+ "learning_rate": 8.147112759128859e-05,
302
+ "loss": 0.7413,
303
+ "step": 340
304
+ },
305
+ {
306
+ "epoch": 0.06973153359565672,
307
+ "grad_norm": 0.4458478093147278,
308
+ "learning_rate": 7.626733001288851e-05,
309
+ "loss": 0.7612,
310
+ "step": 350
311
+ },
312
+ {
313
+ "epoch": 0.06973153359565672,
314
+ "eval_loss": 0.735527515411377,
315
+ "eval_runtime": 604.4756,
316
+ "eval_samples_per_second": 13.986,
317
+ "eval_steps_per_second": 3.497,
318
+ "step": 350
319
+ },
320
+ {
321
+ "epoch": 0.0717238631269612,
322
+ "grad_norm": 0.2828379273414612,
323
+ "learning_rate": 7.113080526603792e-05,
324
+ "loss": 0.6987,
325
+ "step": 360
326
+ },
327
+ {
328
+ "epoch": 0.07371619265826568,
329
+ "grad_norm": 0.3044438362121582,
330
+ "learning_rate": 6.607611338819697e-05,
331
+ "loss": 0.7014,
332
+ "step": 370
333
+ },
334
+ {
335
+ "epoch": 0.07570852218957015,
336
+ "grad_norm": 0.35144269466400146,
337
+ "learning_rate": 6.111758245266794e-05,
338
+ "loss": 0.7053,
339
+ "step": 380
340
+ },
341
+ {
342
+ "epoch": 0.07770085172087464,
343
+ "grad_norm": 0.37965935468673706,
344
+ "learning_rate": 5.626926795411447e-05,
345
+ "loss": 0.7327,
346
+ "step": 390
347
+ },
348
+ {
349
+ "epoch": 0.07969318125217911,
350
+ "grad_norm": 0.4066285192966461,
351
+ "learning_rate": 5.1544912966734994e-05,
352
+ "loss": 0.7586,
353
+ "step": 400
354
+ },
355
+ {
356
+ "epoch": 0.07969318125217911,
357
+ "eval_loss": 0.7312402129173279,
358
+ "eval_runtime": 604.9779,
359
+ "eval_samples_per_second": 13.974,
360
+ "eval_steps_per_second": 3.494,
361
+ "step": 400
362
+ },
363
+ {
364
+ "epoch": 0.08168551078348359,
365
+ "grad_norm": 0.28389105200767517,
366
+ "learning_rate": 4.695790918802576e-05,
367
+ "loss": 0.6942,
368
+ "step": 410
369
+ },
370
+ {
371
+ "epoch": 0.08367784031478806,
372
+ "grad_norm": 0.24920526146888733,
373
+ "learning_rate": 4.252125897855932e-05,
374
+ "loss": 0.7292,
375
+ "step": 420
376
+ },
377
+ {
378
+ "epoch": 0.08567016984609255,
379
+ "grad_norm": 0.32768023014068604,
380
+ "learning_rate": 3.824753850538082e-05,
381
+ "loss": 0.6935,
382
+ "step": 430
383
+ },
384
+ {
385
+ "epoch": 0.08766249937739702,
386
+ "grad_norm": 0.2882814407348633,
387
+ "learning_rate": 3.414886209349615e-05,
388
+ "loss": 0.6909,
389
+ "step": 440
390
+ },
391
+ {
392
+ "epoch": 0.0896548289087015,
393
+ "grad_norm": 0.35527971386909485,
394
+ "learning_rate": 3.0236847886501542e-05,
395
+ "loss": 0.7639,
396
+ "step": 450
397
+ },
398
+ {
399
+ "epoch": 0.0896548289087015,
400
+ "eval_loss": 0.725193440914154,
401
+ "eval_runtime": 604.3416,
402
+ "eval_samples_per_second": 13.989,
403
+ "eval_steps_per_second": 3.498,
404
+ "step": 450
405
  }
406
  ],
407
  "logging_steps": 10,
 
430
  "attributes": {}
431
  }
432
  },
433
+ "total_flos": 5.946133609143337e+17,
434
  "train_batch_size": 8,
435
  "trial_name": null,
436
  "trial_params": null