auxyus committed
Commit 32b3111 · verified · 1 Parent(s): df96afd

Training in progress, step 450, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:3f8b17838efa305193c9bbccc4b3fbe63a34f2db38adf70e28420c75460d3994
+ oid sha256:4b2d074aab499d9f80bddf337d0040a5830648d9c389b9b8224fac118d49e0c3
 size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:52db5d30338d1dc61e4f024548240a7e9a9ceca896561d8369570e43300058b7
+ oid sha256:4160a0bc908544956e44c09f66a7a1476e98b135bf884d5322db7950786f491c
 size 51613668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:019cea10a2ea963ce36b980aa95f96cb1364758a9b5a5a0a5acdc9b99d5f64ec
+ oid sha256:816123bdea76b0b8211255a74e4532d61283c516b9776b7a83d70742a8c3b82c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a8d9346c4fcc90fb1ec8546736583b76a4fae6bc25cb93181337c187d15da94a
+ oid sha256:70f0f789b56065211b8c0b1a5e2a97dd0b5b08a816bbbe288fb6f9c677282af9
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
- "best_metric": 2.248772382736206,
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
- "epoch": 0.030935010698357867,
+ "best_metric": 2.169210910797119,
+ "best_model_checkpoint": "miner_id_24/checkpoint-450",
+ "epoch": 0.0464025160475368,
  "eval_steps": 50,
- "global_step": 300,
+ "global_step": 450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -273,6 +273,135 @@
  "eval_samples_per_second": 48.971,
  "eval_steps_per_second": 12.244,
  "step": 300
+ },
+ {
+ "epoch": 0.031966177721636464,
+ "grad_norm": 0.5608008503913879,
+ "learning_rate": 9.733794785622253e-05,
+ "loss": 1.7371,
+ "step": 310
+ },
+ {
+ "epoch": 0.03299734474491506,
+ "grad_norm": 1.3526793718338013,
+ "learning_rate": 9.202138944469168e-05,
+ "loss": 2.1361,
+ "step": 320
+ },
+ {
+ "epoch": 0.03402851176819365,
+ "grad_norm": 1.3870782852172852,
+ "learning_rate": 8.672744727162781e-05,
+ "loss": 2.6702,
+ "step": 330
+ },
+ {
+ "epoch": 0.035059678791472246,
+ "grad_norm": 2.1156375408172607,
+ "learning_rate": 8.147112759128859e-05,
+ "loss": 2.1889,
+ "step": 340
+ },
+ {
+ "epoch": 0.036090845814750847,
+ "grad_norm": 3.8267505168914795,
+ "learning_rate": 7.626733001288851e-05,
+ "loss": 2.1682,
+ "step": 350
+ },
+ {
+ "epoch": 0.036090845814750847,
+ "eval_loss": 2.2239229679107666,
+ "eval_runtime": 334.0546,
+ "eval_samples_per_second": 48.896,
+ "eval_steps_per_second": 12.226,
+ "step": 350
+ },
+ {
+ "epoch": 0.03712201283802944,
+ "grad_norm": 0.6449031829833984,
+ "learning_rate": 7.113080526603792e-05,
+ "loss": 1.7428,
+ "step": 360
+ },
+ {
+ "epoch": 0.038153179861308034,
+ "grad_norm": 1.3239842653274536,
+ "learning_rate": 6.607611338819697e-05,
+ "loss": 2.0759,
+ "step": 370
+ },
+ {
+ "epoch": 0.03918434688458663,
+ "grad_norm": 1.4793647527694702,
+ "learning_rate": 6.111758245266794e-05,
+ "loss": 2.6362,
+ "step": 380
+ },
+ {
+ "epoch": 0.04021551390786523,
+ "grad_norm": 1.840112328529358,
+ "learning_rate": 5.626926795411447e-05,
+ "loss": 2.2387,
+ "step": 390
+ },
+ {
+ "epoch": 0.04124668093114382,
+ "grad_norm": 3.4163084030151367,
+ "learning_rate": 5.1544912966734994e-05,
+ "loss": 2.0834,
+ "step": 400
+ },
+ {
+ "epoch": 0.04124668093114382,
+ "eval_loss": 2.179900646209717,
+ "eval_runtime": 332.5638,
+ "eval_samples_per_second": 49.115,
+ "eval_steps_per_second": 12.28,
+ "step": 400
+ },
+ {
+ "epoch": 0.042277847954422416,
+ "grad_norm": 0.5676496624946594,
+ "learning_rate": 4.695790918802576e-05,
+ "loss": 1.7509,
+ "step": 410
+ },
+ {
+ "epoch": 0.04330901497770101,
+ "grad_norm": 1.6149543523788452,
+ "learning_rate": 4.252125897855932e-05,
+ "loss": 2.2587,
+ "step": 420
+ },
+ {
+ "epoch": 0.04434018200097961,
+ "grad_norm": 1.5459808111190796,
+ "learning_rate": 3.824753850538082e-05,
+ "loss": 2.6127,
+ "step": 430
+ },
+ {
+ "epoch": 0.045371349024258205,
+ "grad_norm": 2.0725579261779785,
+ "learning_rate": 3.414886209349615e-05,
+ "loss": 2.1811,
+ "step": 440
+ },
+ {
+ "epoch": 0.0464025160475368,
+ "grad_norm": 3.765904426574707,
+ "learning_rate": 3.0236847886501542e-05,
+ "loss": 2.1086,
+ "step": 450
+ },
+ {
+ "epoch": 0.0464025160475368,
+ "eval_loss": 2.169210910797119,
+ "eval_runtime": 333.7041,
+ "eval_samples_per_second": 48.948,
+ "eval_steps_per_second": 12.238,
+ "step": 450
  }
  ],
  "logging_steps": 10,
@@ -301,7 +430,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.281876055313613e+16,
+ "total_flos": 9.42281408297042e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null