Seosnaps committed on
Commit
3302410
·
verified ·
1 Parent(s): 58aa1c8

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93d63750fbfa5cd8f3d3d09d202f1a9092b56ec6d7bba4992f11110b44c05e85
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1627e1860168745dc7894ff2e9c9b8114aa1c16458ababd20bc8dd256fc593a4
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e72343fb815a2302ed4364b31ab5cf8b9f2c4258a461e9416b5ab8eee21abb27
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7446e0362efd41a2d57580f3df0c8a0989e1f34b671d5980b42b3ecc03336c36
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9f96556c91f78b167a3a23f1c3f779be5f90901a0a97f9cd4811d2ba7a3f74c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b23f626a7efa36d01f5e36f3f34d543aac465661afc2ed75e47913bc2ba74c7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eac4afd95bb0ddfe3c09279bb130184beaca309f98a20634196f6c6a08c2e05d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c1e9d4c6f9361869311d8df318c84d0329122c83c7e35b5aebcbb60aca4858
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 96.39777735198314,
3
  "best_model_checkpoint": "./whisper-small-ha-v3/checkpoint-1000",
4
- "epoch": 9.554140127388536,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -457,6 +457,156 @@
457
  "eval_wer": 103.04656064380148,
458
  "eval_wer_ortho": 104.1015625,
459
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "logging_steps": 25,
@@ -476,7 +626,7 @@
476
  "attributes": {}
477
  }
478
  },
479
- "total_flos": 6.91566053326848e+18,
480
  "train_batch_size": 16,
481
  "trial_name": null,
482
  "trial_params": null
 
1
  {
2
  "best_metric": 96.39777735198314,
3
  "best_model_checkpoint": "./whisper-small-ha-v3/checkpoint-1000",
4
+ "epoch": 12.738853503184714,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
457
  "eval_wer": 103.04656064380148,
458
  "eval_wer_ortho": 104.1015625,
459
  "step": 1500
460
+ },
461
+ {
462
+ "epoch": 9.713375796178344,
463
+ "grad_norm": 5.422865390777588,
464
+ "learning_rate": 0.0005,
465
+ "loss": 1.0079,
466
+ "step": 1525
467
+ },
468
+ {
469
+ "epoch": 9.872611464968152,
470
+ "grad_norm": 5.475556373596191,
471
+ "learning_rate": 0.0005,
472
+ "loss": 1.0126,
473
+ "step": 1550
474
+ },
475
+ {
476
+ "epoch": 10.031847133757962,
477
+ "grad_norm": 5.356685161590576,
478
+ "learning_rate": 0.0005,
479
+ "loss": 0.9727,
480
+ "step": 1575
481
+ },
482
+ {
483
+ "epoch": 10.19108280254777,
484
+ "grad_norm": 5.031153202056885,
485
+ "learning_rate": 0.0005,
486
+ "loss": 0.8431,
487
+ "step": 1600
488
+ },
489
+ {
490
+ "epoch": 10.35031847133758,
491
+ "grad_norm": 5.827383518218994,
492
+ "learning_rate": 0.0005,
493
+ "loss": 0.8888,
494
+ "step": 1625
495
+ },
496
+ {
497
+ "epoch": 10.509554140127388,
498
+ "grad_norm": 5.030758857727051,
499
+ "learning_rate": 0.0005,
500
+ "loss": 0.9165,
501
+ "step": 1650
502
+ },
503
+ {
504
+ "epoch": 10.668789808917197,
505
+ "grad_norm": 5.023013114929199,
506
+ "learning_rate": 0.0005,
507
+ "loss": 0.9541,
508
+ "step": 1675
509
+ },
510
+ {
511
+ "epoch": 10.828025477707007,
512
+ "grad_norm": 5.8825602531433105,
513
+ "learning_rate": 0.0005,
514
+ "loss": 0.9576,
515
+ "step": 1700
516
+ },
517
+ {
518
+ "epoch": 10.987261146496815,
519
+ "grad_norm": 5.114201068878174,
520
+ "learning_rate": 0.0005,
521
+ "loss": 0.9793,
522
+ "step": 1725
523
+ },
524
+ {
525
+ "epoch": 11.146496815286625,
526
+ "grad_norm": 4.3722333908081055,
527
+ "learning_rate": 0.0005,
528
+ "loss": 0.7909,
529
+ "step": 1750
530
+ },
531
+ {
532
+ "epoch": 11.305732484076433,
533
+ "grad_norm": 4.9860382080078125,
534
+ "learning_rate": 0.0005,
535
+ "loss": 0.858,
536
+ "step": 1775
537
+ },
538
+ {
539
+ "epoch": 11.464968152866241,
540
+ "grad_norm": 5.144904613494873,
541
+ "learning_rate": 0.0005,
542
+ "loss": 0.8842,
543
+ "step": 1800
544
+ },
545
+ {
546
+ "epoch": 11.624203821656051,
547
+ "grad_norm": 4.30189847946167,
548
+ "learning_rate": 0.0005,
549
+ "loss": 0.8925,
550
+ "step": 1825
551
+ },
552
+ {
553
+ "epoch": 11.78343949044586,
554
+ "grad_norm": 5.091893672943115,
555
+ "learning_rate": 0.0005,
556
+ "loss": 0.9188,
557
+ "step": 1850
558
+ },
559
+ {
560
+ "epoch": 11.94267515923567,
561
+ "grad_norm": 5.179553508758545,
562
+ "learning_rate": 0.0005,
563
+ "loss": 0.9309,
564
+ "step": 1875
565
+ },
566
+ {
567
+ "epoch": 12.101910828025478,
568
+ "grad_norm": 5.5085225105285645,
569
+ "learning_rate": 0.0005,
570
+ "loss": 0.8213,
571
+ "step": 1900
572
+ },
573
+ {
574
+ "epoch": 12.261146496815286,
575
+ "grad_norm": 5.253794193267822,
576
+ "learning_rate": 0.0005,
577
+ "loss": 0.8155,
578
+ "step": 1925
579
+ },
580
+ {
581
+ "epoch": 12.420382165605096,
582
+ "grad_norm": 4.998741149902344,
583
+ "learning_rate": 0.0005,
584
+ "loss": 0.8479,
585
+ "step": 1950
586
+ },
587
+ {
588
+ "epoch": 12.579617834394904,
589
+ "grad_norm": 5.674376010894775,
590
+ "learning_rate": 0.0005,
591
+ "loss": 0.8773,
592
+ "step": 1975
593
+ },
594
+ {
595
+ "epoch": 12.738853503184714,
596
+ "grad_norm": 5.010542869567871,
597
+ "learning_rate": 0.0005,
598
+ "loss": 0.8844,
599
+ "step": 2000
600
+ },
601
+ {
602
+ "epoch": 12.738853503184714,
603
+ "eval_loss": 5.002076148986816,
604
+ "eval_runtime": 288.6571,
605
+ "eval_samples_per_second": 2.286,
606
+ "eval_steps_per_second": 0.146,
607
+ "eval_wer": 109.04387813757425,
608
+ "eval_wer_ortho": 108.59375,
609
+ "step": 2000
610
  }
611
  ],
612
  "logging_steps": 25,
 
626
  "attributes": {}
627
  }
628
  },
629
+ "total_flos": 9.22088071102464e+18,
630
  "train_batch_size": 16,
631
  "trial_name": null,
632
  "trial_params": null