OsamaMo commited on
Commit
45e3c0b
·
verified ·
1 Parent(s): 56ab5db

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3bdd31848289a0579c8d938f83d9aa2c79d7376eb33a9f734b7580e4c7abc03
3
  size 295488936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17ba270b888a201fead48ad37c2c2e228e832cc5e2304c9d48ddcc2a4ab95b9d
3
  size 295488936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b763425d9fd22fe996ab8b1879038cdbd99d1f1e8fd06082bf71de44f3876eb0
3
  size 591203178
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94ad12c53cb7962a72e2d80a27249286394dc06a5b1f83bd4257087da8221ea0
3
  size 591203178
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c062f7f375beded48b5337f5a3f3a5cb38807fa3e85dbf3e294c0ab6b627bfc2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5edb34d031c0c2b447f3eaadb401a4c1e7e7e6d8c096e28b7092e01a8bd48c92
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fa6230140a4b6b6dc7109e0de23ef0ecf8212f6adf804ca0ad2c134b70f1b5e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a13d6eba883692f8ed583bb8ce176c7e7a1118cd4c39d5498dbe981adfa197
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.35727045373347627,
5
  "eval_steps": 100,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -397,6 +397,396 @@
397
  "eval_news_finetune_val_samples_per_second": 1.396,
398
  "eval_news_finetune_val_steps_per_second": 1.396,
399
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
400
  }
401
  ],
402
  "logging_steps": 10,
@@ -416,7 +806,7 @@
416
  "attributes": {}
417
  }
418
  },
419
- "total_flos": 1.392907957026816e+16,
420
  "train_batch_size": 1,
421
  "trial_name": null,
422
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7145409074669525,
5
  "eval_steps": 100,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
397
  "eval_news_finetune_val_samples_per_second": 1.396,
398
  "eval_news_finetune_val_steps_per_second": 1.396,
399
  "step": 500
400
+ },
401
+ {
402
+ "epoch": 0.36441586280814575,
403
+ "grad_norm": 1.282714605331421,
404
+ "learning_rate": 9.985996777749747e-05,
405
+ "loss": 0.3636,
406
+ "step": 510
407
+ },
408
+ {
409
+ "epoch": 0.3715612718828153,
410
+ "grad_norm": 2.0360989570617676,
411
+ "learning_rate": 9.982713965133122e-05,
412
+ "loss": 0.4467,
413
+ "step": 520
414
+ },
415
+ {
416
+ "epoch": 0.37870668095748483,
417
+ "grad_norm": 1.7432626485824585,
418
+ "learning_rate": 9.979086430335417e-05,
419
+ "loss": 0.3875,
420
+ "step": 530
421
+ },
422
+ {
423
+ "epoch": 0.3858520900321543,
424
+ "grad_norm": 1.6053438186645508,
425
+ "learning_rate": 9.975114424322609e-05,
426
+ "loss": 0.3646,
427
+ "step": 540
428
+ },
429
+ {
430
+ "epoch": 0.39299749910682386,
431
+ "grad_norm": 1.2323070764541626,
432
+ "learning_rate": 9.970798221892452e-05,
433
+ "loss": 0.353,
434
+ "step": 550
435
+ },
436
+ {
437
+ "epoch": 0.4001429081814934,
438
+ "grad_norm": 1.16932213306427,
439
+ "learning_rate": 9.966138121655445e-05,
440
+ "loss": 0.331,
441
+ "step": 560
442
+ },
443
+ {
444
+ "epoch": 0.40728831725616294,
445
+ "grad_norm": 1.8134998083114624,
446
+ "learning_rate": 9.961134446014184e-05,
447
+ "loss": 0.3132,
448
+ "step": 570
449
+ },
450
+ {
451
+ "epoch": 0.4144337263308324,
452
+ "grad_norm": 1.4292124509811401,
453
+ "learning_rate": 9.955787541141055e-05,
454
+ "loss": 0.3017,
455
+ "step": 580
456
+ },
457
+ {
458
+ "epoch": 0.42157913540550196,
459
+ "grad_norm": 1.4605034589767456,
460
+ "learning_rate": 9.950097776954284e-05,
461
+ "loss": 0.3596,
462
+ "step": 590
463
+ },
464
+ {
465
+ "epoch": 0.4287245444801715,
466
+ "grad_norm": 1.2365972995758057,
467
+ "learning_rate": 9.944065547092345e-05,
468
+ "loss": 0.3399,
469
+ "step": 600
470
+ },
471
+ {
472
+ "epoch": 0.4287245444801715,
473
+ "eval_news_finetune_val_loss": 0.36549311876296997,
474
+ "eval_news_finetune_val_runtime": 1002.8044,
475
+ "eval_news_finetune_val_samples_per_second": 1.396,
476
+ "eval_news_finetune_val_steps_per_second": 1.396,
477
+ "step": 600
478
+ },
479
+ {
480
+ "epoch": 0.43586995355484104,
481
+ "grad_norm": 1.0590678453445435,
482
+ "learning_rate": 9.937691268886725e-05,
483
+ "loss": 0.3747,
484
+ "step": 610
485
+ },
486
+ {
487
+ "epoch": 0.4430153626295105,
488
+ "grad_norm": 0.9111473560333252,
489
+ "learning_rate": 9.930975383333056e-05,
490
+ "loss": 0.2868,
491
+ "step": 620
492
+ },
493
+ {
494
+ "epoch": 0.45016077170418006,
495
+ "grad_norm": 2.0456018447875977,
496
+ "learning_rate": 9.923918355060599e-05,
497
+ "loss": 0.3289,
498
+ "step": 630
499
+ },
500
+ {
501
+ "epoch": 0.4573061807788496,
502
+ "grad_norm": 1.5998501777648926,
503
+ "learning_rate": 9.916520672300107e-05,
504
+ "loss": 0.3664,
505
+ "step": 640
506
+ },
507
+ {
508
+ "epoch": 0.4644515898535191,
509
+ "grad_norm": 1.0773181915283203,
510
+ "learning_rate": 9.908782846850037e-05,
511
+ "loss": 0.3432,
512
+ "step": 650
513
+ },
514
+ {
515
+ "epoch": 0.4715969989281886,
516
+ "grad_norm": 1.244042158126831,
517
+ "learning_rate": 9.900705414041154e-05,
518
+ "loss": 0.3242,
519
+ "step": 660
520
+ },
521
+ {
522
+ "epoch": 0.47874240800285817,
523
+ "grad_norm": 1.8120310306549072,
524
+ "learning_rate": 9.892288932699484e-05,
525
+ "loss": 0.317,
526
+ "step": 670
527
+ },
528
+ {
529
+ "epoch": 0.4858878170775277,
530
+ "grad_norm": 0.7863224148750305,
531
+ "learning_rate": 9.883533985107663e-05,
532
+ "loss": 0.322,
533
+ "step": 680
534
+ },
535
+ {
536
+ "epoch": 0.4930332261521972,
537
+ "grad_norm": 1.223832130432129,
538
+ "learning_rate": 9.874441176964642e-05,
539
+ "loss": 0.343,
540
+ "step": 690
541
+ },
542
+ {
543
+ "epoch": 0.5001786352268668,
544
+ "grad_norm": 0.9870743155479431,
545
+ "learning_rate": 9.865011137343787e-05,
546
+ "loss": 0.3278,
547
+ "step": 700
548
+ },
549
+ {
550
+ "epoch": 0.5001786352268668,
551
+ "eval_news_finetune_val_loss": 0.35386842489242554,
552
+ "eval_news_finetune_val_runtime": 1003.4109,
553
+ "eval_news_finetune_val_samples_per_second": 1.395,
554
+ "eval_news_finetune_val_steps_per_second": 1.395,
555
+ "step": 700
556
+ },
557
+ {
558
+ "epoch": 0.5073240443015362,
559
+ "grad_norm": 1.3699963092803955,
560
+ "learning_rate": 9.85524451864936e-05,
561
+ "loss": 0.3902,
562
+ "step": 710
563
+ },
564
+ {
565
+ "epoch": 0.5144694533762058,
566
+ "grad_norm": 1.7188071012496948,
567
+ "learning_rate": 9.845141996571384e-05,
568
+ "loss": 0.369,
569
+ "step": 720
570
+ },
571
+ {
572
+ "epoch": 0.5216148624508753,
573
+ "grad_norm": 0.4889034628868103,
574
+ "learning_rate": 9.834704270038888e-05,
575
+ "loss": 0.3174,
576
+ "step": 730
577
+ },
578
+ {
579
+ "epoch": 0.5287602715255448,
580
+ "grad_norm": 0.8782143592834473,
581
+ "learning_rate": 9.823932061171561e-05,
582
+ "loss": 0.3501,
583
+ "step": 740
584
+ },
585
+ {
586
+ "epoch": 0.5359056806002144,
587
+ "grad_norm": 2.4089126586914062,
588
+ "learning_rate": 9.812826115229789e-05,
589
+ "loss": 0.3292,
590
+ "step": 750
591
+ },
592
+ {
593
+ "epoch": 0.5430510896748839,
594
+ "grad_norm": 1.6382787227630615,
595
+ "learning_rate": 9.801387200563096e-05,
596
+ "loss": 0.459,
597
+ "step": 760
598
+ },
599
+ {
600
+ "epoch": 0.5501964987495535,
601
+ "grad_norm": 1.443916916847229,
602
+ "learning_rate": 9.789616108556992e-05,
603
+ "loss": 0.3409,
604
+ "step": 770
605
+ },
606
+ {
607
+ "epoch": 0.5573419078242229,
608
+ "grad_norm": 1.632278323173523,
609
+ "learning_rate": 9.77751365357821e-05,
610
+ "loss": 0.281,
611
+ "step": 780
612
+ },
613
+ {
614
+ "epoch": 0.5644873168988924,
615
+ "grad_norm": 2.1452109813690186,
616
+ "learning_rate": 9.765080672918374e-05,
617
+ "loss": 0.3511,
618
+ "step": 790
619
+ },
620
+ {
621
+ "epoch": 0.571632725973562,
622
+ "grad_norm": 1.2721842527389526,
623
+ "learning_rate": 9.752318026736078e-05,
624
+ "loss": 0.2298,
625
+ "step": 800
626
+ },
627
+ {
628
+ "epoch": 0.571632725973562,
629
+ "eval_news_finetune_val_loss": 0.34554028511047363,
630
+ "eval_news_finetune_val_runtime": 1003.3342,
631
+ "eval_news_finetune_val_samples_per_second": 1.395,
632
+ "eval_news_finetune_val_steps_per_second": 1.395,
633
+ "step": 800
634
+ },
635
+ {
636
+ "epoch": 0.5787781350482315,
637
+ "grad_norm": 2.5264174938201904,
638
+ "learning_rate": 9.739226597997359e-05,
639
+ "loss": 0.3214,
640
+ "step": 810
641
+ },
642
+ {
643
+ "epoch": 0.585923544122901,
644
+ "grad_norm": 1.4553183317184448,
645
+ "learning_rate": 9.725807292414629e-05,
646
+ "loss": 0.2697,
647
+ "step": 820
648
+ },
649
+ {
650
+ "epoch": 0.5930689531975706,
651
+ "grad_norm": 2.2111873626708984,
652
+ "learning_rate": 9.712061038384002e-05,
653
+ "loss": 0.3315,
654
+ "step": 830
655
+ },
656
+ {
657
+ "epoch": 0.6002143622722401,
658
+ "grad_norm": 1.4308302402496338,
659
+ "learning_rate": 9.697988786921071e-05,
660
+ "loss": 0.4036,
661
+ "step": 840
662
+ },
663
+ {
664
+ "epoch": 0.6073597713469097,
665
+ "grad_norm": 1.8136054277420044,
666
+ "learning_rate": 9.683591511595107e-05,
667
+ "loss": 0.2946,
668
+ "step": 850
669
+ },
670
+ {
671
+ "epoch": 0.6145051804215791,
672
+ "grad_norm": 1.8586084842681885,
673
+ "learning_rate": 9.668870208461713e-05,
674
+ "loss": 0.2259,
675
+ "step": 860
676
+ },
677
+ {
678
+ "epoch": 0.6216505894962486,
679
+ "grad_norm": 1.1640444993972778,
680
+ "learning_rate": 9.653825895993908e-05,
681
+ "loss": 0.4,
682
+ "step": 870
683
+ },
684
+ {
685
+ "epoch": 0.6287959985709182,
686
+ "grad_norm": 1.386013388633728,
687
+ "learning_rate": 9.63845961501166e-05,
688
+ "loss": 0.2804,
689
+ "step": 880
690
+ },
691
+ {
692
+ "epoch": 0.6359414076455877,
693
+ "grad_norm": 2.1413650512695312,
694
+ "learning_rate": 9.622772428609887e-05,
695
+ "loss": 0.3593,
696
+ "step": 890
697
+ },
698
+ {
699
+ "epoch": 0.6430868167202572,
700
+ "grad_norm": 1.5462217330932617,
701
+ "learning_rate": 9.606765422084908e-05,
702
+ "loss": 0.3058,
703
+ "step": 900
704
+ },
705
+ {
706
+ "epoch": 0.6430868167202572,
707
+ "eval_news_finetune_val_loss": 0.3292103707790375,
708
+ "eval_news_finetune_val_runtime": 1003.4558,
709
+ "eval_news_finetune_val_samples_per_second": 1.395,
710
+ "eval_news_finetune_val_steps_per_second": 1.395,
711
+ "step": 900
712
+ },
713
+ {
714
+ "epoch": 0.6502322257949268,
715
+ "grad_norm": 1.0373942852020264,
716
+ "learning_rate": 9.590439702859351e-05,
717
+ "loss": 0.3318,
718
+ "step": 910
719
+ },
720
+ {
721
+ "epoch": 0.6573776348695963,
722
+ "grad_norm": 1.2724213600158691,
723
+ "learning_rate": 9.573796400405544e-05,
724
+ "loss": 0.3328,
725
+ "step": 920
726
+ },
727
+ {
728
+ "epoch": 0.6645230439442658,
729
+ "grad_norm": 0.8528966903686523,
730
+ "learning_rate": 9.55683666616737e-05,
731
+ "loss": 0.2673,
732
+ "step": 930
733
+ },
734
+ {
735
+ "epoch": 0.6716684530189353,
736
+ "grad_norm": 1.65499746799469,
737
+ "learning_rate": 9.539561673480612e-05,
738
+ "loss": 0.3538,
739
+ "step": 940
740
+ },
741
+ {
742
+ "epoch": 0.6788138620936048,
743
+ "grad_norm": 2.341379404067993,
744
+ "learning_rate": 9.521972617491767e-05,
745
+ "loss": 0.3228,
746
+ "step": 950
747
+ },
748
+ {
749
+ "epoch": 0.6859592711682744,
750
+ "grad_norm": 1.4938244819641113,
751
+ "learning_rate": 9.504070715075372e-05,
752
+ "loss": 0.3974,
753
+ "step": 960
754
+ },
755
+ {
756
+ "epoch": 0.6931046802429439,
757
+ "grad_norm": 1.0390361547470093,
758
+ "learning_rate": 9.485857204749811e-05,
759
+ "loss": 0.3236,
760
+ "step": 970
761
+ },
762
+ {
763
+ "epoch": 0.7002500893176135,
764
+ "grad_norm": 3.8845393657684326,
765
+ "learning_rate": 9.467333346591632e-05,
766
+ "loss": 0.3027,
767
+ "step": 980
768
+ },
769
+ {
770
+ "epoch": 0.707395498392283,
771
+ "grad_norm": 1.3295674324035645,
772
+ "learning_rate": 9.448500422148364e-05,
773
+ "loss": 0.3005,
774
+ "step": 990
775
+ },
776
+ {
777
+ "epoch": 0.7145409074669525,
778
+ "grad_norm": 1.0146369934082031,
779
+ "learning_rate": 9.429359734349863e-05,
780
+ "loss": 0.294,
781
+ "step": 1000
782
+ },
783
+ {
784
+ "epoch": 0.7145409074669525,
785
+ "eval_news_finetune_val_loss": 0.3208242654800415,
786
+ "eval_news_finetune_val_runtime": 1003.2491,
787
+ "eval_news_finetune_val_samples_per_second": 1.395,
788
+ "eval_news_finetune_val_steps_per_second": 1.395,
789
+ "step": 1000
790
  }
791
  ],
792
  "logging_steps": 10,
 
806
  "attributes": {}
807
  }
808
  },
809
+ "total_flos": 2.760906963972096e+16,
810
  "train_batch_size": 1,
811
  "trial_name": null,
812
  "trial_params": null