romainnn commited on
Commit
ed546b0
·
verified ·
1 Parent(s): 1d35e45

Training in progress, step 2652, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60a8247fd6ba1f9fe0dbc2ab7765133457470ca6d3a44c71bf94f3d4c10f9e4b
3
  size 58680
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf1889ef54561b0d470d85469e087b94d1371deb5fb4f4a1453244b0fbaf7b20
3
  size 58680
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba646ce7e6e999342a8ea23619ed724cecef86f809b7b64b8e29bc9e76ab814c
3
  size 127270
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ed220a640223cfeb97a4344a0b768d38922625171661b1587c8b8e38e97fdc
3
  size 127270
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da464f1ab8886439566680dd97216fa326d723519bedac0470d5e0944caeac13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baff5d24dc0ac30e308dd3f1756ee34ebed09385f83a67c74ff17f238a33f7e6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c97ae609b712bd37f007cb4326617d16b954d56d87dcc6a35ec5b0ba67ade88b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cac0cec479cc76a5d9126abb2015be2132cd8378ba61d1add8aa7d534c79d05
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 10.27136516571045,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2600",
4
- "epoch": 1.7828434313137373,
5
  "eval_steps": 100,
6
- "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18423,6 +18423,370 @@
18423
  "eval_samples_per_second": 336.607,
18424
  "eval_steps_per_second": 84.282,
18425
  "step": 2600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18426
  }
18427
  ],
18428
  "logging_steps": 1,
@@ -18446,12 +18810,12 @@
18446
  "should_evaluate": false,
18447
  "should_log": false,
18448
  "should_save": true,
18449
- "should_training_stop": false
18450
  },
18451
  "attributes": {}
18452
  }
18453
  },
18454
- "total_flos": 1074706513920000.0,
18455
  "train_batch_size": 4,
18456
  "trial_name": null,
18457
  "trial_params": null
 
1
  {
2
  "best_metric": 10.27136516571045,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2600",
4
+ "epoch": 1.8184934441683092,
5
  "eval_steps": 100,
6
+ "global_step": 2652,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18423
  "eval_samples_per_second": 336.607,
18424
  "eval_steps_per_second": 84.282,
18425
  "step": 2600
18426
+ },
18427
+ {
18428
+ "epoch": 1.7835290084840176,
18429
+ "grad_norm": 0.6825293302536011,
18430
+ "learning_rate": 1.8382787692204917e-07,
18431
+ "loss": 82.2117,
18432
+ "step": 2601
18433
+ },
18434
+ {
18435
+ "epoch": 1.7842145856542977,
18436
+ "grad_norm": 0.8272702097892761,
18437
+ "learning_rate": 1.7669171929617366e-07,
18438
+ "loss": 82.1916,
18439
+ "step": 2602
18440
+ },
18441
+ {
18442
+ "epoch": 1.784900162824578,
18443
+ "grad_norm": 0.5631828904151917,
18444
+ "learning_rate": 1.6969670684205963e-07,
18445
+ "loss": 82.2169,
18446
+ "step": 2603
18447
+ },
18448
+ {
18449
+ "epoch": 1.785585739994858,
18450
+ "grad_norm": 0.6313933730125427,
18451
+ "learning_rate": 1.6284284945029537e-07,
18452
+ "loss": 82.264,
18453
+ "step": 2604
18454
+ },
18455
+ {
18456
+ "epoch": 1.7862713171651383,
18457
+ "grad_norm": 0.8243266940116882,
18458
+ "learning_rate": 1.5613015681189558e-07,
18459
+ "loss": 82.2106,
18460
+ "step": 2605
18461
+ },
18462
+ {
18463
+ "epoch": 1.7869568943354186,
18464
+ "grad_norm": 0.6138453483581543,
18465
+ "learning_rate": 1.4955863841827898e-07,
18466
+ "loss": 82.191,
18467
+ "step": 2606
18468
+ },
18469
+ {
18470
+ "epoch": 1.7876424715056989,
18471
+ "grad_norm": 0.6013806462287903,
18472
+ "learning_rate": 1.431283035612352e-07,
18473
+ "loss": 82.1591,
18474
+ "step": 2607
18475
+ },
18476
+ {
18477
+ "epoch": 1.7883280486759792,
18478
+ "grad_norm": 0.6489602327346802,
18479
+ "learning_rate": 1.3683916133293562e-07,
18480
+ "loss": 82.2066,
18481
+ "step": 2608
18482
+ },
18483
+ {
18484
+ "epoch": 1.7890136258462594,
18485
+ "grad_norm": 0.7366928458213806,
18486
+ "learning_rate": 1.3069122062591144e-07,
18487
+ "loss": 82.1006,
18488
+ "step": 2609
18489
+ },
18490
+ {
18491
+ "epoch": 1.7896992030165395,
18492
+ "grad_norm": 0.6633123159408569,
18493
+ "learning_rate": 1.246844901330535e-07,
18494
+ "loss": 82.1569,
18495
+ "step": 2610
18496
+ },
18497
+ {
18498
+ "epoch": 1.7903847801868198,
18499
+ "grad_norm": 0.6651855111122131,
18500
+ "learning_rate": 1.1881897834757904e-07,
18501
+ "loss": 82.2778,
18502
+ "step": 2611
18503
+ },
18504
+ {
18505
+ "epoch": 1.7910703573570999,
18506
+ "grad_norm": 0.6125953197479248,
18507
+ "learning_rate": 1.130946935630095e-07,
18508
+ "loss": 82.2915,
18509
+ "step": 2612
18510
+ },
18511
+ {
18512
+ "epoch": 1.7917559345273801,
18513
+ "grad_norm": 0.6645113825798035,
18514
+ "learning_rate": 1.0751164387322599e-07,
18515
+ "loss": 82.2116,
18516
+ "step": 2613
18517
+ },
18518
+ {
18519
+ "epoch": 1.7924415116976604,
18520
+ "grad_norm": 0.6014000177383423,
18521
+ "learning_rate": 1.0206983717235829e-07,
18522
+ "loss": 82.174,
18523
+ "step": 2614
18524
+ },
18525
+ {
18526
+ "epoch": 1.7931270888679407,
18527
+ "grad_norm": 0.6267678737640381,
18528
+ "learning_rate": 9.676928115486261e-08,
18529
+ "loss": 82.2418,
18530
+ "step": 2615
18531
+ },
18532
+ {
18533
+ "epoch": 1.793812666038221,
18534
+ "grad_norm": 0.6758940815925598,
18535
+ "learning_rate": 9.1609983315466e-08,
18536
+ "loss": 82.2699,
18537
+ "step": 2616
18538
+ },
18539
+ {
18540
+ "epoch": 1.7944982432085013,
18541
+ "grad_norm": 0.6676249504089355,
18542
+ "learning_rate": 8.659195094913308e-08,
18543
+ "loss": 82.1648,
18544
+ "step": 2617
18545
+ },
18546
+ {
18547
+ "epoch": 1.7951838203787815,
18548
+ "grad_norm": 0.5303319692611694,
18549
+ "learning_rate": 8.171519115113268e-08,
18550
+ "loss": 82.1988,
18551
+ "step": 2618
18552
+ },
18553
+ {
18554
+ "epoch": 1.7958693975490616,
18555
+ "grad_norm": 0.9017250537872314,
18556
+ "learning_rate": 7.6979710816949e-08,
18557
+ "loss": 82.2253,
18558
+ "step": 2619
18559
+ },
18560
+ {
18561
+ "epoch": 1.796554974719342,
18562
+ "grad_norm": 0.593103289604187,
18563
+ "learning_rate": 7.238551664232596e-08,
18564
+ "loss": 82.2429,
18565
+ "step": 2620
18566
+ },
18567
+ {
18568
+ "epoch": 1.797240551889622,
18569
+ "grad_norm": 0.859890341758728,
18570
+ "learning_rate": 6.79326151232118e-08,
18571
+ "loss": 82.1984,
18572
+ "step": 2621
18573
+ },
18574
+ {
18575
+ "epoch": 1.7979261290599022,
18576
+ "grad_norm": 0.6514622569084167,
18577
+ "learning_rate": 6.362101255579233e-08,
18578
+ "loss": 82.1653,
18579
+ "step": 2622
18580
+ },
18581
+ {
18582
+ "epoch": 1.7986117062301825,
18583
+ "grad_norm": 0.6877514719963074,
18584
+ "learning_rate": 5.945071503645761e-08,
18585
+ "loss": 82.243,
18586
+ "step": 2623
18587
+ },
18588
+ {
18589
+ "epoch": 1.7992972834004628,
18590
+ "grad_norm": 0.6594268083572388,
18591
+ "learning_rate": 5.542172846180194e-08,
18592
+ "loss": 82.2962,
18593
+ "step": 2624
18594
+ },
18595
+ {
18596
+ "epoch": 1.799982860570743,
18597
+ "grad_norm": 0.8793612718582153,
18598
+ "learning_rate": 5.1534058528612817e-08,
18599
+ "loss": 82.1784,
18600
+ "step": 2625
18601
+ },
18602
+ {
18603
+ "epoch": 1.8006684377410234,
18604
+ "grad_norm": 0.6758993864059448,
18605
+ "learning_rate": 4.7787710733859794e-08,
18606
+ "loss": 82.1447,
18607
+ "step": 2626
18608
+ },
18609
+ {
18610
+ "epoch": 1.8013540149113034,
18611
+ "grad_norm": 0.7371706962585449,
18612
+ "learning_rate": 4.418269037468337e-08,
18613
+ "loss": 82.266,
18614
+ "step": 2627
18615
+ },
18616
+ {
18617
+ "epoch": 1.8020395920815837,
18618
+ "grad_norm": 0.7626631259918213,
18619
+ "learning_rate": 4.071900254841721e-08,
18620
+ "loss": 82.238,
18621
+ "step": 2628
18622
+ },
18623
+ {
18624
+ "epoch": 1.8027251692518638,
18625
+ "grad_norm": 0.7885541915893555,
18626
+ "learning_rate": 3.739665215253263e-08,
18627
+ "loss": 82.2318,
18628
+ "step": 2629
18629
+ },
18630
+ {
18631
+ "epoch": 1.803410746422144,
18632
+ "grad_norm": 0.5564078092575073,
18633
+ "learning_rate": 3.4215643884660807e-08,
18634
+ "loss": 82.216,
18635
+ "step": 2630
18636
+ },
18637
+ {
18638
+ "epoch": 1.8040963235924243,
18639
+ "grad_norm": 0.7523298859596252,
18640
+ "learning_rate": 3.1175982242603876e-08,
18641
+ "loss": 82.1603,
18642
+ "step": 2631
18643
+ },
18644
+ {
18645
+ "epoch": 1.8047819007627046,
18646
+ "grad_norm": 0.7048127055168152,
18647
+ "learning_rate": 2.8277671524290506e-08,
18648
+ "loss": 82.223,
18649
+ "step": 2632
18650
+ },
18651
+ {
18652
+ "epoch": 1.805467477932985,
18653
+ "grad_norm": 0.6412562727928162,
18654
+ "learning_rate": 2.552071582777593e-08,
18655
+ "loss": 82.2456,
18656
+ "step": 2633
18657
+ },
18658
+ {
18659
+ "epoch": 1.8061530551032652,
18660
+ "grad_norm": 0.610815703868866,
18661
+ "learning_rate": 2.2905119051264135e-08,
18662
+ "loss": 82.2092,
18663
+ "step": 2634
18664
+ },
18665
+ {
18666
+ "epoch": 1.8068386322735452,
18667
+ "grad_norm": 0.7834846377372742,
18668
+ "learning_rate": 2.0430884893074544e-08,
18669
+ "loss": 82.243,
18670
+ "step": 2635
18671
+ },
18672
+ {
18673
+ "epoch": 1.8075242094438255,
18674
+ "grad_norm": 0.6227961778640747,
18675
+ "learning_rate": 1.809801685165313e-08,
18676
+ "loss": 82.3004,
18677
+ "step": 2636
18678
+ },
18679
+ {
18680
+ "epoch": 1.8082097866141056,
18681
+ "grad_norm": 0.7231442928314209,
18682
+ "learning_rate": 1.590651822556133e-08,
18683
+ "loss": 82.1649,
18684
+ "step": 2637
18685
+ },
18686
+ {
18687
+ "epoch": 1.8088953637843859,
18688
+ "grad_norm": 0.5944806933403015,
18689
+ "learning_rate": 1.38563921134649e-08,
18690
+ "loss": 82.1783,
18691
+ "step": 2638
18692
+ },
18693
+ {
18694
+ "epoch": 1.8095809409546662,
18695
+ "grad_norm": 0.7193268537521362,
18696
+ "learning_rate": 1.1947641414145062e-08,
18697
+ "loss": 82.2384,
18698
+ "step": 2639
18699
+ },
18700
+ {
18701
+ "epoch": 1.8102665181249464,
18702
+ "grad_norm": 0.7523531317710876,
18703
+ "learning_rate": 1.0180268826476268e-08,
18704
+ "loss": 82.2451,
18705
+ "step": 2640
18706
+ },
18707
+ {
18708
+ "epoch": 1.8109520952952267,
18709
+ "grad_norm": 0.6392510533332825,
18710
+ "learning_rate": 8.554276849426224e-09,
18711
+ "loss": 82.2968,
18712
+ "step": 2641
18713
+ },
18714
+ {
18715
+ "epoch": 1.811637672465507,
18716
+ "grad_norm": 0.6115372776985168,
18717
+ "learning_rate": 7.069667782089173e-09,
18718
+ "loss": 82.1473,
18719
+ "step": 2642
18720
+ },
18721
+ {
18722
+ "epoch": 1.8123232496357873,
18723
+ "grad_norm": 0.6424670815467834,
18724
+ "learning_rate": 5.726443723608199e-09,
18725
+ "loss": 82.2287,
18726
+ "step": 2643
18727
+ },
18728
+ {
18729
+ "epoch": 1.8130088268060673,
18730
+ "grad_norm": 0.6922582387924194,
18731
+ "learning_rate": 4.5246065732307276e-09,
18732
+ "loss": 82.1094,
18733
+ "step": 2644
18734
+ },
18735
+ {
18736
+ "epoch": 1.8136944039763476,
18737
+ "grad_norm": 0.7161763906478882,
18738
+ "learning_rate": 3.4641580303196307e-09,
18739
+ "loss": 82.2028,
18740
+ "step": 2645
18741
+ },
18742
+ {
18743
+ "epoch": 1.8143799811466277,
18744
+ "grad_norm": 0.6677362322807312,
18745
+ "learning_rate": 2.545099594275513e-09,
18746
+ "loss": 82.1961,
18747
+ "step": 2646
18748
+ },
18749
+ {
18750
+ "epoch": 1.815065558316908,
18751
+ "grad_norm": 0.6319419145584106,
18752
+ "learning_rate": 1.7674325646144241e-09,
18753
+ "loss": 82.2142,
18754
+ "step": 2647
18755
+ },
18756
+ {
18757
+ "epoch": 1.8157511354871883,
18758
+ "grad_norm": 0.7084919214248657,
18759
+ "learning_rate": 1.1311580409012478e-09,
18760
+ "loss": 82.1892,
18761
+ "step": 2648
18762
+ },
18763
+ {
18764
+ "epoch": 1.8164367126574685,
18765
+ "grad_norm": 0.6398366689682007,
18766
+ "learning_rate": 6.362769228163146e-10,
18767
+ "loss": 82.2728,
18768
+ "step": 2649
18769
+ },
18770
+ {
18771
+ "epoch": 1.8171222898277488,
18772
+ "grad_norm": 0.7294822335243225,
18773
+ "learning_rate": 2.827899100776854e-10,
18774
+ "loss": 82.3094,
18775
+ "step": 2650
18776
+ },
18777
+ {
18778
+ "epoch": 1.817807866998029,
18779
+ "grad_norm": 0.7141733169555664,
18780
+ "learning_rate": 7.069750250776608e-11,
18781
+ "loss": 82.2557,
18782
+ "step": 2651
18783
+ },
18784
+ {
18785
+ "epoch": 1.8184934441683092,
18786
+ "grad_norm": 0.6061791181564331,
18787
+ "learning_rate": 0.0,
18788
+ "loss": 82.2751,
18789
+ "step": 2652
18790
  }
18791
  ],
18792
  "logging_steps": 1,
 
18810
  "should_evaluate": false,
18811
  "should_log": false,
18812
  "should_save": true,
18813
+ "should_training_stop": true
18814
  },
18815
  "attributes": {}
18816
  }
18817
  },
18818
+ "total_flos": 1096200644198400.0,
18819
  "train_batch_size": 4,
18820
  "trial_name": null,
18821
  "trial_params": null