ManyingZ committed on
Commit
5f8ba07
·
verified ·
1 Parent(s): dafd7be

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:763a2e6211692a29489c663be78e38405ddeebee7123cf422bef0897660522b5
3
  size 1852600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c004f34c54fd1f3daf8e261d84a7a1757591cd731ccc03e414920ea5c14307b
3
  size 1852600
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f2e89a0b2817e3604c09bf67f0d4eb755faa1736d27ad6237f97d28a1b19ceb5
3
  size 1108346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:851725364a78e2688d08b65ac507b37eb1cd1824bb79778d93a924c368ade572
3
  size 1108346
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80f2397b349c3d7d219331d0f9f17be72806daa1aedfdb0787087c6d5818527b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:471500541aec9e4570286570c42981a081703fc6b0eca9480f56cf2521c795bd
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c26800e6aad79542c71a265990c35470e0f0c258d88e99aef73f16b83c77e16
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c5572f1342b41c6a930522d275b4fa95be59a8658b646079d52144a96dd33c
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 146.4307504575961,
5
  "eval_steps": 500,
6
- "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -10507,6 +10507,181 @@
10507
  "learning_rate": 3.934426229508197e-06,
10508
  "loss": 0.1866,
10509
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10510
  }
10511
  ],
10512
  "logging_steps": 20,
@@ -10526,7 +10701,7 @@
10526
  "attributes": {}
10527
  }
10528
  },
10529
- "total_flos": 3.859765296186163e+17,
10530
  "train_batch_size": 1,
10531
  "trial_name": null,
10532
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 148.87126296522268,
5
  "eval_steps": 500,
6
+ "global_step": 30500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
10507
  "learning_rate": 3.934426229508197e-06,
10508
  "loss": 0.1866,
10509
  "step": 30000
10510
+ },
10511
+ {
10512
+ "epoch": 146.52837095790116,
10513
+ "grad_norm": 2.8058741092681885,
10514
+ "learning_rate": 3.8032786885245906e-06,
10515
+ "loss": 0.1751,
10516
+ "step": 30020
10517
+ },
10518
+ {
10519
+ "epoch": 146.62599145820622,
10520
+ "grad_norm": 2.752978563308716,
10521
+ "learning_rate": 3.672131147540984e-06,
10522
+ "loss": 0.1826,
10523
+ "step": 30040
10524
+ },
10525
+ {
10526
+ "epoch": 146.7236119585113,
10527
+ "grad_norm": 3.0315961837768555,
10528
+ "learning_rate": 3.540983606557377e-06,
10529
+ "loss": 0.1623,
10530
+ "step": 30060
10531
+ },
10532
+ {
10533
+ "epoch": 146.82123245881635,
10534
+ "grad_norm": 3.3782765865325928,
10535
+ "learning_rate": 3.409836065573771e-06,
10536
+ "loss": 0.1784,
10537
+ "step": 30080
10538
+ },
10539
+ {
10540
+ "epoch": 146.9188529591214,
10541
+ "grad_norm": 2.961002826690674,
10542
+ "learning_rate": 3.278688524590164e-06,
10543
+ "loss": 0.1764,
10544
+ "step": 30100
10545
+ },
10546
+ {
10547
+ "epoch": 147.0164734594265,
10548
+ "grad_norm": 3.775484561920166,
10549
+ "learning_rate": 3.1475409836065574e-06,
10550
+ "loss": 0.1595,
10551
+ "step": 30120
10552
+ },
10553
+ {
10554
+ "epoch": 147.11409395973155,
10555
+ "grad_norm": 4.18531608581543,
10556
+ "learning_rate": 3.016393442622951e-06,
10557
+ "loss": 0.1599,
10558
+ "step": 30140
10559
+ },
10560
+ {
10561
+ "epoch": 147.2117144600366,
10562
+ "grad_norm": 2.9723432064056396,
10563
+ "learning_rate": 2.8852459016393446e-06,
10564
+ "loss": 0.159,
10565
+ "step": 30160
10566
+ },
10567
+ {
10568
+ "epoch": 147.30933496034166,
10569
+ "grad_norm": 3.2833070755004883,
10570
+ "learning_rate": 2.754098360655738e-06,
10571
+ "loss": 0.1657,
10572
+ "step": 30180
10573
+ },
10574
+ {
10575
+ "epoch": 147.40695546064674,
10576
+ "grad_norm": 3.4174959659576416,
10577
+ "learning_rate": 2.6229508196721314e-06,
10578
+ "loss": 0.175,
10579
+ "step": 30200
10580
+ },
10581
+ {
10582
+ "epoch": 147.5045759609518,
10583
+ "grad_norm": 3.3127195835113525,
10584
+ "learning_rate": 2.491803278688525e-06,
10585
+ "loss": 0.1815,
10586
+ "step": 30220
10587
+ },
10588
+ {
10589
+ "epoch": 147.60219646125685,
10590
+ "grad_norm": 3.7137949466705322,
10591
+ "learning_rate": 2.360655737704918e-06,
10592
+ "loss": 0.198,
10593
+ "step": 30240
10594
+ },
10595
+ {
10596
+ "epoch": 147.69981696156194,
10597
+ "grad_norm": 2.630924701690674,
10598
+ "learning_rate": 2.2295081967213117e-06,
10599
+ "loss": 0.1687,
10600
+ "step": 30260
10601
+ },
10602
+ {
10603
+ "epoch": 147.797437461867,
10604
+ "grad_norm": 3.330245018005371,
10605
+ "learning_rate": 2.098360655737705e-06,
10606
+ "loss": 0.1581,
10607
+ "step": 30280
10608
+ },
10609
+ {
10610
+ "epoch": 147.89505796217205,
10611
+ "grad_norm": 3.237410068511963,
10612
+ "learning_rate": 1.9672131147540985e-06,
10613
+ "loss": 0.2022,
10614
+ "step": 30300
10615
+ },
10616
+ {
10617
+ "epoch": 147.99267846247713,
10618
+ "grad_norm": 2.633331537246704,
10619
+ "learning_rate": 1.836065573770492e-06,
10620
+ "loss": 0.1613,
10621
+ "step": 30320
10622
+ },
10623
+ {
10624
+ "epoch": 148.09029896278219,
10625
+ "grad_norm": 2.527902603149414,
10626
+ "learning_rate": 1.7049180327868855e-06,
10627
+ "loss": 0.1749,
10628
+ "step": 30340
10629
+ },
10630
+ {
10631
+ "epoch": 148.18791946308724,
10632
+ "grad_norm": 2.9230234622955322,
10633
+ "learning_rate": 1.5737704918032787e-06,
10634
+ "loss": 0.1464,
10635
+ "step": 30360
10636
+ },
10637
+ {
10638
+ "epoch": 148.28553996339232,
10639
+ "grad_norm": 2.591038703918457,
10640
+ "learning_rate": 1.4426229508196723e-06,
10641
+ "loss": 0.1819,
10642
+ "step": 30380
10643
+ },
10644
+ {
10645
+ "epoch": 148.38316046369738,
10646
+ "grad_norm": 3.6826913356781006,
10647
+ "learning_rate": 1.3114754098360657e-06,
10648
+ "loss": 0.1909,
10649
+ "step": 30400
10650
+ },
10651
+ {
10652
+ "epoch": 148.48078096400243,
10653
+ "grad_norm": 3.1828205585479736,
10654
+ "learning_rate": 1.180327868852459e-06,
10655
+ "loss": 0.1727,
10656
+ "step": 30420
10657
+ },
10658
+ {
10659
+ "epoch": 148.57840146430752,
10660
+ "grad_norm": 3.3356974124908447,
10661
+ "learning_rate": 1.0491803278688525e-06,
10662
+ "loss": 0.1624,
10663
+ "step": 30440
10664
+ },
10665
+ {
10666
+ "epoch": 148.67602196461257,
10667
+ "grad_norm": 3.1692721843719482,
10668
+ "learning_rate": 9.18032786885246e-07,
10669
+ "loss": 0.1769,
10670
+ "step": 30460
10671
+ },
10672
+ {
10673
+ "epoch": 148.77364246491763,
10674
+ "grad_norm": 2.968018054962158,
10675
+ "learning_rate": 7.868852459016393e-07,
10676
+ "loss": 0.1594,
10677
+ "step": 30480
10678
+ },
10679
+ {
10680
+ "epoch": 148.87126296522268,
10681
+ "grad_norm": 3.693136692047119,
10682
+ "learning_rate": 6.557377049180328e-07,
10683
+ "loss": 0.1927,
10684
+ "step": 30500
10685
  }
10686
  ],
10687
  "logging_steps": 20,
 
10701
  "attributes": {}
10702
  }
10703
  },
10704
+ "total_flos": 3.924112697660375e+17,
10705
  "train_batch_size": 1,
10706
  "trial_name": null,
10707
  "trial_params": null