alchemist69 commited on
Commit
eb70221
·
verified ·
1 Parent(s): 0b5d243

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acd384a6953547266a4ca4b35d804ddbf7f833d72e55593b64b184c17ca199fa
3
  size 138995824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0132c9a94bb5a029d8962187619a8a9e1665a1d062842b7792739241a3d5e4
3
  size 138995824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e18afc83d99fdcafc99120ef8f3e72599da65f004cf66bb5517df7e0c98c73f7
3
  size 71077780
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98185beb6d8450af78168c1d1f5f5aa684a9501073d9756c0739b2cd8ec126a4
3
  size 71077780
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4fac5356df4813573b45df8124d62d84e25159dcd3dd27b9cdf540d0d792b57
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:975dafaada20f650872c80fa03024307b0bb45b77cfb8bb35988f52b21675d58
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ac116b8169c53ab649a7f15f2f32735f2c71ec2f803f70de8c655a513ee9cfc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5e1448282b30a66cf7be83aef18a251fdb6205c0184b42e99ae724602144bf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.3097492456436157,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 0.011229803899549404,
5
  "eval_steps": 50,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1447,6 +1447,364 @@
1447
  "eval_samples_per_second": 60.153,
1448
  "eval_steps_per_second": 15.039,
1449
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1450
  }
1451
  ],
1452
  "logging_steps": 1,
@@ -1475,7 +1833,7 @@
1475
  "attributes": {}
1476
  }
1477
  },
1478
- "total_flos": 1.3737759080448e+16,
1479
  "train_batch_size": 8,
1480
  "trial_name": null,
1481
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.296608567237854,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 0.014037254874436755,
5
  "eval_steps": 50,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1447
  "eval_samples_per_second": 60.153,
1448
  "eval_steps_per_second": 15.039,
1449
  "step": 200
1450
+ },
1451
+ {
1452
+ "epoch": 0.011285952919047151,
1453
+ "grad_norm": 0.46652019023895264,
1454
+ "learning_rate": 5.161079439470866e-05,
1455
+ "loss": 1.4637,
1456
+ "step": 201
1457
+ },
1458
+ {
1459
+ "epoch": 0.011342101938544898,
1460
+ "grad_norm": 0.4668552279472351,
1461
+ "learning_rate": 5.1208187261806615e-05,
1462
+ "loss": 1.387,
1463
+ "step": 202
1464
+ },
1465
+ {
1466
+ "epoch": 0.011398250958042646,
1467
+ "grad_norm": 0.41647565364837646,
1468
+ "learning_rate": 5.080550173136457e-05,
1469
+ "loss": 1.3549,
1470
+ "step": 203
1471
+ },
1472
+ {
1473
+ "epoch": 0.011454399977540393,
1474
+ "grad_norm": 0.42582443356513977,
1475
+ "learning_rate": 5.0402763933069496e-05,
1476
+ "loss": 1.4554,
1477
+ "step": 204
1478
+ },
1479
+ {
1480
+ "epoch": 0.011510548997038138,
1481
+ "grad_norm": 0.4078443944454193,
1482
+ "learning_rate": 5e-05,
1483
+ "loss": 1.3543,
1484
+ "step": 205
1485
+ },
1486
+ {
1487
+ "epoch": 0.011566698016535886,
1488
+ "grad_norm": 0.40546315908432007,
1489
+ "learning_rate": 4.9597236066930516e-05,
1490
+ "loss": 1.3727,
1491
+ "step": 206
1492
+ },
1493
+ {
1494
+ "epoch": 0.011622847036033633,
1495
+ "grad_norm": 0.3524981141090393,
1496
+ "learning_rate": 4.919449826863544e-05,
1497
+ "loss": 1.344,
1498
+ "step": 207
1499
+ },
1500
+ {
1501
+ "epoch": 0.01167899605553138,
1502
+ "grad_norm": 0.35291409492492676,
1503
+ "learning_rate": 4.87918127381934e-05,
1504
+ "loss": 1.4609,
1505
+ "step": 208
1506
+ },
1507
+ {
1508
+ "epoch": 0.011735145075029128,
1509
+ "grad_norm": 0.3335376977920532,
1510
+ "learning_rate": 4.8389205605291365e-05,
1511
+ "loss": 1.2127,
1512
+ "step": 209
1513
+ },
1514
+ {
1515
+ "epoch": 0.011791294094526875,
1516
+ "grad_norm": 0.33301424980163574,
1517
+ "learning_rate": 4.798670299452926e-05,
1518
+ "loss": 1.2796,
1519
+ "step": 210
1520
+ },
1521
+ {
1522
+ "epoch": 0.01184744311402462,
1523
+ "grad_norm": 0.3301815390586853,
1524
+ "learning_rate": 4.758433102372466e-05,
1525
+ "loss": 1.2696,
1526
+ "step": 211
1527
+ },
1528
+ {
1529
+ "epoch": 0.011903592133522368,
1530
+ "grad_norm": 0.3339588940143585,
1531
+ "learning_rate": 4.7182115802218126e-05,
1532
+ "loss": 1.2953,
1533
+ "step": 212
1534
+ },
1535
+ {
1536
+ "epoch": 0.011959741153020115,
1537
+ "grad_norm": 0.3536163568496704,
1538
+ "learning_rate": 4.678008342917903e-05,
1539
+ "loss": 1.3142,
1540
+ "step": 213
1541
+ },
1542
+ {
1543
+ "epoch": 0.012015890172517862,
1544
+ "grad_norm": 0.35881930589675903,
1545
+ "learning_rate": 4.6378259991911886e-05,
1546
+ "loss": 1.2631,
1547
+ "step": 214
1548
+ },
1549
+ {
1550
+ "epoch": 0.01207203919201561,
1551
+ "grad_norm": 0.34832143783569336,
1552
+ "learning_rate": 4.597667156416371e-05,
1553
+ "loss": 1.3148,
1554
+ "step": 215
1555
+ },
1556
+ {
1557
+ "epoch": 0.012128188211513357,
1558
+ "grad_norm": 0.3691975772380829,
1559
+ "learning_rate": 4.5575344204432084e-05,
1560
+ "loss": 1.3122,
1561
+ "step": 216
1562
+ },
1563
+ {
1564
+ "epoch": 0.012184337231011104,
1565
+ "grad_norm": 0.3549201488494873,
1566
+ "learning_rate": 4.5174303954274244e-05,
1567
+ "loss": 1.3102,
1568
+ "step": 217
1569
+ },
1570
+ {
1571
+ "epoch": 0.01224048625050885,
1572
+ "grad_norm": 0.36213812232017517,
1573
+ "learning_rate": 4.477357683661734e-05,
1574
+ "loss": 1.3093,
1575
+ "step": 218
1576
+ },
1577
+ {
1578
+ "epoch": 0.012296635270006597,
1579
+ "grad_norm": 0.3908132016658783,
1580
+ "learning_rate": 4.437318885406973e-05,
1581
+ "loss": 1.3132,
1582
+ "step": 219
1583
+ },
1584
+ {
1585
+ "epoch": 0.012352784289504344,
1586
+ "grad_norm": 0.3759017586708069,
1587
+ "learning_rate": 4.397316598723385e-05,
1588
+ "loss": 1.2697,
1589
+ "step": 220
1590
+ },
1591
+ {
1592
+ "epoch": 0.012408933309002092,
1593
+ "grad_norm": 0.35085058212280273,
1594
+ "learning_rate": 4.3573534193020274e-05,
1595
+ "loss": 1.3245,
1596
+ "step": 221
1597
+ },
1598
+ {
1599
+ "epoch": 0.012465082328499839,
1600
+ "grad_norm": 0.3695438802242279,
1601
+ "learning_rate": 4.317431940296343e-05,
1602
+ "loss": 1.2508,
1603
+ "step": 222
1604
+ },
1605
+ {
1606
+ "epoch": 0.012521231347997586,
1607
+ "grad_norm": 0.34485650062561035,
1608
+ "learning_rate": 4.277554752153895e-05,
1609
+ "loss": 1.2654,
1610
+ "step": 223
1611
+ },
1612
+ {
1613
+ "epoch": 0.012577380367495332,
1614
+ "grad_norm": 0.3876595199108124,
1615
+ "learning_rate": 4.237724442448273e-05,
1616
+ "loss": 1.2942,
1617
+ "step": 224
1618
+ },
1619
+ {
1620
+ "epoch": 0.01263352938699308,
1621
+ "grad_norm": 0.355491042137146,
1622
+ "learning_rate": 4.197943595711198e-05,
1623
+ "loss": 1.2436,
1624
+ "step": 225
1625
+ },
1626
+ {
1627
+ "epoch": 0.012689678406490826,
1628
+ "grad_norm": 0.3636676073074341,
1629
+ "learning_rate": 4.1582147932648074e-05,
1630
+ "loss": 1.2598,
1631
+ "step": 226
1632
+ },
1633
+ {
1634
+ "epoch": 0.012745827425988574,
1635
+ "grad_norm": 0.36330193281173706,
1636
+ "learning_rate": 4.118540613054156e-05,
1637
+ "loss": 1.3101,
1638
+ "step": 227
1639
+ },
1640
+ {
1641
+ "epoch": 0.012801976445486321,
1642
+ "grad_norm": 0.3567420542240143,
1643
+ "learning_rate": 4.078923629479943e-05,
1644
+ "loss": 1.2313,
1645
+ "step": 228
1646
+ },
1647
+ {
1648
+ "epoch": 0.012858125464984068,
1649
+ "grad_norm": 0.3769914209842682,
1650
+ "learning_rate": 4.039366413231458e-05,
1651
+ "loss": 1.3703,
1652
+ "step": 229
1653
+ },
1654
+ {
1655
+ "epoch": 0.012914274484481814,
1656
+ "grad_norm": 0.36686626076698303,
1657
+ "learning_rate": 3.9998715311197785e-05,
1658
+ "loss": 1.2676,
1659
+ "step": 230
1660
+ },
1661
+ {
1662
+ "epoch": 0.012970423503979561,
1663
+ "grad_norm": 0.36683401465415955,
1664
+ "learning_rate": 3.960441545911204e-05,
1665
+ "loss": 1.3576,
1666
+ "step": 231
1667
+ },
1668
+ {
1669
+ "epoch": 0.013026572523477309,
1670
+ "grad_norm": 0.38010573387145996,
1671
+ "learning_rate": 3.92107901616097e-05,
1672
+ "loss": 1.318,
1673
+ "step": 232
1674
+ },
1675
+ {
1676
+ "epoch": 0.013082721542975056,
1677
+ "grad_norm": 0.387713760137558,
1678
+ "learning_rate": 3.8817864960472236e-05,
1679
+ "loss": 1.2782,
1680
+ "step": 233
1681
+ },
1682
+ {
1683
+ "epoch": 0.013138870562472803,
1684
+ "grad_norm": 0.38116419315338135,
1685
+ "learning_rate": 3.842566535205286e-05,
1686
+ "loss": 1.2469,
1687
+ "step": 234
1688
+ },
1689
+ {
1690
+ "epoch": 0.01319501958197055,
1691
+ "grad_norm": 0.4058433175086975,
1692
+ "learning_rate": 3.803421678562213e-05,
1693
+ "loss": 1.1416,
1694
+ "step": 235
1695
+ },
1696
+ {
1697
+ "epoch": 0.013251168601468298,
1698
+ "grad_norm": 0.3921027183532715,
1699
+ "learning_rate": 3.764354466171652e-05,
1700
+ "loss": 1.2828,
1701
+ "step": 236
1702
+ },
1703
+ {
1704
+ "epoch": 0.013307317620966043,
1705
+ "grad_norm": 0.40361857414245605,
1706
+ "learning_rate": 3.725367433049033e-05,
1707
+ "loss": 1.3705,
1708
+ "step": 237
1709
+ },
1710
+ {
1711
+ "epoch": 0.01336346664046379,
1712
+ "grad_norm": 0.41897809505462646,
1713
+ "learning_rate": 3.6864631090070655e-05,
1714
+ "loss": 1.3478,
1715
+ "step": 238
1716
+ },
1717
+ {
1718
+ "epoch": 0.013419615659961538,
1719
+ "grad_norm": 0.3961997628211975,
1720
+ "learning_rate": 3.6476440184915815e-05,
1721
+ "loss": 1.1923,
1722
+ "step": 239
1723
+ },
1724
+ {
1725
+ "epoch": 0.013475764679459285,
1726
+ "grad_norm": 0.3889695107936859,
1727
+ "learning_rate": 3.608912680417737e-05,
1728
+ "loss": 1.2304,
1729
+ "step": 240
1730
+ },
1731
+ {
1732
+ "epoch": 0.013531913698957032,
1733
+ "grad_norm": 0.42765000462532043,
1734
+ "learning_rate": 3.570271608006555e-05,
1735
+ "loss": 1.3064,
1736
+ "step": 241
1737
+ },
1738
+ {
1739
+ "epoch": 0.01358806271845478,
1740
+ "grad_norm": 0.4513327479362488,
1741
+ "learning_rate": 3.531723308621847e-05,
1742
+ "loss": 1.3254,
1743
+ "step": 242
1744
+ },
1745
+ {
1746
+ "epoch": 0.013644211737952525,
1747
+ "grad_norm": 0.43143191933631897,
1748
+ "learning_rate": 3.493270283607522e-05,
1749
+ "loss": 1.2714,
1750
+ "step": 243
1751
+ },
1752
+ {
1753
+ "epoch": 0.013700360757450273,
1754
+ "grad_norm": 0.4802146255970001,
1755
+ "learning_rate": 3.4549150281252636e-05,
1756
+ "loss": 1.2424,
1757
+ "step": 244
1758
+ },
1759
+ {
1760
+ "epoch": 0.01375650977694802,
1761
+ "grad_norm": 0.4541313052177429,
1762
+ "learning_rate": 3.4166600309926387e-05,
1763
+ "loss": 1.1924,
1764
+ "step": 245
1765
+ },
1766
+ {
1767
+ "epoch": 0.013812658796445767,
1768
+ "grad_norm": 0.44022858142852783,
1769
+ "learning_rate": 3.3785077745215873e-05,
1770
+ "loss": 1.1144,
1771
+ "step": 246
1772
+ },
1773
+ {
1774
+ "epoch": 0.013868807815943514,
1775
+ "grad_norm": 0.46132948994636536,
1776
+ "learning_rate": 3.340460734357359e-05,
1777
+ "loss": 1.0814,
1778
+ "step": 247
1779
+ },
1780
+ {
1781
+ "epoch": 0.013924956835441262,
1782
+ "grad_norm": 0.5146449208259583,
1783
+ "learning_rate": 3.3025213793178646e-05,
1784
+ "loss": 1.2631,
1785
+ "step": 248
1786
+ },
1787
+ {
1788
+ "epoch": 0.013981105854939007,
1789
+ "grad_norm": 0.5137025117874146,
1790
+ "learning_rate": 3.264692171233485e-05,
1791
+ "loss": 1.1387,
1792
+ "step": 249
1793
+ },
1794
+ {
1795
+ "epoch": 0.014037254874436755,
1796
+ "grad_norm": 0.6411381959915161,
1797
+ "learning_rate": 3.226975564787322e-05,
1798
+ "loss": 1.0166,
1799
+ "step": 250
1800
+ },
1801
+ {
1802
+ "epoch": 0.014037254874436755,
1803
+ "eval_loss": 1.296608567237854,
1804
+ "eval_runtime": 499.0876,
1805
+ "eval_samples_per_second": 60.1,
1806
+ "eval_steps_per_second": 15.025,
1807
+ "step": 250
1808
  }
1809
  ],
1810
  "logging_steps": 1,
 
1833
  "attributes": {}
1834
  }
1835
  },
1836
+ "total_flos": 1.717219885056e+16,
1837
  "train_batch_size": 8,
1838
  "trial_name": null,
1839
  "trial_params": null