dq158 committed
Commit 96eeebb · Parent(s): d92ea5c

Training in progress, epoch 4, checkpoint

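This commit is the periodic checkpoint push that a transformers Trainer makes to the Hub during training; the adapter_model.safetensors entry suggests a PEFT adapter is what is being trained. As a rough sketch (not the author's actual script), a TrainingArguments consistent with the values recorded in trainer_state.json below might look like the following; hub_model_id and the numeric values come from the file, while the strategy flags are assumptions inferred from the epoch-boundary evals and the "epoch 4" commit message.

# Hypothetical sketch of TrainingArguments matching trainer_state.json;
# strategy flags are assumptions, numeric values are from the file.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="coqui",            # checkpoints saved as coqui/checkpoint-<step>
    push_to_hub=True,              # produces commits like this one
    hub_model_id="dq158/coqui",    # from "best_model_checkpoint"
    num_train_epochs=30,           # "num_train_epochs": 30
    logging_steps=500,             # "logging_steps": 500
    eval_steps=500,                # "eval_steps": 500
    save_steps=1000,               # "save_steps": 1000
    evaluation_strategy="epoch",   # assumption: evals land at epochs 3.0 and 4.0
    save_strategy="epoch",         # assumption: commit message says "epoch 4"
    load_best_model_at_end=True,   # assumption: "best_metric" is tracked
    metric_for_best_model="loss",  # "best_metric" equals eval_loss
)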
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c41d9243f7df55b50aa4775a15858d272a4ec8e8c563c0cd8ec6b9d0b3da9f8a
+ oid sha256:16aab112374e0637635192e631493e5cc9fe41a7e4e6e216c0bb99e95ae685a5
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:29c84bda34e9c4f410c99669a1f937ad7a00bee4dfd64bcf19e7795e6886813d
+ oid sha256:ddbb2f973e41d59a5b9f8697998020f6be69cf0c9c3ecccf1446dc42581a6fef
  size 2622266
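Both files above are stored through Git LFS, so each diff only swaps the sha256 oid in the pointer; the blob itself lives outside the git history. A quick way to confirm that a downloaded blob matches its pointer (a generic sketch; the path and expected hash are taken from the adapter pointer above, and the optimizer file works the same way):

# Recompute the sha256 of a downloaded LFS object and compare it
# against the oid recorded in the pointer file.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "16aab112374e0637635192e631493e5cc9fe41a7e4e6e216c0bb99e95ae685a5"
actual = sha256_of("last-checkpoint/adapter_model.safetensors")
assert actual == expected, f"hash mismatch: {actual}"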
last-checkpoint/rng_state.pth CHANGED
Binary files a/last-checkpoint/rng_state.pth and b/last-checkpoint/rng_state.pth differ
 
last-checkpoint/scheduler.pt CHANGED
Binary files a/last-checkpoint/scheduler.pt and b/last-checkpoint/scheduler.pt differ
 
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 3.0360162258148193,
- "best_model_checkpoint": "dq158/coqui/checkpoint-121044",
- "epoch": 3.0,
+ "best_metric": 3.0066018104553223,
+ "best_model_checkpoint": "dq158/coqui/checkpoint-161392",
+ "epoch": 4.0,
  "eval_steps": 500,
- "global_step": 121044,
+ "global_step": 161392,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1516,13 +1516,512 @@
  "eval_steps_per_second": 0.552,
  "eval_translation_length": 4591104,
  "step": 121044
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 7.80420680811743e-05,
+ "loss": 3.0822,
+ "step": 121500
+ },
+ {
+ "epoch": 3.02,
+ "learning_rate": 7.80259880896051e-05,
+ "loss": 3.0585,
+ "step": 122000
+ },
+ {
+ "epoch": 3.04,
+ "learning_rate": 7.800984400770236e-05,
+ "loss": 3.1003,
+ "step": 122500
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 7.799363586267584e-05,
+ "loss": 2.9734,
+ "step": 123000
+ },
+ {
+ "epoch": 3.06,
+ "learning_rate": 7.797736368184335e-05,
+ "loss": 3.1722,
+ "step": 123500
+ },
+ {
+ "epoch": 3.07,
+ "learning_rate": 7.79610274926306e-05,
+ "loss": 3.1007,
+ "step": 124000
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 7.794462732257115e-05,
+ "loss": 3.0796,
+ "step": 124500
+ },
+ {
+ "epoch": 3.1,
+ "learning_rate": 7.792816319930645e-05,
+ "loss": 3.0691,
+ "step": 125000
+ },
+ {
+ "epoch": 3.11,
+ "learning_rate": 7.791163515058568e-05,
+ "loss": 3.1111,
+ "step": 125500
+ },
+ {
+ "epoch": 3.12,
+ "learning_rate": 7.78950432042658e-05,
+ "loss": 3.0355,
+ "step": 126000
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 7.787838738831148e-05,
+ "loss": 3.0751,
+ "step": 126500
+ },
+ {
+ "epoch": 3.15,
+ "learning_rate": 7.786166773079499e-05,
+ "loss": 3.1197,
+ "step": 127000
+ },
+ {
+ "epoch": 3.16,
+ "learning_rate": 7.784488425989624e-05,
+ "loss": 3.0997,
+ "step": 127500
+ },
+ {
+ "epoch": 3.17,
+ "learning_rate": 7.782803700390268e-05,
+ "loss": 3.2085,
+ "step": 128000
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 7.781112599120928e-05,
+ "loss": 3.1391,
+ "step": 128500
+ },
+ {
+ "epoch": 3.2,
+ "learning_rate": 7.779415125031842e-05,
+ "loss": 3.1137,
+ "step": 129000
+ },
+ {
+ "epoch": 3.21,
+ "learning_rate": 7.777711280983994e-05,
+ "loss": 3.1373,
+ "step": 129500
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 7.776001069849104e-05,
+ "loss": 3.1228,
+ "step": 130000
+ },
+ {
+ "epoch": 3.23,
+ "learning_rate": 7.774284494509619e-05,
+ "loss": 3.0356,
+ "step": 130500
+ },
+ {
+ "epoch": 3.25,
+ "learning_rate": 7.772561557858717e-05,
+ "loss": 3.074,
+ "step": 131000
+ },
+ {
+ "epoch": 3.26,
+ "learning_rate": 7.770832262800298e-05,
+ "loss": 3.101,
+ "step": 131500
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 7.769096612248972e-05,
+ "loss": 3.0026,
+ "step": 132000
+ },
+ {
+ "epoch": 3.28,
+ "learning_rate": 7.767354609130067e-05,
+ "loss": 2.9642,
+ "step": 132500
+ },
+ {
+ "epoch": 3.3,
+ "learning_rate": 7.765606256379617e-05,
+ "loss": 3.1591,
+ "step": 133000
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 7.763851556944357e-05,
+ "loss": 3.1563,
+ "step": 133500
+ },
+ {
+ "epoch": 3.32,
+ "learning_rate": 7.762090513781717e-05,
+ "loss": 3.0032,
+ "step": 134000
+ },
+ {
+ "epoch": 3.33,
+ "learning_rate": 7.760323129859824e-05,
+ "loss": 3.0752,
+ "step": 134500
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 7.758549408157487e-05,
+ "loss": 3.1228,
+ "step": 135000
+ },
+ {
+ "epoch": 3.36,
+ "learning_rate": 7.7567693516642e-05,
+ "loss": 3.1929,
+ "step": 135500
+ },
+ {
+ "epoch": 3.37,
+ "learning_rate": 7.754982963380129e-05,
+ "loss": 3.0963,
+ "step": 136000
+ },
+ {
+ "epoch": 3.38,
+ "learning_rate": 7.75319024631612e-05,
+ "loss": 3.1731,
+ "step": 136500
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 7.751391203493678e-05,
+ "loss": 3.1822,
+ "step": 137000
+ },
+ {
+ "epoch": 3.41,
+ "learning_rate": 7.749585837944974e-05,
+ "loss": 3.0849,
+ "step": 137500
+ },
+ {
+ "epoch": 3.42,
+ "learning_rate": 7.747774152712836e-05,
+ "loss": 3.1609,
+ "step": 138000
+ },
+ {
+ "epoch": 3.43,
+ "learning_rate": 7.745956150850738e-05,
+ "loss": 3.0784,
+ "step": 138500
+ },
+ {
+ "epoch": 3.45,
+ "learning_rate": 7.744131835422808e-05,
+ "loss": 3.1391,
+ "step": 139000
+ },
+ {
+ "epoch": 3.46,
+ "learning_rate": 7.74230120950381e-05,
+ "loss": 3.1141,
+ "step": 139500
+ },
+ {
+ "epoch": 3.47,
+ "learning_rate": 7.740464276179143e-05,
+ "loss": 3.092,
+ "step": 140000
+ },
+ {
+ "epoch": 3.48,
+ "learning_rate": 7.738621038544842e-05,
+ "loss": 3.0634,
+ "step": 140500
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 7.736771499707562e-05,
+ "loss": 3.1131,
+ "step": 141000
+ },
+ {
+ "epoch": 3.51,
+ "learning_rate": 7.734915662784582e-05,
+ "loss": 3.1014,
+ "step": 141500
+ },
+ {
+ "epoch": 3.52,
+ "learning_rate": 7.733053530903793e-05,
+ "loss": 3.1118,
+ "step": 142000
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 7.7311851072037e-05,
+ "loss": 3.0762,
+ "step": 142500
+ },
+ {
+ "epoch": 3.54,
+ "learning_rate": 7.729310394833408e-05,
+ "loss": 3.0613,
+ "step": 143000
+ },
+ {
+ "epoch": 3.56,
+ "learning_rate": 7.727429396952622e-05,
+ "loss": 3.1007,
+ "step": 143500
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 7.725542116731643e-05,
+ "loss": 3.0766,
+ "step": 144000
+ },
+ {
+ "epoch": 3.58,
+ "learning_rate": 7.72364855735136e-05,
+ "loss": 3.0842,
+ "step": 144500
+ },
+ {
+ "epoch": 3.59,
+ "learning_rate": 7.721748722003242e-05,
+ "loss": 3.1643,
+ "step": 145000
+ },
+ {
+ "epoch": 3.61,
+ "learning_rate": 7.719842613889342e-05,
+ "loss": 3.0702,
+ "step": 145500
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 7.717930236222277e-05,
+ "loss": 3.2394,
+ "step": 146000
+ },
+ {
+ "epoch": 3.63,
+ "learning_rate": 7.716011592225239e-05,
+ "loss": 3.0847,
+ "step": 146500
+ },
+ {
+ "epoch": 3.64,
+ "learning_rate": 7.714086685131975e-05,
+ "loss": 3.1536,
+ "step": 147000
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 7.712155518186792e-05,
+ "loss": 3.0908,
+ "step": 147500
+ },
+ {
+ "epoch": 3.67,
+ "learning_rate": 7.710218094644548e-05,
+ "loss": 3.0379,
+ "step": 148000
+ },
+ {
+ "epoch": 3.68,
+ "learning_rate": 7.708274417770644e-05,
+ "loss": 3.0513,
+ "step": 148500
+ },
+ {
+ "epoch": 3.69,
+ "learning_rate": 7.706324490841019e-05,
+ "loss": 3.1075,
+ "step": 149000
+ },
+ {
+ "epoch": 3.71,
+ "learning_rate": 7.704368317142151e-05,
+ "loss": 3.1261,
+ "step": 149500
+ },
+ {
+ "epoch": 3.72,
+ "learning_rate": 7.702405899971042e-05,
+ "loss": 3.164,
+ "step": 150000
+ },
+ {
+ "epoch": 3.73,
+ "learning_rate": 7.700437242635218e-05,
+ "loss": 3.0038,
+ "step": 150500
+ },
+ {
+ "epoch": 3.74,
+ "learning_rate": 7.698462348452724e-05,
+ "loss": 3.1165,
+ "step": 151000
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 7.696481220752119e-05,
+ "loss": 3.048,
+ "step": 151500
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 7.694493862872459e-05,
+ "loss": 3.0922,
+ "step": 152000
+ },
+ {
+ "epoch": 3.78,
+ "learning_rate": 7.69250027816331e-05,
+ "loss": 3.1324,
+ "step": 152500
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 7.69050046998473e-05,
+ "loss": 3.1076,
+ "step": 153000
+ },
+ {
+ "epoch": 3.8,
+ "learning_rate": 7.688494441707267e-05,
+ "loss": 3.0944,
+ "step": 153500
+ },
+ {
+ "epoch": 3.82,
+ "learning_rate": 7.686482196711948e-05,
+ "loss": 3.0502,
+ "step": 154000
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 7.684463738390284e-05,
+ "loss": 3.0757,
+ "step": 154500
+ },
+ {
+ "epoch": 3.84,
+ "learning_rate": 7.682439070144252e-05,
+ "loss": 3.0951,
+ "step": 155000
+ },
+ {
+ "epoch": 3.85,
+ "learning_rate": 7.680408195386303e-05,
+ "loss": 3.0857,
+ "step": 155500
+ },
+ {
+ "epoch": 3.87,
+ "learning_rate": 7.678371117539342e-05,
+ "loss": 3.0341,
+ "step": 156000
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 7.676327840036733e-05,
+ "loss": 3.0872,
+ "step": 156500
+ },
+ {
+ "epoch": 3.89,
+ "learning_rate": 7.674278366322286e-05,
+ "loss": 3.0744,
+ "step": 157000
+ },
+ {
+ "epoch": 3.9,
+ "learning_rate": 7.672222699850256e-05,
+ "loss": 3.0927,
+ "step": 157500
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 7.670160844085336e-05,
+ "loss": 3.0717,
+ "step": 158000
+ },
+ {
+ "epoch": 3.93,
+ "learning_rate": 7.668092802502651e-05,
+ "loss": 2.964,
+ "step": 158500
+ },
+ {
+ "epoch": 3.94,
+ "learning_rate": 7.666018578587749e-05,
+ "loss": 2.9958,
+ "step": 159000
+ },
+ {
+ "epoch": 3.95,
+ "learning_rate": 7.663938175836599e-05,
+ "loss": 3.1135,
+ "step": 159500
+ },
+ {
+ "epoch": 3.97,
+ "learning_rate": 7.661851597755588e-05,
+ "loss": 3.0903,
+ "step": 160000
+ },
+ {
+ "epoch": 3.98,
+ "learning_rate": 7.659758847861505e-05,
+ "loss": 3.1399,
+ "step": 160500
+ },
+ {
+ "epoch": 3.99,
+ "learning_rate": 7.657659929681545e-05,
+ "loss": 3.0583,
+ "step": 161000
+ },
+ {
+ "epoch": 4.0,
+ "eval_bleu": 1.0,
+ "eval_brevity_penalty": 1.0,
+ "eval_length_ratio": 1.0,
+ "eval_loss": 3.0066018104553223,
+ "eval_precisions": [
+ 1.0,
+ 1.0,
+ 1.0,
+ 1.0
+ ],
+ "eval_reference_length": 4591104,
+ "eval_runtime": 8243.7716,
+ "eval_samples_per_second": 1.088,
+ "eval_steps_per_second": 0.544,
+ "eval_translation_length": 4591104,
+ "step": 161392
  }
  ],
  "logging_steps": 500,
  "max_steps": 1210440,
  "num_train_epochs": 30,
  "save_steps": 1000,
- "total_flos": 2.077405682985861e+18,
+ "total_flos": 2.769874243981148e+18,
  "trial_name": null,
  "trial_params": null
  }
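The entries added above live in the file's log_history list (the standard Trainer state schema). A small sketch for pulling the best checkpoint, the latest eval result, and the per-500-step training losses back out of the committed file; the relative path is an assumption:

# Summarize the committed trainer state: best checkpoint, latest eval,
# and the training-loss entries added in this commit.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["best_model_checkpoint"], state["best_metric"])

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_logs)} training entries; last loss "
      f"{train_logs[-1]['loss']} at step {train_logs[-1]['step']}")
print(f"latest eval_loss {eval_logs[-1]['eval_loss']} "
      f"at epoch {eval_logs[-1]['epoch']}")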