Training in progress, step 300, checkpoint

59a5d2b verified 15 days ago

54.4 kB

	{
	"best_metric": 1.2362135648727417,
	"best_model_checkpoint": "miner_id_24/checkpoint-300",
	"epoch": 0.03813912836200389,
	"eval_steps": 100,
	"global_step": 300,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0001271304278733463,
	"grad_norm": 0.5326213836669922,
	"learning_rate": 2e-05,
	"loss": 1.7856,
	"step": 1
	},
	{
	"epoch": 0.0001271304278733463,
	"eval_loss": 1.7346340417861938,
	"eval_runtime": 1249.3361,
	"eval_samples_per_second": 4.002,
	"eval_steps_per_second": 1.001,
	"step": 1
	},
	{
	"epoch": 0.0002542608557466926,
	"grad_norm": 0.5624520778656006,
	"learning_rate": 4e-05,
	"loss": 1.7626,
	"step": 2
	},
	{
	"epoch": 0.00038139128362003893,
	"grad_norm": 0.5890633463859558,
	"learning_rate": 6e-05,
	"loss": 1.739,
	"step": 3
	},
	{
	"epoch": 0.0005085217114933852,
	"grad_norm": 0.5437400937080383,
	"learning_rate": 8e-05,
	"loss": 1.6671,
	"step": 4
	},
	{
	"epoch": 0.0006356521393667316,
	"grad_norm": 0.6639446020126343,
	"learning_rate": 0.0001,
	"loss": 1.7684,
	"step": 5
	},
	{
	"epoch": 0.0007627825672400779,
	"grad_norm": 0.7031175494194031,
	"learning_rate": 0.00012,
	"loss": 1.7247,
	"step": 6
	},
	{
	"epoch": 0.0008899129951134241,
	"grad_norm": 0.5311002731323242,
	"learning_rate": 0.00014,
	"loss": 1.6195,
	"step": 7
	},
	{
	"epoch": 0.0010170434229867704,
	"grad_norm": 0.25101518630981445,
	"learning_rate": 0.00016,
	"loss": 1.5182,
	"step": 8
	},
	{
	"epoch": 0.0011441738508601168,
	"grad_norm": 0.8389205932617188,
	"learning_rate": 0.00018,
	"loss": 1.6646,
	"step": 9
	},
	{
	"epoch": 0.0012713042787334632,
	"grad_norm": 0.9317983388900757,
	"learning_rate": 0.0002,
	"loss": 1.6395,
	"step": 10
	},
	{
	"epoch": 0.0013984347066068094,
	"grad_norm": 0.48066481947898865,
	"learning_rate": 0.00019999832015210023,
	"loss": 1.5921,
	"step": 11
	},
	{
	"epoch": 0.0015255651344801557,
	"grad_norm": 0.2073744535446167,
	"learning_rate": 0.00019999328066483865,
	"loss": 1.4335,
	"step": 12
	},
	{
	"epoch": 0.0016526955623535021,
	"grad_norm": 0.22661754488945007,
	"learning_rate": 0.0001999848817075267,
	"loss": 1.477,
	"step": 13
	},
	{
	"epoch": 0.0017798259902268483,
	"grad_norm": 0.3149760663509369,
	"learning_rate": 0.00019997312356234386,
	"loss": 1.5713,
	"step": 14
	},
	{
	"epoch": 0.0019069564181001947,
	"grad_norm": 0.31392771005630493,
	"learning_rate": 0.00019995800662432798,
	"loss": 1.5414,
	"step": 15
	},
	{
	"epoch": 0.002034086845973541,
	"grad_norm": 0.2748984396457672,
	"learning_rate": 0.0001999395314013622,
	"loss": 1.5452,
	"step": 16
	},
	{
	"epoch": 0.0021612172738468874,
	"grad_norm": 0.19064395129680634,
	"learning_rate": 0.00019991769851415781,
	"loss": 1.5742,
	"step": 17
	},
	{
	"epoch": 0.0022883477017202336,
	"grad_norm": 0.1578415036201477,
	"learning_rate": 0.00019989250869623343,
	"loss": 1.5214,
	"step": 18
	},
	{
	"epoch": 0.0024154781295935798,
	"grad_norm": 0.20229928195476532,
	"learning_rate": 0.0001998639627938903,
	"loss": 1.3921,
	"step": 19
	},
	{
	"epoch": 0.0025426085574669264,
	"grad_norm": 0.2705669403076172,
	"learning_rate": 0.00019983206176618388,
	"loss": 1.4712,
	"step": 20
	},
	{
	"epoch": 0.0026697389853402725,
	"grad_norm": 0.27215054631233215,
	"learning_rate": 0.00019979680668489165,
	"loss": 1.4969,
	"step": 21
	},
	{
	"epoch": 0.0027968694132136187,
	"grad_norm": 0.20431619882583618,
	"learning_rate": 0.00019975819873447717,
	"loss": 1.431,
	"step": 22
	},
	{
	"epoch": 0.0029239998410869653,
	"grad_norm": 0.14068549871444702,
	"learning_rate": 0.00019971623921205005,
	"loss": 1.4543,
	"step": 23
	},
	{
	"epoch": 0.0030511302689603115,
	"grad_norm": 0.1594925820827484,
	"learning_rate": 0.00019967092952732264,
	"loss": 1.364,
	"step": 24
	},
	{
	"epoch": 0.0031782606968336576,
	"grad_norm": 0.17738410830497742,
	"learning_rate": 0.00019962227120256252,
	"loss": 1.4377,
	"step": 25
	},
	{
	"epoch": 0.0033053911247070042,
	"grad_norm": 0.1752498745918274,
	"learning_rate": 0.00019957026587254134,
	"loss": 1.3827,
	"step": 26
	},
	{
	"epoch": 0.0034325215525803504,
	"grad_norm": 0.18004052340984344,
	"learning_rate": 0.00019951491528448004,
	"loss": 1.3867,
	"step": 27
	},
	{
	"epoch": 0.0035596519804536966,
	"grad_norm": 0.1525774598121643,
	"learning_rate": 0.00019945622129799,
	"loss": 1.4164,
	"step": 28
	},
	{
	"epoch": 0.003686782408327043,
	"grad_norm": 0.1710219830274582,
	"learning_rate": 0.00019939418588501057,
	"loss": 1.4155,
	"step": 29
	},
	{
	"epoch": 0.0038139128362003893,
	"grad_norm": 0.20903100073337555,
	"learning_rate": 0.000199328811129743,
	"loss": 1.5239,
	"step": 30
	},
	{
	"epoch": 0.003941043264073736,
	"grad_norm": 0.18399456143379211,
	"learning_rate": 0.00019926009922858006,
	"loss": 1.3889,
	"step": 31
	},
	{
	"epoch": 0.004068173691947082,
	"grad_norm": 0.13330113887786865,
	"learning_rate": 0.0001991880524900327,
	"loss": 1.3587,
	"step": 32
	},
	{
	"epoch": 0.004195304119820428,
	"grad_norm": 0.13807597756385803,
	"learning_rate": 0.00019911267333465218,
	"loss": 1.4211,
	"step": 33
	},
	{
	"epoch": 0.004322434547693775,
	"grad_norm": 0.1550034135580063,
	"learning_rate": 0.0001990339642949488,
	"loss": 1.4317,
	"step": 34
	},
	{
	"epoch": 0.004449564975567121,
	"grad_norm": 0.1827971190214157,
	"learning_rate": 0.00019895192801530685,
	"loss": 1.4176,
	"step": 35
	},
	{
	"epoch": 0.004576695403440467,
	"grad_norm": 0.16028065979480743,
	"learning_rate": 0.00019886656725189575,
	"loss": 1.4122,
	"step": 36
	},
	{
	"epoch": 0.004703825831313814,
	"grad_norm": 0.14322146773338318,
	"learning_rate": 0.00019877788487257753,
	"loss": 1.423,
	"step": 37
	},
	{
	"epoch": 0.0048309562591871595,
	"grad_norm": 0.1685505211353302,
	"learning_rate": 0.00019868588385681032,
	"loss": 1.3702,
	"step": 38
	},
	{
	"epoch": 0.004958086687060506,
	"grad_norm": 0.1632552444934845,
	"learning_rate": 0.00019859056729554844,
	"loss": 1.3164,
	"step": 39
	},
	{
	"epoch": 0.005085217114933853,
	"grad_norm": 0.17149530351161957,
	"learning_rate": 0.00019849193839113833,
	"loss": 1.2799,
	"step": 40
	},
	{
	"epoch": 0.0052123475428071985,
	"grad_norm": 0.14993086457252502,
	"learning_rate": 0.00019839000045721118,
	"loss": 1.3412,
	"step": 41
	},
	{
	"epoch": 0.005339477970680545,
	"grad_norm": 0.15981823205947876,
	"learning_rate": 0.00019828475691857145,
	"loss": 1.3698,
	"step": 42
	},
	{
	"epoch": 0.005466608398553892,
	"grad_norm": 0.15311439335346222,
	"learning_rate": 0.00019817621131108196,
	"loss": 1.3792,
	"step": 43
	},
	{
	"epoch": 0.005593738826427237,
	"grad_norm": 0.19757720828056335,
	"learning_rate": 0.00019806436728154485,
	"loss": 1.4082,
	"step": 44
	},
	{
	"epoch": 0.005720869254300584,
	"grad_norm": 0.15765132009983063,
	"learning_rate": 0.00019794922858757928,
	"loss": 1.282,
	"step": 45
	},
	{
	"epoch": 0.005847999682173931,
	"grad_norm": 0.15940701961517334,
	"learning_rate": 0.00019783079909749515,
	"loss": 1.4016,
	"step": 46
	},
	{
	"epoch": 0.005975130110047276,
	"grad_norm": 0.1622331291437149,
	"learning_rate": 0.00019770908279016309,
	"loss": 1.3624,
	"step": 47
	},
	{
	"epoch": 0.006102260537920623,
	"grad_norm": 0.14866457879543304,
	"learning_rate": 0.00019758408375488071,
	"loss": 1.2807,
	"step": 48
	},
	{
	"epoch": 0.0062293909657939696,
	"grad_norm": 0.16648173332214355,
	"learning_rate": 0.00019745580619123535,
	"loss": 1.3617,
	"step": 49
	},
	{
	"epoch": 0.006356521393667315,
	"grad_norm": 0.16083629429340363,
	"learning_rate": 0.00019732425440896297,
	"loss": 1.3903,
	"step": 50
	},
	{
	"epoch": 0.006483651821540662,
	"grad_norm": 0.18012581765651703,
	"learning_rate": 0.00019718943282780323,
	"loss": 1.3472,
	"step": 51
	},
	{
	"epoch": 0.0066107822494140085,
	"grad_norm": 0.16914351284503937,
	"learning_rate": 0.00019705134597735113,
	"loss": 1.3765,
	"step": 52
	},
	{
	"epoch": 0.006737912677287354,
	"grad_norm": 0.15967325866222382,
	"learning_rate": 0.00019690999849690484,
	"loss": 1.3312,
	"step": 53
	},
	{
	"epoch": 0.006865043105160701,
	"grad_norm": 0.15996071696281433,
	"learning_rate": 0.00019676539513530968,
	"loss": 1.4227,
	"step": 54
	},
	{
	"epoch": 0.006992173533034047,
	"grad_norm": 0.1715613752603531,
	"learning_rate": 0.0001966175407507987,
	"loss": 1.3634,
	"step": 55
	},
	{
	"epoch": 0.007119303960907393,
	"grad_norm": 0.16633041203022003,
	"learning_rate": 0.00019646644031082948,
	"loss": 1.3279,
	"step": 56
	},
	{
	"epoch": 0.00724643438878074,
	"grad_norm": 0.16247525811195374,
	"learning_rate": 0.00019631209889191712,
	"loss": 1.3721,
	"step": 57
	},
	{
	"epoch": 0.007373564816654086,
	"grad_norm": 0.1603326052427292,
	"learning_rate": 0.00019615452167946385,
	"loss": 1.3212,
	"step": 58
	},
	{
	"epoch": 0.007500695244527432,
	"grad_norm": 0.16569744050502777,
	"learning_rate": 0.00019599371396758456,
	"loss": 1.3224,
	"step": 59
	},
	{
	"epoch": 0.007627825672400779,
	"grad_norm": 0.16010916233062744,
	"learning_rate": 0.0001958296811589293,
	"loss": 1.3022,
	"step": 60
	},
	{
	"epoch": 0.007754956100274125,
	"grad_norm": 0.1680569052696228,
	"learning_rate": 0.00019566242876450137,
	"loss": 1.3197,
	"step": 61
	},
	{
	"epoch": 0.007882086528147472,
	"grad_norm": 0.16432645916938782,
	"learning_rate": 0.00019549196240347248,
	"loss": 1.3167,
	"step": 62
	},
	{
	"epoch": 0.008009216956020818,
	"grad_norm": 0.17412547767162323,
	"learning_rate": 0.00019531828780299383,
	"loss": 1.3196,
	"step": 63
	},
	{
	"epoch": 0.008136347383894163,
	"grad_norm": 0.1736118346452713,
	"learning_rate": 0.0001951414107980036,
	"loss": 1.2966,
	"step": 64
	},
	{
	"epoch": 0.00826347781176751,
	"grad_norm": 0.16254910826683044,
	"learning_rate": 0.00019496133733103112,
	"loss": 1.3416,
	"step": 65
	},
	{
	"epoch": 0.008390608239640857,
	"grad_norm": 0.16831637918949127,
	"learning_rate": 0.00019477807345199714,
	"loss": 1.3396,
	"step": 66
	},
	{
	"epoch": 0.008517738667514202,
	"grad_norm": 0.1759282648563385,
	"learning_rate": 0.00019459162531801046,
	"loss": 1.3101,
	"step": 67
	},
	{
	"epoch": 0.00864486909538755,
	"grad_norm": 0.17314572632312775,
	"learning_rate": 0.00019440199919316123,
	"loss": 1.4026,
	"step": 68
	},
	{
	"epoch": 0.008771999523260895,
	"grad_norm": 0.17074303328990936,
	"learning_rate": 0.00019420920144831044,
	"loss": 1.3088,
	"step": 69
	},
	{
	"epoch": 0.008899129951134241,
	"grad_norm": 0.17773644626140594,
	"learning_rate": 0.0001940132385608757,
	"loss": 1.32,
	"step": 70
	},
	{
	"epoch": 0.009026260379007589,
	"grad_norm": 0.1736891269683838,
	"learning_rate": 0.0001938141171146141,
	"loss": 1.2865,
	"step": 71
	},
	{
	"epoch": 0.009153390806880934,
	"grad_norm": 0.17593072354793549,
	"learning_rate": 0.0001936118437994003,
	"loss": 1.3276,
	"step": 72
	},
	{
	"epoch": 0.00928052123475428,
	"grad_norm": 0.16799978911876678,
	"learning_rate": 0.00019340642541100248,
	"loss": 1.2585,
	"step": 73
	},
	{
	"epoch": 0.009407651662627628,
	"grad_norm": 0.17271657288074493,
	"learning_rate": 0.00019319786885085364,
	"loss": 1.3838,
	"step": 74
	},
	{
	"epoch": 0.009534782090500973,
	"grad_norm": 0.18318656086921692,
	"learning_rate": 0.0001929861811258197,
	"loss": 1.3857,
	"step": 75
	},
	{
	"epoch": 0.009661912518374319,
	"grad_norm": 0.1850346177816391,
	"learning_rate": 0.0001927713693479643,
	"loss": 1.3884,
	"step": 76
	},
	{
	"epoch": 0.009789042946247667,
	"grad_norm": 0.1707659363746643,
	"learning_rate": 0.0001925534407343097,
	"loss": 1.2674,
	"step": 77
	},
	{
	"epoch": 0.009916173374121012,
	"grad_norm": 0.1803124099969864,
	"learning_rate": 0.0001923324026065944,
	"loss": 1.2899,
	"step": 78
	},
	{
	"epoch": 0.010043303801994358,
	"grad_norm": 0.18143856525421143,
	"learning_rate": 0.0001921082623910271,
	"loss": 1.2967,
	"step": 79
	},
	{
	"epoch": 0.010170434229867705,
	"grad_norm": 0.17903882265090942,
	"learning_rate": 0.00019188102761803717,
	"loss": 1.2913,
	"step": 80
	},
	{
	"epoch": 0.010297564657741051,
	"grad_norm": 0.181906595826149,
	"learning_rate": 0.00019165070592202173,
	"loss": 1.2568,
	"step": 81
	},
	{
	"epoch": 0.010424695085614397,
	"grad_norm": 0.1655733585357666,
	"learning_rate": 0.00019141730504108922,
	"loss": 1.2758,
	"step": 82
	},
	{
	"epoch": 0.010551825513487744,
	"grad_norm": 0.17457562685012817,
	"learning_rate": 0.00019118083281679913,
	"loss": 1.2506,
	"step": 83
	},
	{
	"epoch": 0.01067895594136109,
	"grad_norm": 0.18457446992397308,
	"learning_rate": 0.00019094129719389886,
	"loss": 1.3701,
	"step": 84
	},
	{
	"epoch": 0.010806086369234436,
	"grad_norm": 0.17702506482601166,
	"learning_rate": 0.0001906987062200567,
	"loss": 1.3071,
	"step": 85
	},
	{
	"epoch": 0.010933216797107783,
	"grad_norm": 0.1763213723897934,
	"learning_rate": 0.0001904530680455914,
	"loss": 1.2996,
	"step": 86
	},
	{
	"epoch": 0.011060347224981129,
	"grad_norm": 0.17649979889392853,
	"learning_rate": 0.0001902043909231984,
	"loss": 1.314,
	"step": 87
	},
	{
	"epoch": 0.011187477652854475,
	"grad_norm": 0.1817278414964676,
	"learning_rate": 0.00018995268320767252,
	"loss": 1.315,
	"step": 88
	},
	{
	"epoch": 0.011314608080727822,
	"grad_norm": 0.17668454349040985,
	"learning_rate": 0.0001896979533556273,
	"loss": 1.2914,
	"step": 89
	},
	{
	"epoch": 0.011441738508601168,
	"grad_norm": 0.17107272148132324,
	"learning_rate": 0.0001894402099252109,
	"loss": 1.2884,
	"step": 90
	},
	{
	"epoch": 0.011568868936474514,
	"grad_norm": 0.18352022767066956,
	"learning_rate": 0.0001891794615758185,
	"loss": 1.3404,
	"step": 91
	},
	{
	"epoch": 0.011695999364347861,
	"grad_norm": 0.17999856173992157,
	"learning_rate": 0.00018891571706780146,
	"loss": 1.3001,
	"step": 92
	},
	{
	"epoch": 0.011823129792221207,
	"grad_norm": 0.17374040186405182,
	"learning_rate": 0.00018864898526217293,
	"loss": 1.266,
	"step": 93
	},
	{
	"epoch": 0.011950260220094553,
	"grad_norm": 0.18675245344638824,
	"learning_rate": 0.0001883792751203102,
	"loss": 1.3347,
	"step": 94
	},
	{
	"epoch": 0.0120773906479679,
	"grad_norm": 0.18314674496650696,
	"learning_rate": 0.0001881065957036536,
	"loss": 1.3224,
	"step": 95
	},
	{
	"epoch": 0.012204521075841246,
	"grad_norm": 0.17483913898468018,
	"learning_rate": 0.00018783095617340193,
	"loss": 1.2926,
	"step": 96
	},
	{
	"epoch": 0.012331651503714592,
	"grad_norm": 0.19491428136825562,
	"learning_rate": 0.00018755236579020502,
	"loss": 1.2636,
	"step": 97
	},
	{
	"epoch": 0.012458781931587939,
	"grad_norm": 0.17128659784793854,
	"learning_rate": 0.0001872708339138522,
	"loss": 1.2653,
	"step": 98
	},
	{
	"epoch": 0.012585912359461285,
	"grad_norm": 0.172018900513649,
	"learning_rate": 0.00018698637000295816,
	"loss": 1.2686,
	"step": 99
	},
	{
	"epoch": 0.01271304278733463,
	"grad_norm": 0.18891726434230804,
	"learning_rate": 0.0001866989836146449,
	"loss": 1.4058,
	"step": 100
	},
	{
	"epoch": 0.01271304278733463,
	"eval_loss": 1.2906723022460938,
	"eval_runtime": 1258.4463,
	"eval_samples_per_second": 3.973,
	"eval_steps_per_second": 0.993,
	"step": 100
	},
	{
	"epoch": 0.012840173215207978,
	"grad_norm": 0.1781390905380249,
	"learning_rate": 0.0001864086844042209,
	"loss": 1.3021,
	"step": 101
	},
	{
	"epoch": 0.012967303643081324,
	"grad_norm": 0.17100100219249725,
	"learning_rate": 0.00018611548212485647,
	"loss": 1.2574,
	"step": 102
	},
	{
	"epoch": 0.01309443407095467,
	"grad_norm": 0.18398095667362213,
	"learning_rate": 0.00018581938662725632,
	"loss": 1.2839,
	"step": 103
	},
	{
	"epoch": 0.013221564498828017,
	"grad_norm": 0.18981115520000458,
	"learning_rate": 0.00018552040785932845,
	"loss": 1.3149,
	"step": 104
	},
	{
	"epoch": 0.013348694926701363,
	"grad_norm": 0.18872378766536713,
	"learning_rate": 0.00018521855586584995,
	"loss": 1.279,
	"step": 105
	},
	{
	"epoch": 0.013475825354574708,
	"grad_norm": 0.1824631690979004,
	"learning_rate": 0.00018491384078812959,
	"loss": 1.2743,
	"step": 106
	},
	{
	"epoch": 0.013602955782448056,
	"grad_norm": 0.1971443146467209,
	"learning_rate": 0.000184606272863667,
	"loss": 1.3365,
	"step": 107
	},
	{
	"epoch": 0.013730086210321402,
	"grad_norm": 0.19964328408241272,
	"learning_rate": 0.00018429586242580884,
	"loss": 1.3184,
	"step": 108
	},
	{
	"epoch": 0.013857216638194747,
	"grad_norm": 0.17624543607234955,
	"learning_rate": 0.00018398261990340152,
	"loss": 1.2755,
	"step": 109
	},
	{
	"epoch": 0.013984347066068095,
	"grad_norm": 0.18599238991737366,
	"learning_rate": 0.00018366655582044094,
	"loss": 1.3025,
	"step": 110
	},
	{
	"epoch": 0.01411147749394144,
	"grad_norm": 0.19051305949687958,
	"learning_rate": 0.00018334768079571884,
	"loss": 1.351,
	"step": 111
	},
	{
	"epoch": 0.014238607921814786,
	"grad_norm": 0.1858106255531311,
	"learning_rate": 0.00018302600554246601,
	"loss": 1.2386,
	"step": 112
	},
	{
	"epoch": 0.014365738349688134,
	"grad_norm": 0.17598244547843933,
	"learning_rate": 0.00018270154086799239,
	"loss": 1.2687,
	"step": 113
	},
	{
	"epoch": 0.01449286877756148,
	"grad_norm": 0.18105947971343994,
	"learning_rate": 0.00018237429767332405,
	"loss": 1.2843,
	"step": 114
	},
	{
	"epoch": 0.014619999205434825,
	"grad_norm": 0.18796177208423615,
	"learning_rate": 0.00018204428695283687,
	"loss": 1.2999,
	"step": 115
	},
	{
	"epoch": 0.014747129633308173,
	"grad_norm": 0.18702763319015503,
	"learning_rate": 0.00018171151979388714,
	"loss": 1.2391,
	"step": 116
	},
	{
	"epoch": 0.014874260061181518,
	"grad_norm": 0.17469799518585205,
	"learning_rate": 0.00018137600737643913,
	"loss": 1.2915,
	"step": 117
	},
	{
	"epoch": 0.015001390489054864,
	"grad_norm": 0.1871766746044159,
	"learning_rate": 0.00018103776097268942,
	"loss": 1.2429,
	"step": 118
	},
	{
	"epoch": 0.015128520916928212,
	"grad_norm": 0.18426093459129333,
	"learning_rate": 0.00018069679194668826,
	"loss": 1.2678,
	"step": 119
	},
	{
	"epoch": 0.015255651344801557,
	"grad_norm": 0.1830713450908661,
	"learning_rate": 0.0001803531117539577,
	"loss": 1.3231,
	"step": 120
	},
	{
	"epoch": 0.015382781772674903,
	"grad_norm": 0.19156108796596527,
	"learning_rate": 0.00018000673194110668,
	"loss": 1.3426,
	"step": 121
	},
	{
	"epoch": 0.01550991220054825,
	"grad_norm": 0.18232569098472595,
	"learning_rate": 0.00017965766414544326,
	"loss": 1.2227,
	"step": 122
	},
	{
	"epoch": 0.015637042628421596,
	"grad_norm": 0.18696987628936768,
	"learning_rate": 0.00017930592009458352,
	"loss": 1.2933,
	"step": 123
	},
	{
	"epoch": 0.015764173056294944,
	"grad_norm": 0.18148070573806763,
	"learning_rate": 0.00017895151160605757,
	"loss": 1.3598,
	"step": 124
	},
	{
	"epoch": 0.015891303484168288,
	"grad_norm": 0.1859319657087326,
	"learning_rate": 0.00017859445058691247,
	"loss": 1.2688,
	"step": 125
	},
	{
	"epoch": 0.016018433912041635,
	"grad_norm": 0.18133966624736786,
	"learning_rate": 0.00017823474903331233,
	"loss": 1.2912,
	"step": 126
	},
	{
	"epoch": 0.016145564339914983,
	"grad_norm": 0.16695751249790192,
	"learning_rate": 0.0001778724190301351,
	"loss": 1.2772,
	"step": 127
	},
	{
	"epoch": 0.016272694767788327,
	"grad_norm": 0.17694084346294403,
	"learning_rate": 0.0001775074727505667,
	"loss": 1.2998,
	"step": 128
	},
	{
	"epoch": 0.016399825195661674,
	"grad_norm": 0.18545518815517426,
	"learning_rate": 0.0001771399224556919,
	"loss": 1.2996,
	"step": 129
	},
	{
	"epoch": 0.01652695562353502,
	"grad_norm": 0.1763446033000946,
	"learning_rate": 0.00017676978049408263,
	"loss": 1.2942,
	"step": 130
	},
	{
	"epoch": 0.016654086051408366,
	"grad_norm": 0.1751178801059723,
	"learning_rate": 0.00017639705930138272,
	"loss": 1.2491,
	"step": 131
	},
	{
	"epoch": 0.016781216479281713,
	"grad_norm": 0.17463481426239014,
	"learning_rate": 0.00017602177139989044,
	"loss": 1.3015,
	"step": 132
	},
	{
	"epoch": 0.01690834690715506,
	"grad_norm": 0.1884208619594574,
	"learning_rate": 0.0001756439293981377,
	"loss": 1.2555,
	"step": 133
	},
	{
	"epoch": 0.017035477335028405,
	"grad_norm": 0.1824871301651001,
	"learning_rate": 0.00017526354599046635,
	"loss": 1.3321,
	"step": 134
	},
	{
	"epoch": 0.017162607762901752,
	"grad_norm": 0.17852945625782013,
	"learning_rate": 0.00017488063395660177,
	"loss": 1.2134,
	"step": 135
	},
	{
	"epoch": 0.0172897381907751,
	"grad_norm": 0.17903351783752441,
	"learning_rate": 0.00017449520616122344,
	"loss": 1.202,
	"step": 136
	},
	{
	"epoch": 0.017416868618648444,
	"grad_norm": 0.19624289870262146,
	"learning_rate": 0.00017410727555353282,
	"loss": 1.2983,
	"step": 137
	},
	{
	"epoch": 0.01754399904652179,
	"grad_norm": 0.20271572470664978,
	"learning_rate": 0.00017371685516681825,
	"loss": 1.331,
	"step": 138
	},
	{
	"epoch": 0.01767112947439514,
	"grad_norm": 0.19160455465316772,
	"learning_rate": 0.00017332395811801707,
	"loss": 1.2325,
	"step": 139
	},
	{
	"epoch": 0.017798259902268482,
	"grad_norm": 0.19286282360553741,
	"learning_rate": 0.00017292859760727493,
	"loss": 1.3632,
	"step": 140
	},
	{
	"epoch": 0.01792539033014183,
	"grad_norm": 0.18525561690330505,
	"learning_rate": 0.00017253078691750227,
	"loss": 1.302,
	"step": 141
	},
	{
	"epoch": 0.018052520758015177,
	"grad_norm": 0.17999610304832458,
	"learning_rate": 0.00017213053941392818,
	"loss": 1.2617,
	"step": 142
	},
	{
	"epoch": 0.01817965118588852,
	"grad_norm": 0.1817435920238495,
	"learning_rate": 0.00017172786854365116,
	"loss": 1.285,
	"step": 143
	},
	{
	"epoch": 0.01830678161376187,
	"grad_norm": 0.18393941223621368,
	"learning_rate": 0.00017132278783518756,
	"loss": 1.2033,
	"step": 144
	},
	{
	"epoch": 0.018433912041635216,
	"grad_norm": 0.18280182778835297,
	"learning_rate": 0.00017091531089801694,
	"loss": 1.2454,
	"step": 145
	},
	{
	"epoch": 0.01856104246950856,
	"grad_norm": 0.17269238829612732,
	"learning_rate": 0.00017050545142212483,
	"loss": 1.2137,
	"step": 146
	},
	{
	"epoch": 0.018688172897381908,
	"grad_norm": 0.18515561521053314,
	"learning_rate": 0.00017009322317754278,
	"loss": 1.2876,
	"step": 147
	},
	{
	"epoch": 0.018815303325255255,
	"grad_norm": 0.18649280071258545,
	"learning_rate": 0.0001696786400138859,
	"loss": 1.3279,
	"step": 148
	},
	{
	"epoch": 0.0189424337531286,
	"grad_norm": 0.18008284270763397,
	"learning_rate": 0.00016926171585988727,
	"loss": 1.1943,
	"step": 149
	},
	{
	"epoch": 0.019069564181001947,
	"grad_norm": 0.18855896592140198,
	"learning_rate": 0.00016884246472293016,
	"loss": 1.3458,
	"step": 150
	},
	{
	"epoch": 0.019196694608875294,
	"grad_norm": 0.18721222877502441,
	"learning_rate": 0.00016842090068857742,
	"loss": 1.205,
	"step": 151
	},
	{
	"epoch": 0.019323825036748638,
	"grad_norm": 0.18609726428985596,
	"learning_rate": 0.00016799703792009827,
	"loss": 1.3147,
	"step": 152
	},
	{
	"epoch": 0.019450955464621986,
	"grad_norm": 0.18827542662620544,
	"learning_rate": 0.00016757089065799226,
	"loss": 1.2053,
	"step": 153
	},
	{
	"epoch": 0.019578085892495333,
	"grad_norm": 0.19211921095848083,
	"learning_rate": 0.00016714247321951106,
	"loss": 1.2881,
	"step": 154
	},
	{
	"epoch": 0.019705216320368677,
	"grad_norm": 0.1911146342754364,
	"learning_rate": 0.0001667117999981774,
	"loss": 1.2841,
	"step": 155
	},
	{
	"epoch": 0.019832346748242025,
	"grad_norm": 0.1876746416091919,
	"learning_rate": 0.00016627888546330138,
	"loss": 1.2795,
	"step": 156
	},
	{
	"epoch": 0.019959477176115372,
	"grad_norm": 0.18275220692157745,
	"learning_rate": 0.00016584374415949443,
	"loss": 1.2646,
	"step": 157
	},
	{
	"epoch": 0.020086607603988716,
	"grad_norm": 0.19240595400333405,
	"learning_rate": 0.0001654063907061807,
	"loss": 1.2286,
	"step": 158
	},
	{
	"epoch": 0.020213738031862064,
	"grad_norm": 0.17621144652366638,
	"learning_rate": 0.00016496683979710575,
	"loss": 1.2623,
	"step": 159
	},
	{
	"epoch": 0.02034086845973541,
	"grad_norm": 0.18566247820854187,
	"learning_rate": 0.000164525106199843,
	"loss": 1.2915,
	"step": 160
	},
	{
	"epoch": 0.020467998887608755,
	"grad_norm": 0.19843867421150208,
	"learning_rate": 0.00016408120475529763,
	"loss": 1.1703,
	"step": 161
	},
	{
	"epoch": 0.020595129315482102,
	"grad_norm": 0.20230089128017426,
	"learning_rate": 0.00016363515037720773,
	"loss": 1.274,
	"step": 162
	},
	{
	"epoch": 0.02072225974335545,
	"grad_norm": 0.1874382644891739,
	"learning_rate": 0.00016318695805164359,
	"loss": 1.267,
	"step": 163
	},
	{
	"epoch": 0.020849390171228794,
	"grad_norm": 0.19301468133926392,
	"learning_rate": 0.0001627366428365039,
	"loss": 1.3385,
	"step": 164
	},
	{
	"epoch": 0.02097652059910214,
	"grad_norm": 0.1960678994655609,
	"learning_rate": 0.00016228421986101005,
	"loss": 1.2469,
	"step": 165
	},
	{
	"epoch": 0.02110365102697549,
	"grad_norm": 0.2149035483598709,
	"learning_rate": 0.00016182970432519772,
	"loss": 1.2695,
	"step": 166
	},
	{
	"epoch": 0.021230781454848833,
	"grad_norm": 0.1928316354751587,
	"learning_rate": 0.00016137311149940633,
	"loss": 1.2581,
	"step": 167
	},
	{
	"epoch": 0.02135791188272218,
	"grad_norm": 0.18403369188308716,
	"learning_rate": 0.0001609144567237658,
	"loss": 1.2872,
	"step": 168
	},
	{
	"epoch": 0.021485042310595528,
	"grad_norm": 0.18688054382801056,
	"learning_rate": 0.00016045375540768136,
	"loss": 1.2762,
	"step": 169
	},
	{
	"epoch": 0.021612172738468872,
	"grad_norm": 0.19875864684581757,
	"learning_rate": 0.00015999102302931585,
	"loss": 1.2773,
	"step": 170
	},
	{
	"epoch": 0.02173930316634222,
	"grad_norm": 0.19474861025810242,
	"learning_rate": 0.0001595262751350695,
	"loss": 1.2329,
	"step": 171
	},
	{
	"epoch": 0.021866433594215567,
	"grad_norm": 0.1946505606174469,
	"learning_rate": 0.00015905952733905775,
	"loss": 1.1726,
	"step": 172
	},
	{
	"epoch": 0.02199356402208891,
	"grad_norm": 0.18479324877262115,
	"learning_rate": 0.00015859079532258677,
	"loss": 1.3177,
	"step": 173
	},
	{
	"epoch": 0.022120694449962258,
	"grad_norm": 0.19268646836280823,
	"learning_rate": 0.00015812009483362642,
	"loss": 1.2721,
	"step": 174
	},
	{
	"epoch": 0.022247824877835606,
	"grad_norm": 0.18371957540512085,
	"learning_rate": 0.0001576474416862812,
	"loss": 1.3083,
	"step": 175
	},
	{
	"epoch": 0.02237495530570895,
	"grad_norm": 0.1987624615430832,
	"learning_rate": 0.00015717285176025913,
	"loss": 1.2582,
	"step": 176
	},
	{
	"epoch": 0.022502085733582297,
	"grad_norm": 0.19360652565956116,
	"learning_rate": 0.00015669634100033797,
	"loss": 1.2597,
	"step": 177
	},
	{
	"epoch": 0.022629216161455645,
	"grad_norm": 0.1875244826078415,
	"learning_rate": 0.00015621792541582966,
	"loss": 1.2637,
	"step": 178
	},
	{
	"epoch": 0.02275634658932899,
	"grad_norm": 0.19594229757785797,
	"learning_rate": 0.00015573762108004262,
	"loss": 1.2907,
	"step": 179
	},
	{
	"epoch": 0.022883477017202336,
	"grad_norm": 0.1935066133737564,
	"learning_rate": 0.00015525544412974132,
	"loss": 1.2446,
	"step": 180
	},
	{
	"epoch": 0.023010607445075684,
	"grad_norm": 0.19178606569766998,
	"learning_rate": 0.0001547714107646046,
	"loss": 1.2644,
	"step": 181
	},
	{
	"epoch": 0.023137737872949028,
	"grad_norm": 0.18824580311775208,
	"learning_rate": 0.00015428553724668103,
	"loss": 1.2592,
	"step": 182
	},
	{
	"epoch": 0.023264868300822375,
	"grad_norm": 0.1857818067073822,
	"learning_rate": 0.00015379783989984277,
	"loss": 1.2547,
	"step": 183
	},
	{
	"epoch": 0.023391998728695722,
	"grad_norm": 0.18491147458553314,
	"learning_rate": 0.00015330833510923718,
	"loss": 1.3073,
	"step": 184
	},
	{
	"epoch": 0.023519129156569066,
	"grad_norm": 0.19134363532066345,
	"learning_rate": 0.00015281703932073612,
	"loss": 1.2456,
	"step": 185
	},
	{
	"epoch": 0.023646259584442414,
	"grad_norm": 0.18579505383968353,
	"learning_rate": 0.0001523239690403835,
	"loss": 1.2626,
	"step": 186
	},
	{
	"epoch": 0.02377339001231576,
	"grad_norm": 0.18687140941619873,
	"learning_rate": 0.0001518291408338409,
	"loss": 1.2795,
	"step": 187
	},
	{
	"epoch": 0.023900520440189105,
	"grad_norm": 0.1869836449623108,
	"learning_rate": 0.00015133257132583073,
	"loss": 1.2111,
	"step": 188
	},
	{
	"epoch": 0.024027650868062453,
	"grad_norm": 0.18433886766433716,
	"learning_rate": 0.00015083427719957793,
	"loss": 1.1969,
	"step": 189
	},
	{
	"epoch": 0.0241547812959358,
	"grad_norm": 0.19012001156806946,
	"learning_rate": 0.0001503342751962493,
	"loss": 1.2973,
	"step": 190
	},
	{
	"epoch": 0.024281911723809144,
	"grad_norm": 0.18975861370563507,
	"learning_rate": 0.00014983258211439117,
	"loss": 1.2964,
	"step": 191
	},
	{
	"epoch": 0.024409042151682492,
	"grad_norm": 0.17685554921627045,
	"learning_rate": 0.0001493292148093649,
	"loss": 1.2763,
	"step": 192
	},
	{
	"epoch": 0.02453617257955584,
	"grad_norm": 0.19333194196224213,
	"learning_rate": 0.00014882419019278075,
	"loss": 1.3203,
	"step": 193
	},
	{
	"epoch": 0.024663303007429183,
	"grad_norm": 0.19778768718242645,
	"learning_rate": 0.00014831752523192948,
	"loss": 1.3204,
	"step": 194
	},
	{
	"epoch": 0.02479043343530253,
	"grad_norm": 0.1869363635778427,
	"learning_rate": 0.00014780923694921255,
	"loss": 1.2258,
	"step": 195
	},
	{
	"epoch": 0.024917563863175878,
	"grad_norm": 0.17671674489974976,
	"learning_rate": 0.00014729934242157004,
	"loss": 1.1667,
	"step": 196
	},
	{
	"epoch": 0.025044694291049222,
	"grad_norm": 0.1893490105867386,
	"learning_rate": 0.00014678785877990697,
	"loss": 1.3572,
	"step": 197
	},
	{
	"epoch": 0.02517182471892257,
	"grad_norm": 0.19606593251228333,
	"learning_rate": 0.00014627480320851774,
	"loss": 1.2507,
	"step": 198
	},
	{
	"epoch": 0.025298955146795917,
	"grad_norm": 0.20087891817092896,
	"learning_rate": 0.00014576019294450888,
	"loss": 1.3149,
	"step": 199
	},
	{
	"epoch": 0.02542608557466926,
	"grad_norm": 0.1857730895280838,
	"learning_rate": 0.00014524404527721977,
	"loss": 1.2893,
	"step": 200
	},
	{
	"epoch": 0.02542608557466926,
	"eval_loss": 1.2551084756851196,
	"eval_runtime": 1258.1994,
	"eval_samples_per_second": 3.974,
	"eval_steps_per_second": 0.993,
	"step": 200
	},
	{
	"epoch": 0.02555321600254261,
	"grad_norm": 0.18368631601333618,
	"learning_rate": 0.00014472637754764196,
	"loss": 1.2125,
	"step": 201
	},
	{
	"epoch": 0.025680346430415956,
	"grad_norm": 0.18972043693065643,
	"learning_rate": 0.00014420720714783636,
	"loss": 1.2131,
	"step": 202
	},
	{
	"epoch": 0.0258074768582893,
	"grad_norm": 0.18747109174728394,
	"learning_rate": 0.00014368655152034908,
	"loss": 1.2224,
	"step": 203
	},
	{
	"epoch": 0.025934607286162648,
	"grad_norm": 0.18962696194648743,
	"learning_rate": 0.00014316442815762544,
	"loss": 1.2613,
	"step": 204
	},
	{
	"epoch": 0.026061737714035995,
	"grad_norm": 0.18641987442970276,
	"learning_rate": 0.00014264085460142202,
	"loss": 1.2525,
	"step": 205
	},
	{
	"epoch": 0.02618886814190934,
	"grad_norm": 0.19106072187423706,
	"learning_rate": 0.0001421158484422177,
	"loss": 1.2549,
	"step": 206
	},
	{
	"epoch": 0.026315998569782686,
	"grad_norm": 0.19771872460842133,
	"learning_rate": 0.0001415894273186223,
	"loss": 1.2612,
	"step": 207
	},
	{
	"epoch": 0.026443128997656034,
	"grad_norm": 0.18108506500720978,
	"learning_rate": 0.0001410616089167842,
	"loss": 1.2114,
	"step": 208
	},
	{
	"epoch": 0.026570259425529378,
	"grad_norm": 0.17011211812496185,
	"learning_rate": 0.0001405324109697961,
	"loss": 1.2695,
	"step": 209
	},
	{
	"epoch": 0.026697389853402725,
	"grad_norm": 0.1930396556854248,
	"learning_rate": 0.00014000185125709918,
	"loss": 1.211,
	"step": 210
	},
	{
	"epoch": 0.026824520281276073,
	"grad_norm": 0.19416122138500214,
	"learning_rate": 0.00013946994760388582,
	"loss": 1.1772,
	"step": 211
	},
	{
	"epoch": 0.026951650709149417,
	"grad_norm": 0.18353648483753204,
	"learning_rate": 0.00013893671788050074,
	"loss": 1.2672,
	"step": 212
	},
	{
	"epoch": 0.027078781137022764,
	"grad_norm": 0.18951141834259033,
	"learning_rate": 0.00013840218000184053,
	"loss": 1.3209,
	"step": 213
	},
	{
	"epoch": 0.027205911564896112,
	"grad_norm": 0.19500471651554108,
	"learning_rate": 0.00013786635192675184,
	"loss": 1.2519,
	"step": 214
	},
	{
	"epoch": 0.027333041992769456,
	"grad_norm": 0.1958056539297104,
	"learning_rate": 0.00013732925165742805,
	"loss": 1.208,
	"step": 215
	},
	{
	"epoch": 0.027460172420642803,
	"grad_norm": 0.1859259307384491,
	"learning_rate": 0.00013679089723880427,
	"loss": 1.2715,
	"step": 216
	},
	{
	"epoch": 0.02758730284851615,
	"grad_norm": 0.18673139810562134,
	"learning_rate": 0.00013625130675795134,
	"loss": 1.292,
	"step": 217
	},
	{
	"epoch": 0.027714433276389495,
	"grad_norm": 0.17959700524806976,
	"learning_rate": 0.00013571049834346799,
	"loss": 1.1896,
	"step": 218
	},
	{
	"epoch": 0.027841563704262842,
	"grad_norm": 0.1903347671031952,
	"learning_rate": 0.0001351684901648718,
	"loss": 1.3381,
	"step": 219
	},
	{
	"epoch": 0.02796869413213619,
	"grad_norm": 0.18993370234966278,
	"learning_rate": 0.00013462530043198873,
	"loss": 1.2739,
	"step": 220
	},
	{
	"epoch": 0.028095824560009534,
	"grad_norm": 0.18846477568149567,
	"learning_rate": 0.0001340809473943415,
	"loss": 1.2399,
	"step": 221
	},
	{
	"epoch": 0.02822295498788288,
	"grad_norm": 0.18699532747268677,
	"learning_rate": 0.00013353544934053616,
	"loss": 1.2061,
	"step": 222
	},
	{
	"epoch": 0.02835008541575623,
	"grad_norm": 0.19469809532165527,
	"learning_rate": 0.00013298882459764798,
	"loss": 1.2455,
	"step": 223
	},
	{
	"epoch": 0.028477215843629573,
	"grad_norm": 0.19830243289470673,
	"learning_rate": 0.00013244109153060548,
	"loss": 1.2542,
	"step": 224
	},
	{
	"epoch": 0.02860434627150292,
	"grad_norm": 0.20483078062534332,
	"learning_rate": 0.0001318922685415735,
	"loss": 1.2287,
	"step": 225
	},
	{
	"epoch": 0.028731476699376268,
	"grad_norm": 0.190695121884346,
	"learning_rate": 0.00013134237406933492,
	"loss": 1.2165,
	"step": 226
	},
	{
	"epoch": 0.02885860712724961,
	"grad_norm": 0.19430223107337952,
	"learning_rate": 0.00013079142658867124,
	"loss": 1.2922,
	"step": 227
	},
	{
	"epoch": 0.02898573755512296,
	"grad_norm": 0.1994917094707489,
	"learning_rate": 0.00013023944460974183,
	"loss": 1.2402,
	"step": 228
	},
	{
	"epoch": 0.029112867982996306,
	"grad_norm": 0.20195803046226501,
	"learning_rate": 0.00012968644667746206,
	"loss": 1.2594,
	"step": 229
	},
	{
	"epoch": 0.02923999841086965,
	"grad_norm": 0.19695453345775604,
	"learning_rate": 0.00012913245137088024,
	"loss": 1.2762,
	"step": 230
	},
	{
	"epoch": 0.029367128838742998,
	"grad_norm": 0.19726547598838806,
	"learning_rate": 0.00012857747730255338,
	"loss": 1.2494,
	"step": 231
	},
	{
	"epoch": 0.029494259266616345,
	"grad_norm": 0.19146564602851868,
	"learning_rate": 0.00012802154311792197,
	"loss": 1.2312,
	"step": 232
	},
	{
	"epoch": 0.02962138969448969,
	"grad_norm": 0.19849611818790436,
	"learning_rate": 0.00012746466749468345,
	"loss": 1.2186,
	"step": 233
	},
	{
	"epoch": 0.029748520122363037,
	"grad_norm": 0.18684804439544678,
	"learning_rate": 0.00012690686914216474,
	"loss": 1.1775,
	"step": 234
	},
	{
	"epoch": 0.029875650550236384,
	"grad_norm": 0.19955115020275116,
	"learning_rate": 0.0001263481668006937,
	"loss": 1.2476,
	"step": 235
	},
	{
	"epoch": 0.03000278097810973,
	"grad_norm": 0.20034034550189972,
	"learning_rate": 0.00012578857924096934,
	"loss": 1.2307,
	"step": 236
	},
	{
	"epoch": 0.030129911405983076,
	"grad_norm": 0.1980581283569336,
	"learning_rate": 0.00012522812526343148,
	"loss": 1.2332,
	"step": 237
	},
	{
	"epoch": 0.030257041833856423,
	"grad_norm": 0.1966305524110794,
	"learning_rate": 0.00012466682369762882,
	"loss": 1.2219,
	"step": 238
	},
	{
	"epoch": 0.030384172261729767,
	"grad_norm": 0.19543439149856567,
	"learning_rate": 0.00012410469340158655,
	"loss": 1.2998,
	"step": 239
	},
	{
	"epoch": 0.030511302689603115,
	"grad_norm": 0.19517168402671814,
	"learning_rate": 0.00012354175326117253,
	"loss": 1.2451,
	"step": 240
	},
	{
	"epoch": 0.030638433117476462,
	"grad_norm": 0.18800078332424164,
	"learning_rate": 0.00012297802218946306,
	"loss": 1.2349,
	"step": 241
	},
	{
	"epoch": 0.030765563545349806,
	"grad_norm": 0.20408718287944794,
	"learning_rate": 0.00012241351912610726,
	"loss": 1.3123,
	"step": 242
	},
	{
	"epoch": 0.030892693973223154,
	"grad_norm": 0.19570188224315643,
	"learning_rate": 0.00012184826303669083,
	"loss": 1.2181,
	"step": 243
	},
	{
	"epoch": 0.0310198244010965,
	"grad_norm": 0.1854136884212494,
	"learning_rate": 0.00012128227291209891,
	"loss": 1.2298,
	"step": 244
	},
	{
	"epoch": 0.031146954828969845,
	"grad_norm": 0.19532455503940582,
	"learning_rate": 0.00012071556776787786,
	"loss": 1.3124,
	"step": 245
	},
	{
	"epoch": 0.03127408525684319,
	"grad_norm": 0.18832701444625854,
	"learning_rate": 0.00012014816664359671,
	"loss": 1.1565,
	"step": 246
	},
	{
	"epoch": 0.03140121568471654,
	"grad_norm": 0.19303160905838013,
	"learning_rate": 0.0001195800886022071,
	"loss": 1.2329,
	"step": 247
	},
	{
	"epoch": 0.03152834611258989,
	"grad_norm": 0.1881171315908432,
	"learning_rate": 0.0001190113527294032,
	"loss": 1.2456,
	"step": 248
	},
	{
	"epoch": 0.03165547654046323,
	"grad_norm": 0.19656208157539368,
	"learning_rate": 0.00011844197813298017,
	"loss": 1.2959,
	"step": 249
	},
	{
	"epoch": 0.031782606968336576,
	"grad_norm": 0.19458794593811035,
	"learning_rate": 0.0001178719839421925,
	"loss": 1.2968,
	"step": 250
	},
	{
	"epoch": 0.031909737396209926,
	"grad_norm": 0.1953679323196411,
	"learning_rate": 0.00011730138930711101,
	"loss": 1.3225,
	"step": 251
	},
	{
	"epoch": 0.03203686782408327,
	"grad_norm": 0.18624471127986908,
	"learning_rate": 0.00011673021339797967,
	"loss": 1.2895,
	"step": 252
	},
	{
	"epoch": 0.032163998251956614,
	"grad_norm": 0.1975700408220291,
	"learning_rate": 0.00011615847540457157,
	"loss": 1.2272,
	"step": 253
	},
	{
	"epoch": 0.032291128679829965,
	"grad_norm": 0.18464815616607666,
	"learning_rate": 0.000115586194535544,
	"loss": 1.1589,
	"step": 254
	},
	{
	"epoch": 0.03241825910770331,
	"grad_norm": 0.19085770845413208,
	"learning_rate": 0.00011501339001779332,
	"loss": 1.2129,
	"step": 255
	},
	{
	"epoch": 0.03254538953557665,
	"grad_norm": 0.19415773451328278,
	"learning_rate": 0.00011444008109580884,
	"loss": 1.2209,
	"step": 256
	},
	{
	"epoch": 0.032672519963450004,
	"grad_norm": 0.20239484310150146,
	"learning_rate": 0.00011386628703102633,
	"loss": 1.2872,
	"step": 257
	},
	{
	"epoch": 0.03279965039132335,
	"grad_norm": 0.18557807803153992,
	"learning_rate": 0.00011329202710118088,
	"loss": 1.2661,
	"step": 258
	},
	{
	"epoch": 0.03292678081919669,
	"grad_norm": 0.19118450582027435,
	"learning_rate": 0.00011271732059965925,
	"loss": 1.2781,
	"step": 259
	},
	{
	"epoch": 0.03305391124707004,
	"grad_norm": 0.1958242654800415,
	"learning_rate": 0.00011214218683485158,
	"loss": 1.2579,
	"step": 260
	},
	{
	"epoch": 0.03318104167494339,
	"grad_norm": 0.1829763948917389,
	"learning_rate": 0.00011156664512950287,
	"loss": 1.2359,
	"step": 261
	},
	{
	"epoch": 0.03330817210281673,
	"grad_norm": 0.18603093922138214,
	"learning_rate": 0.00011099071482006361,
	"loss": 1.2487,
	"step": 262
	},
	{
	"epoch": 0.03343530253069008,
	"grad_norm": 0.18507151305675507,
	"learning_rate": 0.00011041441525604014,
	"loss": 1.2339,
	"step": 263
	},
	{
	"epoch": 0.033562432958563426,
	"grad_norm": 0.198081374168396,
	"learning_rate": 0.00010983776579934482,
	"loss": 1.1937,
	"step": 264
	},
	{
	"epoch": 0.03368956338643677,
	"grad_norm": 0.19567249715328217,
	"learning_rate": 0.00010926078582364514,
	"loss": 1.2447,
	"step": 265
	},
	{
	"epoch": 0.03381669381431012,
	"grad_norm": 0.1892256885766983,
	"learning_rate": 0.00010868349471371315,
	"loss": 1.2011,
	"step": 266
	},
	{
	"epoch": 0.033943824242183465,
	"grad_norm": 0.19196678698062897,
	"learning_rate": 0.000108105911864774,
	"loss": 1.2341,
	"step": 267
	},
	{
	"epoch": 0.03407095467005681,
	"grad_norm": 0.20349054038524628,
	"learning_rate": 0.00010752805668185442,
	"loss": 1.2582,
	"step": 268
	},
	{
	"epoch": 0.03419808509793016,
	"grad_norm": 0.19051875174045563,
	"learning_rate": 0.0001069499485791307,
	"loss": 1.2593,
	"step": 269
	},
	{
	"epoch": 0.034325215525803504,
	"grad_norm": 0.18554829061031342,
	"learning_rate": 0.00010637160697927651,
	"loss": 1.1395,
	"step": 270
	},
	{
	"epoch": 0.03445234595367685,
	"grad_norm": 0.18687109649181366,
	"learning_rate": 0.00010579305131281025,
	"loss": 1.2079,
	"step": 271
	},
	{
	"epoch": 0.0345794763815502,
	"grad_norm": 0.19098562002182007,
	"learning_rate": 0.00010521430101744239,
	"loss": 1.2147,
	"step": 272
	},
	{
	"epoch": 0.03470660680942354,
	"grad_norm": 0.19619682431221008,
	"learning_rate": 0.00010463537553742225,
	"loss": 1.1458,
	"step": 273
	},
	{
	"epoch": 0.03483373723729689,
	"grad_norm": 0.1930547058582306,
	"learning_rate": 0.00010405629432288488,
	"loss": 1.2704,
	"step": 274
	},
	{
	"epoch": 0.03496086766517024,
	"grad_norm": 0.19838818907737732,
	"learning_rate": 0.00010347707682919754,
	"loss": 1.2228,
	"step": 275
	},
	{
	"epoch": 0.03508799809304358,
	"grad_norm": 0.1933777928352356,
	"learning_rate": 0.00010289774251630602,
	"loss": 1.189,
	"step": 276
	},
	{
	"epoch": 0.035215128520916926,
	"grad_norm": 0.20109447836875916,
	"learning_rate": 0.0001023183108480809,
	"loss": 1.2199,
	"step": 277
	},
	{
	"epoch": 0.03534225894879028,
	"grad_norm": 0.18909600377082825,
	"learning_rate": 0.00010173880129166358,
	"loss": 1.1529,
	"step": 278
	},
	{
	"epoch": 0.03546938937666362,
	"grad_norm": 0.18809406459331512,
	"learning_rate": 0.00010115923331681232,
	"loss": 1.2183,
	"step": 279
	},
	{
	"epoch": 0.035596519804536965,
	"grad_norm": 0.191794291138649,
	"learning_rate": 0.00010057962639524798,
	"loss": 1.2621,
	"step": 280
	},
	{
	"epoch": 0.035723650232410316,
	"grad_norm": 0.19512364268302917,
	"learning_rate": 0.0001,
	"loss": 1.1575,
	"step": 281
	},
	{
	"epoch": 0.03585078066028366,
	"grad_norm": 0.18720309436321259,
	"learning_rate": 9.942037360475205e-05,
	"loss": 1.2435,
	"step": 282
	},
	{
	"epoch": 0.035977911088157004,
	"grad_norm": 0.20254142582416534,
	"learning_rate": 9.884076668318773e-05,
	"loss": 1.3042,
	"step": 283
	},
	{
	"epoch": 0.036105041516030355,
	"grad_norm": 0.1859116405248642,
	"learning_rate": 9.826119870833643e-05,
	"loss": 1.2607,
	"step": 284
	},
	{
	"epoch": 0.0362321719439037,
	"grad_norm": 0.20392954349517822,
	"learning_rate": 9.768168915191913e-05,
	"loss": 1.2478,
	"step": 285
	},
	{
	"epoch": 0.03635930237177704,
	"grad_norm": 0.20078176259994507,
	"learning_rate": 9.710225748369401e-05,
	"loss": 1.2359,
	"step": 286
	},
	{
	"epoch": 0.036486432799650394,
	"grad_norm": 0.1939856857061386,
	"learning_rate": 9.65229231708025e-05,
	"loss": 1.2484,
	"step": 287
	},
	{
	"epoch": 0.03661356322752374,
	"grad_norm": 0.19322216510772705,
	"learning_rate": 9.594370567711513e-05,
	"loss": 1.3249,
	"step": 288
	},
	{
	"epoch": 0.03674069365539708,
	"grad_norm": 0.1958608329296112,
	"learning_rate": 9.536462446257776e-05,
	"loss": 1.2212,
	"step": 289
	},
	{
	"epoch": 0.03686782408327043,
	"grad_norm": 0.2111774981021881,
	"learning_rate": 9.478569898255765e-05,
	"loss": 1.3251,
	"step": 290
	},
	{
	"epoch": 0.03699495451114378,
	"grad_norm": 0.20403634011745453,
	"learning_rate": 9.420694868718977e-05,
	"loss": 1.1831,
	"step": 291
	},
	{
	"epoch": 0.03712208493901712,
	"grad_norm": 0.20421482622623444,
	"learning_rate": 9.362839302072354e-05,
	"loss": 1.2513,
	"step": 292
	},
	{
	"epoch": 0.03724921536689047,
	"grad_norm": 0.1911601573228836,
	"learning_rate": 9.305005142086932e-05,
	"loss": 1.1875,
	"step": 293
	},
	{
	"epoch": 0.037376345794763816,
	"grad_norm": 0.20054025948047638,
	"learning_rate": 9.247194331814562e-05,
	"loss": 1.214,
	"step": 294
	},
	{
	"epoch": 0.03750347622263716,
	"grad_norm": 0.198801651597023,
	"learning_rate": 9.1894088135226e-05,
	"loss": 1.1993,
	"step": 295
	},
	{
	"epoch": 0.03763060665051051,
	"grad_norm": 0.1995508074760437,
	"learning_rate": 9.131650528628687e-05,
	"loss": 1.1986,
	"step": 296
	},
	{
	"epoch": 0.037757737078383855,
	"grad_norm": 0.20325009524822235,
	"learning_rate": 9.073921417635486e-05,
	"loss": 1.2292,
	"step": 297
	},
	{
	"epoch": 0.0378848675062572,
	"grad_norm": 0.193388894200325,
	"learning_rate": 9.016223420065519e-05,
	"loss": 1.2304,
	"step": 298
	},
	{
	"epoch": 0.03801199793413055,
	"grad_norm": 0.1861080676317215,
	"learning_rate": 8.958558474395987e-05,
	"loss": 1.2843,
	"step": 299
	},
	{
	"epoch": 0.03813912836200389,
	"grad_norm": 0.19273944199085236,
	"learning_rate": 8.900928517993644e-05,
	"loss": 1.111,
	"step": 300
	},
	{
	"epoch": 0.03813912836200389,
	"eval_loss": 1.2362135648727417,
	"eval_runtime": 1257.3039,
	"eval_samples_per_second": 3.977,
	"eval_steps_per_second": 0.994,
	"step": 300
	}
	],
	"logging_steps": 1,
	"max_steps": 552,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 100,
	"stateful_callbacks": {
	"EarlyStoppingCallback": {
	"args": {
	"early_stopping_patience": 2,
	"early_stopping_threshold": 0.0
	},
	"attributes": {
	"early_stopping_patience_counter": 0
	}
	},
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 3.1092000681885696e+18,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}