cbb-1b / checkpoint-1098 /trainer_state.json

Upload folder using huggingface_hub

9f0cda1 verified 4 months ago

191 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.9979522184300342,
	"eval_steps": 500,
	"global_step": 1098,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.0027303754266211604,
	"grad_norm": 1.3254656791687012,
	"learning_rate": 1.818181818181818e-06,
	"loss": 1.2897,
	"step": 1
	},
	{
	"epoch": 0.005460750853242321,
	"grad_norm": 1.3267881870269775,
	"learning_rate": 3.636363636363636e-06,
	"loss": 1.2823,
	"step": 2
	},
	{
	"epoch": 0.008191126279863481,
	"grad_norm": 1.2982481718063354,
	"learning_rate": 5.4545454545454545e-06,
	"loss": 1.2595,
	"step": 3
	},
	{
	"epoch": 0.010921501706484642,
	"grad_norm": 1.2894413471221924,
	"learning_rate": 7.272727272727272e-06,
	"loss": 1.2653,
	"step": 4
	},
	{
	"epoch": 0.013651877133105802,
	"grad_norm": 1.2869772911071777,
	"learning_rate": 9.090909090909091e-06,
	"loss": 1.2545,
	"step": 5
	},
	{
	"epoch": 0.016382252559726963,
	"grad_norm": 1.2373387813568115,
	"learning_rate": 1.0909090909090909e-05,
	"loss": 1.2194,
	"step": 6
	},
	{
	"epoch": 0.01911262798634812,
	"grad_norm": 1.20195734500885,
	"learning_rate": 1.2727272727272727e-05,
	"loss": 1.2139,
	"step": 7
	},
	{
	"epoch": 0.021843003412969283,
	"grad_norm": 1.1426103115081787,
	"learning_rate": 1.4545454545454545e-05,
	"loss": 1.2315,
	"step": 8
	},
	{
	"epoch": 0.024573378839590442,
	"grad_norm": 1.0495123863220215,
	"learning_rate": 1.6363636363636366e-05,
	"loss": 1.1944,
	"step": 9
	},
	{
	"epoch": 0.027303754266211604,
	"grad_norm": 0.8776500821113586,
	"learning_rate": 1.8181818181818182e-05,
	"loss": 1.183,
	"step": 10
	},
	{
	"epoch": 0.030034129692832763,
	"grad_norm": 0.8687052130699158,
	"learning_rate": 2e-05,
	"loss": 1.1592,
	"step": 11
	},
	{
	"epoch": 0.032764505119453925,
	"grad_norm": 0.7476271390914917,
	"learning_rate": 2.1818181818181818e-05,
	"loss": 1.1483,
	"step": 12
	},
	{
	"epoch": 0.03549488054607509,
	"grad_norm": 0.6418495774269104,
	"learning_rate": 2.3636363636363637e-05,
	"loss": 1.0762,
	"step": 13
	},
	{
	"epoch": 0.03822525597269624,
	"grad_norm": 0.600390613079071,
	"learning_rate": 2.5454545454545454e-05,
	"loss": 1.0492,
	"step": 14
	},
	{
	"epoch": 0.040955631399317405,
	"grad_norm": 0.5653348565101624,
	"learning_rate": 2.7272727272727273e-05,
	"loss": 1.0548,
	"step": 15
	},
	{
	"epoch": 0.04368600682593857,
	"grad_norm": 0.5357097387313843,
	"learning_rate": 2.909090909090909e-05,
	"loss": 1.0273,
	"step": 16
	},
	{
	"epoch": 0.04641638225255973,
	"grad_norm": 0.4480445683002472,
	"learning_rate": 3.090909090909091e-05,
	"loss": 1.0065,
	"step": 17
	},
	{
	"epoch": 0.049146757679180884,
	"grad_norm": 0.40983352065086365,
	"learning_rate": 3.272727272727273e-05,
	"loss": 0.9908,
	"step": 18
	},
	{
	"epoch": 0.05187713310580205,
	"grad_norm": 0.42159780859947205,
	"learning_rate": 3.454545454545455e-05,
	"loss": 0.9888,
	"step": 19
	},
	{
	"epoch": 0.05460750853242321,
	"grad_norm": 0.41620174050331116,
	"learning_rate": 3.6363636363636364e-05,
	"loss": 0.9575,
	"step": 20
	},
	{
	"epoch": 0.05733788395904437,
	"grad_norm": 0.3804452419281006,
	"learning_rate": 3.818181818181819e-05,
	"loss": 0.9413,
	"step": 21
	},
	{
	"epoch": 0.060068259385665526,
	"grad_norm": 0.37021100521087646,
	"learning_rate": 4e-05,
	"loss": 0.9223,
	"step": 22
	},
	{
	"epoch": 0.06279863481228669,
	"grad_norm": 0.34090206027030945,
	"learning_rate": 4.181818181818182e-05,
	"loss": 0.8878,
	"step": 23
	},
	{
	"epoch": 0.06552901023890785,
	"grad_norm": 0.32232972979545593,
	"learning_rate": 4.3636363636363636e-05,
	"loss": 0.8986,
	"step": 24
	},
	{
	"epoch": 0.06825938566552901,
	"grad_norm": 0.2941684424877167,
	"learning_rate": 4.545454545454546e-05,
	"loss": 0.8857,
	"step": 25
	},
	{
	"epoch": 0.07098976109215017,
	"grad_norm": 0.27072674036026,
	"learning_rate": 4.7272727272727275e-05,
	"loss": 0.8736,
	"step": 26
	},
	{
	"epoch": 0.07372013651877134,
	"grad_norm": 0.2696637511253357,
	"learning_rate": 4.909090909090909e-05,
	"loss": 0.8698,
	"step": 27
	},
	{
	"epoch": 0.07645051194539249,
	"grad_norm": 0.2565267086029053,
	"learning_rate": 5.090909090909091e-05,
	"loss": 0.8324,
	"step": 28
	},
	{
	"epoch": 0.07918088737201365,
	"grad_norm": 0.2474038451910019,
	"learning_rate": 5.272727272727272e-05,
	"loss": 0.841,
	"step": 29
	},
	{
	"epoch": 0.08191126279863481,
	"grad_norm": 0.22430865466594696,
	"learning_rate": 5.4545454545454546e-05,
	"loss": 0.8219,
	"step": 30
	},
	{
	"epoch": 0.08464163822525597,
	"grad_norm": 0.21238166093826294,
	"learning_rate": 5.636363636363636e-05,
	"loss": 0.8328,
	"step": 31
	},
	{
	"epoch": 0.08737201365187713,
	"grad_norm": 0.2210783213376999,
	"learning_rate": 5.818181818181818e-05,
	"loss": 0.8187,
	"step": 32
	},
	{
	"epoch": 0.0901023890784983,
	"grad_norm": 0.24119816720485687,
	"learning_rate": 6e-05,
	"loss": 0.8078,
	"step": 33
	},
	{
	"epoch": 0.09283276450511946,
	"grad_norm": 0.21313577890396118,
	"learning_rate": 6.181818181818182e-05,
	"loss": 0.8051,
	"step": 34
	},
	{
	"epoch": 0.09556313993174062,
	"grad_norm": 0.17824789881706238,
	"learning_rate": 6.363636363636364e-05,
	"loss": 0.7841,
	"step": 35
	},
	{
	"epoch": 0.09829351535836177,
	"grad_norm": 0.18413369357585907,
	"learning_rate": 6.545454545454546e-05,
	"loss": 0.7851,
	"step": 36
	},
	{
	"epoch": 0.10102389078498293,
	"grad_norm": 0.2003067582845688,
	"learning_rate": 6.727272727272727e-05,
	"loss": 0.8064,
	"step": 37
	},
	{
	"epoch": 0.1037542662116041,
	"grad_norm": 0.1989540457725525,
	"learning_rate": 6.90909090909091e-05,
	"loss": 0.7841,
	"step": 38
	},
	{
	"epoch": 0.10648464163822526,
	"grad_norm": 0.14544272422790527,
	"learning_rate": 7.090909090909092e-05,
	"loss": 0.7745,
	"step": 39
	},
	{
	"epoch": 0.10921501706484642,
	"grad_norm": 0.1559988260269165,
	"learning_rate": 7.272727272727273e-05,
	"loss": 0.7845,
	"step": 40
	},
	{
	"epoch": 0.11194539249146758,
	"grad_norm": 0.1705523580312729,
	"learning_rate": 7.454545454545455e-05,
	"loss": 0.7876,
	"step": 41
	},
	{
	"epoch": 0.11467576791808874,
	"grad_norm": 0.1464846283197403,
	"learning_rate": 7.636363636363637e-05,
	"loss": 0.7846,
	"step": 42
	},
	{
	"epoch": 0.1174061433447099,
	"grad_norm": 0.1304199993610382,
	"learning_rate": 7.818181818181818e-05,
	"loss": 0.7734,
	"step": 43
	},
	{
	"epoch": 0.12013651877133105,
	"grad_norm": 0.1516261249780655,
	"learning_rate": 8e-05,
	"loss": 0.7836,
	"step": 44
	},
	{
	"epoch": 0.12286689419795221,
	"grad_norm": 0.1361905336380005,
	"learning_rate": 8.181818181818183e-05,
	"loss": 0.7751,
	"step": 45
	},
	{
	"epoch": 0.12559726962457338,
	"grad_norm": 0.14435631036758423,
	"learning_rate": 8.363636363636364e-05,
	"loss": 0.7655,
	"step": 46
	},
	{
	"epoch": 0.12832764505119454,
	"grad_norm": 0.13407501578330994,
	"learning_rate": 8.545454545454545e-05,
	"loss": 0.7729,
	"step": 47
	},
	{
	"epoch": 0.1310580204778157,
	"grad_norm": 0.13555769622325897,
	"learning_rate": 8.727272727272727e-05,
	"loss": 0.7693,
	"step": 48
	},
	{
	"epoch": 0.13378839590443686,
	"grad_norm": 0.1439952850341797,
	"learning_rate": 8.90909090909091e-05,
	"loss": 0.7787,
	"step": 49
	},
	{
	"epoch": 0.13651877133105803,
	"grad_norm": 0.14037510752677917,
	"learning_rate": 9.090909090909092e-05,
	"loss": 0.7757,
	"step": 50
	},
	{
	"epoch": 0.1392491467576792,
	"grad_norm": 0.11772281676530838,
	"learning_rate": 9.272727272727273e-05,
	"loss": 0.7408,
	"step": 51
	},
	{
	"epoch": 0.14197952218430035,
	"grad_norm": 0.1545950025320053,
	"learning_rate": 9.454545454545455e-05,
	"loss": 0.7381,
	"step": 52
	},
	{
	"epoch": 0.1447098976109215,
	"grad_norm": 0.12565699219703674,
	"learning_rate": 9.636363636363637e-05,
	"loss": 0.7769,
	"step": 53
	},
	{
	"epoch": 0.14744027303754267,
	"grad_norm": 0.15412947535514832,
	"learning_rate": 9.818181818181818e-05,
	"loss": 0.7585,
	"step": 54
	},
	{
	"epoch": 0.15017064846416384,
	"grad_norm": 0.11638892441987991,
	"learning_rate": 0.0001,
	"loss": 0.7661,
	"step": 55
	},
	{
	"epoch": 0.15290102389078497,
	"grad_norm": 0.16432470083236694,
	"learning_rate": 0.00010181818181818181,
	"loss": 0.7546,
	"step": 56
	},
	{
	"epoch": 0.15563139931740613,
	"grad_norm": 0.11529026180505753,
	"learning_rate": 0.00010363636363636364,
	"loss": 0.7535,
	"step": 57
	},
	{
	"epoch": 0.1583617747440273,
	"grad_norm": 0.23582805693149567,
	"learning_rate": 0.00010545454545454545,
	"loss": 0.7683,
	"step": 58
	},
	{
	"epoch": 0.16109215017064846,
	"grad_norm": 0.12393908202648163,
	"learning_rate": 0.00010727272727272728,
	"loss": 0.7526,
	"step": 59
	},
	{
	"epoch": 0.16382252559726962,
	"grad_norm": 0.20981422066688538,
	"learning_rate": 0.00010909090909090909,
	"loss": 0.7397,
	"step": 60
	},
	{
	"epoch": 0.16655290102389078,
	"grad_norm": 0.1519405096769333,
	"learning_rate": 0.00011090909090909092,
	"loss": 0.7472,
	"step": 61
	},
	{
	"epoch": 0.16928327645051194,
	"grad_norm": 0.15360122919082642,
	"learning_rate": 0.00011272727272727272,
	"loss": 0.7452,
	"step": 62
	},
	{
	"epoch": 0.1720136518771331,
	"grad_norm": 0.14730164408683777,
	"learning_rate": 0.00011454545454545456,
	"loss": 0.7068,
	"step": 63
	},
	{
	"epoch": 0.17474402730375427,
	"grad_norm": 0.166826993227005,
	"learning_rate": 0.00011636363636363636,
	"loss": 0.7476,
	"step": 64
	},
	{
	"epoch": 0.17747440273037543,
	"grad_norm": 0.17365337908267975,
	"learning_rate": 0.0001181818181818182,
	"loss": 0.7083,
	"step": 65
	},
	{
	"epoch": 0.1802047781569966,
	"grad_norm": 0.2101927250623703,
	"learning_rate": 0.00012,
	"loss": 0.755,
	"step": 66
	},
	{
	"epoch": 0.18293515358361775,
	"grad_norm": 0.1802549660205841,
	"learning_rate": 0.00012181818181818183,
	"loss": 0.7388,
	"step": 67
	},
	{
	"epoch": 0.18566552901023892,
	"grad_norm": 0.17372193932533264,
	"learning_rate": 0.00012363636363636364,
	"loss": 0.7067,
	"step": 68
	},
	{
	"epoch": 0.18839590443686008,
	"grad_norm": 0.15312258899211884,
	"learning_rate": 0.00012545454545454546,
	"loss": 0.7424,
	"step": 69
	},
	{
	"epoch": 0.19112627986348124,
	"grad_norm": 0.24253840744495392,
	"learning_rate": 0.00012727272727272728,
	"loss": 0.7418,
	"step": 70
	},
	{
	"epoch": 0.19385665529010238,
	"grad_norm": 0.2043231725692749,
	"learning_rate": 0.0001290909090909091,
	"loss": 0.7362,
	"step": 71
	},
	{
	"epoch": 0.19658703071672354,
	"grad_norm": 0.19859246909618378,
	"learning_rate": 0.00013090909090909093,
	"loss": 0.7071,
	"step": 72
	},
	{
	"epoch": 0.1993174061433447,
	"grad_norm": 0.20175667107105255,
	"learning_rate": 0.00013272727272727275,
	"loss": 0.7202,
	"step": 73
	},
	{
	"epoch": 0.20204778156996586,
	"grad_norm": 0.1653033196926117,
	"learning_rate": 0.00013454545454545455,
	"loss": 0.7229,
	"step": 74
	},
	{
	"epoch": 0.20477815699658702,
	"grad_norm": 0.17003491520881653,
	"learning_rate": 0.00013636363636363637,
	"loss": 0.7353,
	"step": 75
	},
	{
	"epoch": 0.2075085324232082,
	"grad_norm": 0.18356764316558838,
	"learning_rate": 0.0001381818181818182,
	"loss": 0.7138,
	"step": 76
	},
	{
	"epoch": 0.21023890784982935,
	"grad_norm": 0.2215511053800583,
	"learning_rate": 0.00014,
	"loss": 0.7238,
	"step": 77
	},
	{
	"epoch": 0.2129692832764505,
	"grad_norm": 0.34184327721595764,
	"learning_rate": 0.00014181818181818184,
	"loss": 0.7297,
	"step": 78
	},
	{
	"epoch": 0.21569965870307167,
	"grad_norm": 0.25543472170829773,
	"learning_rate": 0.00014363636363636363,
	"loss": 0.742,
	"step": 79
	},
	{
	"epoch": 0.21843003412969283,
	"grad_norm": 0.2220849245786667,
	"learning_rate": 0.00014545454545454546,
	"loss": 0.6897,
	"step": 80
	},
	{
	"epoch": 0.221160409556314,
	"grad_norm": 0.25552013516426086,
	"learning_rate": 0.00014727272727272728,
	"loss": 0.744,
	"step": 81
	},
	{
	"epoch": 0.22389078498293516,
	"grad_norm": 0.2621108889579773,
	"learning_rate": 0.0001490909090909091,
	"loss": 0.7193,
	"step": 82
	},
	{
	"epoch": 0.22662116040955632,
	"grad_norm": 0.1840047836303711,
	"learning_rate": 0.0001509090909090909,
	"loss": 0.6991,
	"step": 83
	},
	{
	"epoch": 0.22935153583617748,
	"grad_norm": 0.21538959443569183,
	"learning_rate": 0.00015272727272727275,
	"loss": 0.7058,
	"step": 84
	},
	{
	"epoch": 0.23208191126279865,
	"grad_norm": 0.34613293409347534,
	"learning_rate": 0.00015454545454545454,
	"loss": 0.7011,
	"step": 85
	},
	{
	"epoch": 0.2348122866894198,
	"grad_norm": 0.26010966300964355,
	"learning_rate": 0.00015636363636363637,
	"loss": 0.7009,
	"step": 86
	},
	{
	"epoch": 0.23754266211604094,
	"grad_norm": 0.18031255900859833,
	"learning_rate": 0.0001581818181818182,
	"loss": 0.7036,
	"step": 87
	},
	{
	"epoch": 0.2402730375426621,
	"grad_norm": 0.24567286670207977,
	"learning_rate": 0.00016,
	"loss": 0.6921,
	"step": 88
	},
	{
	"epoch": 0.24300341296928327,
	"grad_norm": 0.19522973895072937,
	"learning_rate": 0.00016181818181818184,
	"loss": 0.7109,
	"step": 89
	},
	{
	"epoch": 0.24573378839590443,
	"grad_norm": 0.2405068725347519,
	"learning_rate": 0.00016363636363636366,
	"loss": 0.7134,
	"step": 90
	},
	{
	"epoch": 0.2484641638225256,
	"grad_norm": 0.15669392049312592,
	"learning_rate": 0.00016545454545454545,
	"loss": 0.6966,
	"step": 91
	},
	{
	"epoch": 0.25119453924914675,
	"grad_norm": 0.23415732383728027,
	"learning_rate": 0.00016727272727272728,
	"loss": 0.6771,
	"step": 92
	},
	{
	"epoch": 0.25392491467576794,
	"grad_norm": 0.1842266321182251,
	"learning_rate": 0.0001690909090909091,
	"loss": 0.6895,
	"step": 93
	},
	{
	"epoch": 0.2566552901023891,
	"grad_norm": 0.21642841398715973,
	"learning_rate": 0.0001709090909090909,
	"loss": 0.6913,
	"step": 94
	},
	{
	"epoch": 0.2593856655290102,
	"grad_norm": 0.26327016949653625,
	"learning_rate": 0.00017272727272727275,
	"loss": 0.6856,
	"step": 95
	},
	{
	"epoch": 0.2621160409556314,
	"grad_norm": 0.20735357701778412,
	"learning_rate": 0.00017454545454545454,
	"loss": 0.6769,
	"step": 96
	},
	{
	"epoch": 0.26484641638225254,
	"grad_norm": 0.3127861022949219,
	"learning_rate": 0.00017636363636363637,
	"loss": 0.6518,
	"step": 97
	},
	{
	"epoch": 0.2675767918088737,
	"grad_norm": 0.34650346636772156,
	"learning_rate": 0.0001781818181818182,
	"loss": 0.6937,
	"step": 98
	},
	{
	"epoch": 0.27030716723549486,
	"grad_norm": 0.252059668302536,
	"learning_rate": 0.00018,
	"loss": 0.6589,
	"step": 99
	},
	{
	"epoch": 0.27303754266211605,
	"grad_norm": 0.1896669715642929,
	"learning_rate": 0.00018181818181818183,
	"loss": 0.6717,
	"step": 100
	},
	{
	"epoch": 0.2757679180887372,
	"grad_norm": 0.2724236249923706,
	"learning_rate": 0.00018363636363636366,
	"loss": 0.6671,
	"step": 101
	},
	{
	"epoch": 0.2784982935153584,
	"grad_norm": 0.1814826875925064,
	"learning_rate": 0.00018545454545454545,
	"loss": 0.6564,
	"step": 102
	},
	{
	"epoch": 0.2812286689419795,
	"grad_norm": 0.24140000343322754,
	"learning_rate": 0.00018727272727272728,
	"loss": 0.6708,
	"step": 103
	},
	{
	"epoch": 0.2839590443686007,
	"grad_norm": 0.19333204627037048,
	"learning_rate": 0.0001890909090909091,
	"loss": 0.6628,
	"step": 104
	},
	{
	"epoch": 0.28668941979522183,
	"grad_norm": 0.21080803871154785,
	"learning_rate": 0.00019090909090909092,
	"loss": 0.6457,
	"step": 105
	},
	{
	"epoch": 0.289419795221843,
	"grad_norm": 0.20848962664604187,
	"learning_rate": 0.00019272727272727274,
	"loss": 0.6589,
	"step": 106
	},
	{
	"epoch": 0.29215017064846416,
	"grad_norm": 0.2381501942873001,
	"learning_rate": 0.00019454545454545457,
	"loss": 0.6846,
	"step": 107
	},
	{
	"epoch": 0.29488054607508535,
	"grad_norm": 0.1734190434217453,
	"learning_rate": 0.00019636363636363636,
	"loss": 0.6691,
	"step": 108
	},
	{
	"epoch": 0.2976109215017065,
	"grad_norm": 0.2187424749135971,
	"learning_rate": 0.00019818181818181821,
	"loss": 0.6806,
	"step": 109
	},
	{
	"epoch": 0.3003412969283277,
	"grad_norm": 0.21607345342636108,
	"learning_rate": 0.0002,
	"loss": 0.6588,
	"step": 110
	},
	{
	"epoch": 0.3030716723549488,
	"grad_norm": 0.1873304545879364,
	"learning_rate": 0.00019999949446003433,
	"loss": 0.6513,
	"step": 111
	},
	{
	"epoch": 0.30580204778156994,
	"grad_norm": 0.21443282067775726,
	"learning_rate": 0.00019999797784524866,
	"loss": 0.6704,
	"step": 112
	},
	{
	"epoch": 0.30853242320819113,
	"grad_norm": 0.18743731081485748,
	"learning_rate": 0.00019999545017097728,
	"loss": 0.6346,
	"step": 113
	},
	{
	"epoch": 0.31126279863481227,
	"grad_norm": 0.18916335701942444,
	"learning_rate": 0.0001999919114627769,
	"loss": 0.68,
	"step": 114
	},
	{
	"epoch": 0.31399317406143346,
	"grad_norm": 0.1925644427537918,
	"learning_rate": 0.00019998736175642673,
	"loss": 0.6408,
	"step": 115
	},
	{
	"epoch": 0.3167235494880546,
	"grad_norm": 0.17722898721694946,
	"learning_rate": 0.0001999818010979279,
	"loss": 0.6787,
	"step": 116
	},
	{
	"epoch": 0.3194539249146758,
	"grad_norm": 0.19374825060367584,
	"learning_rate": 0.0001999752295435032,
	"loss": 0.669,
	"step": 117
	},
	{
	"epoch": 0.3221843003412969,
	"grad_norm": 0.20013949275016785,
	"learning_rate": 0.00019996764715959618,
	"loss": 0.653,
	"step": 118
	},
	{
	"epoch": 0.3249146757679181,
	"grad_norm": 0.18780681490898132,
	"learning_rate": 0.00019995905402287094,
	"loss": 0.6557,
	"step": 119
	},
	{
	"epoch": 0.32764505119453924,
	"grad_norm": 0.1718084216117859,
	"learning_rate": 0.00019994945022021082,
	"loss": 0.6272,
	"step": 120
	},
	{
	"epoch": 0.33037542662116043,
	"grad_norm": 0.1613592952489853,
	"learning_rate": 0.00019993883584871808,
	"loss": 0.6515,
	"step": 121
	},
	{
	"epoch": 0.33310580204778156,
	"grad_norm": 0.1737043410539627,
	"learning_rate": 0.00019992721101571236,
	"loss": 0.6134,
	"step": 122
	},
	{
	"epoch": 0.33583617747440275,
	"grad_norm": 0.16362418234348297,
	"learning_rate": 0.0001999145758387301,
	"loss": 0.6448,
	"step": 123
	},
	{
	"epoch": 0.3385665529010239,
	"grad_norm": 0.19181552529335022,
	"learning_rate": 0.00019990093044552304,
	"loss": 0.6497,
	"step": 124
	},
	{
	"epoch": 0.3412969283276451,
	"grad_norm": 0.15803317725658417,
	"learning_rate": 0.00019988627497405696,
	"loss": 0.6116,
	"step": 125
	},
	{
	"epoch": 0.3440273037542662,
	"grad_norm": 0.2208717167377472,
	"learning_rate": 0.00019987060957251047,
	"loss": 0.6459,
	"step": 126
	},
	{
	"epoch": 0.34675767918088735,
	"grad_norm": 0.20142869651317596,
	"learning_rate": 0.00019985393439927323,
	"loss": 0.6589,
	"step": 127
	},
	{
	"epoch": 0.34948805460750854,
	"grad_norm": 0.17945925891399384,
	"learning_rate": 0.00019983624962294458,
	"loss": 0.6252,
	"step": 128
	},
	{
	"epoch": 0.35221843003412967,
	"grad_norm": 0.22226247191429138,
	"learning_rate": 0.00019981755542233177,
	"loss": 0.6379,
	"step": 129
	},
	{
	"epoch": 0.35494880546075086,
	"grad_norm": 0.1588139832019806,
	"learning_rate": 0.00019979785198644806,
	"loss": 0.6408,
	"step": 130
	},
	{
	"epoch": 0.357679180887372,
	"grad_norm": 0.22639498114585876,
	"learning_rate": 0.00019977713951451102,
	"loss": 0.6434,
	"step": 131
	},
	{
	"epoch": 0.3604095563139932,
	"grad_norm": 0.16015386581420898,
	"learning_rate": 0.00019975541821594026,
	"loss": 0.6151,
	"step": 132
	},
	{
	"epoch": 0.3631399317406143,
	"grad_norm": 0.21671050786972046,
	"learning_rate": 0.00019973268831035545,
	"loss": 0.6357,
	"step": 133
	},
	{
	"epoch": 0.3658703071672355,
	"grad_norm": 0.1871589720249176,
	"learning_rate": 0.00019970895002757413,
	"loss": 0.6436,
	"step": 134
	},
	{
	"epoch": 0.36860068259385664,
	"grad_norm": 0.19527480006217957,
	"learning_rate": 0.00019968420360760926,
	"loss": 0.6308,
	"step": 135
	},
	{
	"epoch": 0.37133105802047783,
	"grad_norm": 0.20158074796199799,
	"learning_rate": 0.000199658449300667,
	"loss": 0.6227,
	"step": 136
	},
	{
	"epoch": 0.37406143344709897,
	"grad_norm": 0.15605966746807098,
	"learning_rate": 0.00019963168736714392,
	"loss": 0.615,
	"step": 137
	},
	{
	"epoch": 0.37679180887372016,
	"grad_norm": 0.22042252123355865,
	"learning_rate": 0.00019960391807762463,
	"loss": 0.6263,
	"step": 138
	},
	{
	"epoch": 0.3795221843003413,
	"grad_norm": 0.16206978261470795,
	"learning_rate": 0.00019957514171287875,
	"loss": 0.6182,
	"step": 139
	},
	{
	"epoch": 0.3822525597269625,
	"grad_norm": 0.2251751869916916,
	"learning_rate": 0.00019954535856385837,
	"loss": 0.6376,
	"step": 140
	},
	{
	"epoch": 0.3849829351535836,
	"grad_norm": 0.16586551070213318,
	"learning_rate": 0.00019951456893169497,
	"loss": 0.6285,
	"step": 141
	},
	{
	"epoch": 0.38771331058020475,
	"grad_norm": 0.27427414059638977,
	"learning_rate": 0.0001994827731276963,
	"loss": 0.6397,
	"step": 142
	},
	{
	"epoch": 0.39044368600682594,
	"grad_norm": 0.21177491545677185,
	"learning_rate": 0.00019944997147334337,
	"loss": 0.6034,
	"step": 143
	},
	{
	"epoch": 0.3931740614334471,
	"grad_norm": 0.25477880239486694,
	"learning_rate": 0.0001994161643002871,
	"loss": 0.6199,
	"step": 144
	},
	{
	"epoch": 0.39590443686006827,
	"grad_norm": 0.23290970921516418,
	"learning_rate": 0.00019938135195034508,
	"loss": 0.6201,
	"step": 145
	},
	{
	"epoch": 0.3986348122866894,
	"grad_norm": 0.19198672473430634,
	"learning_rate": 0.00019934553477549794,
	"loss": 0.6213,
	"step": 146
	},
	{
	"epoch": 0.4013651877133106,
	"grad_norm": 0.1911400705575943,
	"learning_rate": 0.000199308713137886,
	"loss": 0.6146,
	"step": 147
	},
	{
	"epoch": 0.4040955631399317,
	"grad_norm": 0.18605491518974304,
	"learning_rate": 0.0001992708874098054,
	"loss": 0.6123,
	"step": 148
	},
	{
	"epoch": 0.4068259385665529,
	"grad_norm": 0.18028293550014496,
	"learning_rate": 0.0001992320579737045,
	"loss": 0.6061,
	"step": 149
	},
	{
	"epoch": 0.40955631399317405,
	"grad_norm": 0.1961037963628769,
	"learning_rate": 0.00019919222522217996,
	"loss": 0.622,
	"step": 150
	},
	{
	"epoch": 0.41228668941979524,
	"grad_norm": 0.17400594055652618,
	"learning_rate": 0.00019915138955797272,
	"loss": 0.6138,
	"step": 151
	},
	{
	"epoch": 0.4150170648464164,
	"grad_norm": 0.17892149090766907,
	"learning_rate": 0.00019910955139396396,
	"loss": 0.6242,
	"step": 152
	},
	{
	"epoch": 0.41774744027303756,
	"grad_norm": 0.21851663291454315,
	"learning_rate": 0.000199066711153171,
	"loss": 0.5913,
	"step": 153
	},
	{
	"epoch": 0.4204778156996587,
	"grad_norm": 0.1468774825334549,
	"learning_rate": 0.0001990228692687429,
	"loss": 0.6025,
	"step": 154
	},
	{
	"epoch": 0.4232081911262799,
	"grad_norm": 0.1920468658208847,
	"learning_rate": 0.00019897802618395614,
	"loss": 0.6127,
	"step": 155
	},
	{
	"epoch": 0.425938566552901,
	"grad_norm": 0.17375442385673523,
	"learning_rate": 0.00019893218235221015,
	"loss": 0.6211,
	"step": 156
	},
	{
	"epoch": 0.4286689419795222,
	"grad_norm": 0.15414904057979584,
	"learning_rate": 0.00019888533823702277,
	"loss": 0.6183,
	"step": 157
	},
	{
	"epoch": 0.43139931740614335,
	"grad_norm": 0.2245103418827057,
	"learning_rate": 0.0001988374943120254,
	"loss": 0.6248,
	"step": 158
	},
	{
	"epoch": 0.4341296928327645,
	"grad_norm": 0.17193332314491272,
	"learning_rate": 0.00019878865106095835,
	"loss": 0.5969,
	"step": 159
	},
	{
	"epoch": 0.43686006825938567,
	"grad_norm": 0.13767646253108978,
	"learning_rate": 0.00019873880897766598,
	"loss": 0.5943,
	"step": 160
	},
	{
	"epoch": 0.4395904436860068,
	"grad_norm": 0.1449906826019287,
	"learning_rate": 0.00019868796856609152,
	"loss": 0.573,
	"step": 161
	},
	{
	"epoch": 0.442320819112628,
	"grad_norm": 0.1392473578453064,
	"learning_rate": 0.00019863613034027224,
	"loss": 0.5926,
	"step": 162
	},
	{
	"epoch": 0.44505119453924913,
	"grad_norm": 0.1772463023662567,
	"learning_rate": 0.00019858329482433403,
	"loss": 0.6007,
	"step": 163
	},
	{
	"epoch": 0.4477815699658703,
	"grad_norm": 0.13768768310546875,
	"learning_rate": 0.0001985294625524861,
	"loss": 0.5901,
	"step": 164
	},
	{
	"epoch": 0.45051194539249145,
	"grad_norm": 0.1631435751914978,
	"learning_rate": 0.00019847463406901588,
	"loss": 0.5907,
	"step": 165
	},
	{
	"epoch": 0.45324232081911264,
	"grad_norm": 0.14781758189201355,
	"learning_rate": 0.00019841880992828306,
	"loss": 0.5903,
	"step": 166
	},
	{
	"epoch": 0.4559726962457338,
	"grad_norm": 0.13440802693367004,
	"learning_rate": 0.00019836199069471437,
	"loss": 0.5884,
	"step": 167
	},
	{
	"epoch": 0.45870307167235497,
	"grad_norm": 0.1414463371038437,
	"learning_rate": 0.00019830417694279766,
	"loss": 0.598,
	"step": 168
	},
	{
	"epoch": 0.4614334470989761,
	"grad_norm": 0.13185666501522064,
	"learning_rate": 0.0001982453692570762,
	"loss": 0.621,
	"step": 169
	},
	{
	"epoch": 0.4641638225255973,
	"grad_norm": 0.14422471821308136,
	"learning_rate": 0.00019818556823214268,
	"loss": 0.6065,
	"step": 170
	},
	{
	"epoch": 0.4668941979522184,
	"grad_norm": 0.13765788078308105,
	"learning_rate": 0.00019812477447263326,
	"loss": 0.6073,
	"step": 171
	},
	{
	"epoch": 0.4696245733788396,
	"grad_norm": 0.16028070449829102,
	"learning_rate": 0.0001980629885932214,
	"loss": 0.5767,
	"step": 172
	},
	{
	"epoch": 0.47235494880546075,
	"grad_norm": 0.14638394117355347,
	"learning_rate": 0.00019800021121861182,
	"loss": 0.5971,
	"step": 173
	},
	{
	"epoch": 0.4750853242320819,
	"grad_norm": 0.14843404293060303,
	"learning_rate": 0.0001979364429835339,
	"loss": 0.5894,
	"step": 174
	},
	{
	"epoch": 0.4778156996587031,
	"grad_norm": 0.13411492109298706,
	"learning_rate": 0.00019787168453273544,
	"loss": 0.5757,
	"step": 175
	},
	{
	"epoch": 0.4805460750853242,
	"grad_norm": 0.14304684102535248,
	"learning_rate": 0.0001978059365209762,
	"loss": 0.5846,
	"step": 176
	},
	{
	"epoch": 0.4832764505119454,
	"grad_norm": 0.13569754362106323,
	"learning_rate": 0.00019773919961302113,
	"loss": 0.5872,
	"step": 177
	},
	{
	"epoch": 0.48600682593856653,
	"grad_norm": 0.14318887889385223,
	"learning_rate": 0.00019767147448363366,
	"loss": 0.5804,
	"step": 178
	},
	{
	"epoch": 0.4887372013651877,
	"grad_norm": 0.1457952857017517,
	"learning_rate": 0.00019760276181756903,
	"loss": 0.5973,
	"step": 179
	},
	{
	"epoch": 0.49146757679180886,
	"grad_norm": 0.13820476830005646,
	"learning_rate": 0.00019753306230956718,
	"loss": 0.569,
	"step": 180
	},
	{
	"epoch": 0.49419795221843005,
	"grad_norm": 0.19338561594486237,
	"learning_rate": 0.00019746237666434587,
	"loss": 0.5723,
	"step": 181
	},
	{
	"epoch": 0.4969283276450512,
	"grad_norm": 0.17352697253227234,
	"learning_rate": 0.00019739070559659347,
	"loss": 0.578,
	"step": 182
	},
	{
	"epoch": 0.49965870307167237,
	"grad_norm": 0.15502339601516724,
	"learning_rate": 0.00019731804983096177,
	"loss": 0.5953,
	"step": 183
	},
	{
	"epoch": 0.5023890784982935,
	"grad_norm": 0.18948784470558167,
	"learning_rate": 0.00019724441010205863,
	"loss": 0.5883,
	"step": 184
	},
	{
	"epoch": 0.5051194539249146,
	"grad_norm": 0.17587606608867645,
	"learning_rate": 0.00019716978715444056,
	"loss": 0.5723,
	"step": 185
	},
	{
	"epoch": 0.5078498293515359,
	"grad_norm": 0.1599951833486557,
	"learning_rate": 0.0001970941817426052,
	"loss": 0.5799,
	"step": 186
	},
	{
	"epoch": 0.510580204778157,
	"grad_norm": 0.1717846840620041,
	"learning_rate": 0.00019701759463098374,
	"loss": 0.5543,
	"step": 187
	},
	{
	"epoch": 0.5133105802047782,
	"grad_norm": 0.14032602310180664,
	"learning_rate": 0.00019694002659393305,
	"loss": 0.5845,
	"step": 188
	},
	{
	"epoch": 0.5160409556313993,
	"grad_norm": 0.17668449878692627,
	"learning_rate": 0.000196861478415728,
	"loss": 0.6026,
	"step": 189
	},
	{
	"epoch": 0.5187713310580204,
	"grad_norm": 0.17806965112686157,
	"learning_rate": 0.00019678195089055346,
	"loss": 0.5681,
	"step": 190
	},
	{
	"epoch": 0.5215017064846417,
	"grad_norm": 0.13321803510189056,
	"learning_rate": 0.00019670144482249627,
	"loss": 0.5586,
	"step": 191
	},
	{
	"epoch": 0.5242320819112628,
	"grad_norm": 0.14684653282165527,
	"learning_rate": 0.00019661996102553718,
	"loss": 0.5589,
	"step": 192
	},
	{
	"epoch": 0.5269624573378839,
	"grad_norm": 0.1308140754699707,
	"learning_rate": 0.0001965375003235424,
	"loss": 0.568,
	"step": 193
	},
	{
	"epoch": 0.5296928327645051,
	"grad_norm": 0.17461615800857544,
	"learning_rate": 0.00019645406355025565,
	"loss": 0.5757,
	"step": 194
	},
	{
	"epoch": 0.5324232081911263,
	"grad_norm": 0.15591022372245789,
	"learning_rate": 0.0001963696515492893,
	"loss": 0.5946,
	"step": 195
	},
	{
	"epoch": 0.5351535836177475,
	"grad_norm": 0.14174342155456543,
	"learning_rate": 0.00019628426517411625,
	"loss": 0.5839,
	"step": 196
	},
	{
	"epoch": 0.5378839590443686,
	"grad_norm": 0.15242989361286163,
	"learning_rate": 0.0001961979052880609,
	"loss": 0.5567,
	"step": 197
	},
	{
	"epoch": 0.5406143344709897,
	"grad_norm": 0.16651766002178192,
	"learning_rate": 0.00019611057276429085,
	"loss": 0.5593,
	"step": 198
	},
	{
	"epoch": 0.543344709897611,
	"grad_norm": 0.14858382940292358,
	"learning_rate": 0.00019602226848580763,
	"loss": 0.5848,
	"step": 199
	},
	{
	"epoch": 0.5460750853242321,
	"grad_norm": 0.14774656295776367,
	"learning_rate": 0.00019593299334543808,
	"loss": 0.563,
	"step": 200
	},
	{
	"epoch": 0.5488054607508532,
	"grad_norm": 0.13993892073631287,
	"learning_rate": 0.0001958427482458253,
	"loss": 0.5742,
	"step": 201
	},
	{
	"epoch": 0.5515358361774744,
	"grad_norm": 0.15201717615127563,
	"learning_rate": 0.0001957515340994193,
	"loss": 0.5726,
	"step": 202
	},
	{
	"epoch": 0.5542662116040956,
	"grad_norm": 0.1567879170179367,
	"learning_rate": 0.00019565935182846802,
	"loss": 0.5707,
	"step": 203
	},
	{
	"epoch": 0.5569965870307167,
	"grad_norm": 0.13955365121364594,
	"learning_rate": 0.00019556620236500793,
	"loss": 0.5339,
	"step": 204
	},
	{
	"epoch": 0.5597269624573379,
	"grad_norm": 0.1425381898880005,
	"learning_rate": 0.00019547208665085457,
	"loss": 0.5698,
	"step": 205
	},
	{
	"epoch": 0.562457337883959,
	"grad_norm": 0.14695167541503906,
	"learning_rate": 0.00019537700563759304,
	"loss": 0.578,
	"step": 206
	},
	{
	"epoch": 0.5651877133105802,
	"grad_norm": 0.15581448376178741,
	"learning_rate": 0.00019528096028656832,
	"loss": 0.5552,
	"step": 207
	},
	{
	"epoch": 0.5679180887372014,
	"grad_norm": 0.13141174614429474,
	"learning_rate": 0.00019518395156887576,
	"loss": 0.5598,
	"step": 208
	},
	{
	"epoch": 0.5706484641638225,
	"grad_norm": 0.16678418219089508,
	"learning_rate": 0.00019508598046535095,
	"loss": 0.5485,
	"step": 209
	},
	{
	"epoch": 0.5733788395904437,
	"grad_norm": 0.1717272400856018,
	"learning_rate": 0.00019498704796656018,
	"loss": 0.5849,
	"step": 210
	},
	{
	"epoch": 0.5761092150170648,
	"grad_norm": 0.14453086256980896,
	"learning_rate": 0.00019488715507278998,
	"loss": 0.5757,
	"step": 211
	},
	{
	"epoch": 0.578839590443686,
	"grad_norm": 0.16329538822174072,
	"learning_rate": 0.0001947863027940374,
	"loss": 0.5632,
	"step": 212
	},
	{
	"epoch": 0.5815699658703072,
	"grad_norm": 0.15865112841129303,
	"learning_rate": 0.00019468449214999955,
	"loss": 0.5728,
	"step": 213
	},
	{
	"epoch": 0.5843003412969283,
	"grad_norm": 0.15316785871982574,
	"learning_rate": 0.00019458172417006347,
	"loss": 0.5556,
	"step": 214
	},
	{
	"epoch": 0.5870307167235495,
	"grad_norm": 0.16666734218597412,
	"learning_rate": 0.00019447799989329555,
	"loss": 0.5759,
	"step": 215
	},
	{
	"epoch": 0.5897610921501707,
	"grad_norm": 0.16525249183177948,
	"learning_rate": 0.00019437332036843118,
	"loss": 0.5667,
	"step": 216
	},
	{
	"epoch": 0.5924914675767918,
	"grad_norm": 0.14022761583328247,
	"learning_rate": 0.00019426768665386398,
	"loss": 0.5611,
	"step": 217
	},
	{
	"epoch": 0.595221843003413,
	"grad_norm": 0.15930500626564026,
	"learning_rate": 0.00019416109981763526,
	"loss": 0.5414,
	"step": 218
	},
	{
	"epoch": 0.5979522184300341,
	"grad_norm": 0.141464963555336,
	"learning_rate": 0.00019405356093742313,
	"loss": 0.5363,
	"step": 219
	},
	{
	"epoch": 0.6006825938566553,
	"grad_norm": 0.1541200429201126,
	"learning_rate": 0.0001939450711005316,
	"loss": 0.5487,
	"step": 220
	},
	{
	"epoch": 0.6034129692832765,
	"grad_norm": 0.13717712461948395,
	"learning_rate": 0.00019383563140387965,
	"loss": 0.5564,
	"step": 221
	},
	{
	"epoch": 0.6061433447098976,
	"grad_norm": 0.14139863848686218,
	"learning_rate": 0.00019372524295399013,
	"loss": 0.5592,
	"step": 222
	},
	{
	"epoch": 0.6088737201365187,
	"grad_norm": 0.13494791090488434,
	"learning_rate": 0.00019361390686697846,
	"loss": 0.5452,
	"step": 223
	},
	{
	"epoch": 0.6116040955631399,
	"grad_norm": 0.1512797623872757,
	"learning_rate": 0.0001935016242685415,
	"loss": 0.5595,
	"step": 224
	},
	{
	"epoch": 0.6143344709897611,
	"grad_norm": 0.1422545164823532,
	"learning_rate": 0.00019338839629394605,
	"loss": 0.5602,
	"step": 225
	},
	{
	"epoch": 0.6170648464163823,
	"grad_norm": 0.14444862306118011,
	"learning_rate": 0.00019327422408801744,
	"loss": 0.5574,
	"step": 226
	},
	{
	"epoch": 0.6197952218430034,
	"grad_norm": 0.15434536337852478,
	"learning_rate": 0.0001931591088051279,
	"loss": 0.5467,
	"step": 227
	},
	{
	"epoch": 0.6225255972696245,
	"grad_norm": 0.1420368254184723,
	"learning_rate": 0.000193043051609185,
	"loss": 0.5519,
	"step": 228
	},
	{
	"epoch": 0.6252559726962458,
	"grad_norm": 0.12724490463733673,
	"learning_rate": 0.00019292605367361978,
	"loss": 0.5779,
	"step": 229
	},
	{
	"epoch": 0.6279863481228669,
	"grad_norm": 0.13820625841617584,
	"learning_rate": 0.00019280811618137484,
	"loss": 0.5468,
	"step": 230
	},
	{
	"epoch": 0.630716723549488,
	"grad_norm": 0.1629246473312378,
	"learning_rate": 0.00019268924032489248,
	"loss": 0.5721,
	"step": 231
	},
	{
	"epoch": 0.6334470989761092,
	"grad_norm": 0.193836510181427,
	"learning_rate": 0.00019256942730610268,
	"loss": 0.5392,
	"step": 232
	},
	{
	"epoch": 0.6361774744027304,
	"grad_norm": 0.18511579930782318,
	"learning_rate": 0.0001924486783364108,
	"loss": 0.5718,
	"step": 233
	},
	{
	"epoch": 0.6389078498293516,
	"grad_norm": 0.14278100430965424,
	"learning_rate": 0.00019232699463668542,
	"loss": 0.5589,
	"step": 234
	},
	{
	"epoch": 0.6416382252559727,
	"grad_norm": 0.14693580567836761,
	"learning_rate": 0.00019220437743724605,
	"loss": 0.515,
	"step": 235
	},
	{
	"epoch": 0.6443686006825938,
	"grad_norm": 0.20018483698368073,
	"learning_rate": 0.00019208082797785055,
	"loss": 0.5484,
	"step": 236
	},
	{
	"epoch": 0.647098976109215,
	"grad_norm": 0.1597984880208969,
	"learning_rate": 0.00019195634750768275,
	"loss": 0.539,
	"step": 237
	},
	{
	"epoch": 0.6498293515358362,
	"grad_norm": 0.14270828664302826,
	"learning_rate": 0.00019183093728533966,
	"loss": 0.5496,
	"step": 238
	},
	{
	"epoch": 0.6525597269624573,
	"grad_norm": 0.1788954734802246,
	"learning_rate": 0.0001917045985788189,
	"loss": 0.5629,
	"step": 239
	},
	{
	"epoch": 0.6552901023890785,
	"grad_norm": 0.15234531462192535,
	"learning_rate": 0.00019157733266550575,
	"loss": 0.5454,
	"step": 240
	},
	{
	"epoch": 0.6580204778156996,
	"grad_norm": 0.17805363237857819,
	"learning_rate": 0.00019144914083216034,
	"loss": 0.5599,
	"step": 241
	},
	{
	"epoch": 0.6607508532423209,
	"grad_norm": 0.18313759565353394,
	"learning_rate": 0.00019132002437490458,
	"loss": 0.5544,
	"step": 242
	},
	{
	"epoch": 0.663481228668942,
	"grad_norm": 0.16873271763324738,
	"learning_rate": 0.00019118998459920902,
	"loss": 0.5445,
	"step": 243
	},
	{
	"epoch": 0.6662116040955631,
	"grad_norm": 0.23297423124313354,
	"learning_rate": 0.00019105902281987976,
	"loss": 0.5623,
	"step": 244
	},
	{
	"epoch": 0.6689419795221843,
	"grad_norm": 0.13279114663600922,
	"learning_rate": 0.00019092714036104508,
	"loss": 0.5354,
	"step": 245
	},
	{
	"epoch": 0.6716723549488055,
	"grad_norm": 0.18942788243293762,
	"learning_rate": 0.00019079433855614201,
	"loss": 0.5574,
	"step": 246
	},
	{
	"epoch": 0.6744027303754266,
	"grad_norm": 0.15293289721012115,
	"learning_rate": 0.00019066061874790303,
	"loss": 0.5473,
	"step": 247
	},
	{
	"epoch": 0.6771331058020478,
	"grad_norm": 0.1669221818447113,
	"learning_rate": 0.00019052598228834217,
	"loss": 0.5272,
	"step": 248
	},
	{
	"epoch": 0.6798634812286689,
	"grad_norm": 0.16368111968040466,
	"learning_rate": 0.00019039043053874175,
	"loss": 0.5387,
	"step": 249
	},
	{
	"epoch": 0.6825938566552902,
	"grad_norm": 0.15945561230182648,
	"learning_rate": 0.00019025396486963827,
	"loss": 0.548,
	"step": 250
	},
	{
	"epoch": 0.6853242320819113,
	"grad_norm": 0.1538572758436203,
	"learning_rate": 0.00019011658666080874,
	"loss": 0.5419,
	"step": 251
	},
	{
	"epoch": 0.6880546075085324,
	"grad_norm": 0.13356320559978485,
	"learning_rate": 0.00018997829730125663,
	"loss": 0.5271,
	"step": 252
	},
	{
	"epoch": 0.6907849829351536,
	"grad_norm": 0.1389850527048111,
	"learning_rate": 0.0001898390981891979,
	"loss": 0.5489,
	"step": 253
	},
	{
	"epoch": 0.6935153583617747,
	"grad_norm": 0.14726200699806213,
	"learning_rate": 0.00018969899073204686,
	"loss": 0.554,
	"step": 254
	},
	{
	"epoch": 0.6962457337883959,
	"grad_norm": 0.13865283131599426,
	"learning_rate": 0.0001895579763464019,
	"loss": 0.5486,
	"step": 255
	},
	{
	"epoch": 0.6989761092150171,
	"grad_norm": 0.15216147899627686,
	"learning_rate": 0.00018941605645803115,
	"loss": 0.5156,
	"step": 256
	},
	{
	"epoch": 0.7017064846416382,
	"grad_norm": 0.13091793656349182,
	"learning_rate": 0.00018927323250185815,
	"loss": 0.5359,
	"step": 257
	},
	{
	"epoch": 0.7044368600682593,
	"grad_norm": 0.14097946882247925,
	"learning_rate": 0.0001891295059219472,
	"loss": 0.5367,
	"step": 258
	},
	{
	"epoch": 0.7071672354948806,
	"grad_norm": 0.1434548795223236,
	"learning_rate": 0.00018898487817148898,
	"loss": 0.5395,
	"step": 259
	},
	{
	"epoch": 0.7098976109215017,
	"grad_norm": 0.15296564996242523,
	"learning_rate": 0.0001888393507127856,
	"loss": 0.5552,
	"step": 260
	},
	{
	"epoch": 0.7126279863481229,
	"grad_norm": 0.15522688627243042,
	"learning_rate": 0.000188692925017236,
	"loss": 0.5433,
	"step": 261
	},
	{
	"epoch": 0.715358361774744,
	"grad_norm": 0.1461726576089859,
	"learning_rate": 0.000188545602565321,
	"loss": 0.5291,
	"step": 262
	},
	{
	"epoch": 0.7180887372013652,
	"grad_norm": 0.14327403903007507,
	"learning_rate": 0.00018839738484658836,
	"loss": 0.5502,
	"step": 263
	},
	{
	"epoch": 0.7208191126279864,
	"grad_norm": 0.1613275706768036,
	"learning_rate": 0.00018824827335963765,
	"loss": 0.5565,
	"step": 264
	},
	{
	"epoch": 0.7235494880546075,
	"grad_norm": 0.2040315866470337,
	"learning_rate": 0.00018809826961210525,
	"loss": 0.5324,
	"step": 265
	},
	{
	"epoch": 0.7262798634812286,
	"grad_norm": 0.13186219334602356,
	"learning_rate": 0.0001879473751206489,
	"loss": 0.5395,
	"step": 266
	},
	{
	"epoch": 0.7290102389078499,
	"grad_norm": 0.15462934970855713,
	"learning_rate": 0.00018779559141093258,
	"loss": 0.527,
	"step": 267
	},
	{
	"epoch": 0.731740614334471,
	"grad_norm": 0.155660942196846,
	"learning_rate": 0.0001876429200176108,
	"loss": 0.5429,
	"step": 268
	},
	{
	"epoch": 0.7344709897610922,
	"grad_norm": 0.15280231833457947,
	"learning_rate": 0.00018748936248431353,
	"loss": 0.5571,
	"step": 269
	},
	{
	"epoch": 0.7372013651877133,
	"grad_norm": 0.19389967620372772,
	"learning_rate": 0.00018733492036363005,
	"loss": 0.5325,
	"step": 270
	},
	{
	"epoch": 0.7399317406143344,
	"grad_norm": 0.14486828446388245,
	"learning_rate": 0.0001871795952170937,
	"loss": 0.5229,
	"step": 271
	},
	{
	"epoch": 0.7426621160409557,
	"grad_norm": 0.18223008513450623,
	"learning_rate": 0.00018702338861516587,
	"loss": 0.538,
	"step": 272
	},
	{
	"epoch": 0.7453924914675768,
	"grad_norm": 0.1434670090675354,
	"learning_rate": 0.00018686630213722016,
	"loss": 0.5353,
	"step": 273
	},
	{
	"epoch": 0.7481228668941979,
	"grad_norm": 0.1796412169933319,
	"learning_rate": 0.0001867083373715264,
	"loss": 0.5221,
	"step": 274
	},
	{
	"epoch": 0.7508532423208191,
	"grad_norm": 0.14365307986736298,
	"learning_rate": 0.00018654949591523465,
	"loss": 0.5211,
	"step": 275
	},
	{
	"epoch": 0.7535836177474403,
	"grad_norm": 0.15224212408065796,
	"learning_rate": 0.000186389779374359,
	"loss": 0.5353,
	"step": 276
	},
	{
	"epoch": 0.7563139931740614,
	"grad_norm": 0.1557937115430832,
	"learning_rate": 0.00018622918936376132,
	"loss": 0.5359,
	"step": 277
	},
	{
	"epoch": 0.7590443686006826,
	"grad_norm": 0.13806480169296265,
	"learning_rate": 0.00018606772750713504,
	"loss": 0.5404,
	"step": 278
	},
	{
	"epoch": 0.7617747440273037,
	"grad_norm": 0.14786981046199799,
	"learning_rate": 0.00018590539543698854,
	"loss": 0.5377,
	"step": 279
	},
	{
	"epoch": 0.764505119453925,
	"grad_norm": 0.14674220979213715,
	"learning_rate": 0.00018574219479462878,
	"loss": 0.531,
	"step": 280
	},
	{
	"epoch": 0.7672354948805461,
	"grad_norm": 0.146319180727005,
	"learning_rate": 0.00018557812723014476,
	"loss": 0.5244,
	"step": 281
	},
	{
	"epoch": 0.7699658703071672,
	"grad_norm": 0.15404731035232544,
	"learning_rate": 0.00018541319440239066,
	"loss": 0.5419,
	"step": 282
	},
	{
	"epoch": 0.7726962457337884,
	"grad_norm": 0.13534784317016602,
	"learning_rate": 0.00018524739797896923,
	"loss": 0.5162,
	"step": 283
	},
	{
	"epoch": 0.7754266211604095,
	"grad_norm": 0.14462386071681976,
	"learning_rate": 0.0001850807396362148,
	"loss": 0.5338,
	"step": 284
	},
	{
	"epoch": 0.7781569965870307,
	"grad_norm": 0.126676544547081,
	"learning_rate": 0.00018491322105917645,
	"loss": 0.5291,
	"step": 285
	},
	{
	"epoch": 0.7808873720136519,
	"grad_norm": 0.1487646847963333,
	"learning_rate": 0.0001847448439416009,
	"loss": 0.5235,
	"step": 286
	},
	{
	"epoch": 0.783617747440273,
	"grad_norm": 0.14197687804698944,
	"learning_rate": 0.00018457560998591538,
	"loss": 0.518,
	"step": 287
	},
	{
	"epoch": 0.7863481228668942,
	"grad_norm": 0.13646024465560913,
	"learning_rate": 0.00018440552090321047,
	"loss": 0.5425,
	"step": 288
	},
	{
	"epoch": 0.7890784982935154,
	"grad_norm": 0.15339186787605286,
	"learning_rate": 0.00018423457841322273,
	"loss": 0.5203,
	"step": 289
	},
	{
	"epoch": 0.7918088737201365,
	"grad_norm": 0.14754898846149445,
	"learning_rate": 0.00018406278424431736,
	"loss": 0.5259,
	"step": 290
	},
	{
	"epoch": 0.7945392491467577,
	"grad_norm": 0.1315944641828537,
	"learning_rate": 0.00018389014013347078,
	"loss": 0.5248,
	"step": 291
	},
	{
	"epoch": 0.7972696245733788,
	"grad_norm": 0.14218468964099884,
	"learning_rate": 0.00018371664782625287,
	"loss": 0.5328,
	"step": 292
	},
	{
	"epoch": 0.8,
	"grad_norm": 0.1456083059310913,
	"learning_rate": 0.00018354230907680958,
	"loss": 0.5352,
	"step": 293
	},
	{
	"epoch": 0.8027303754266212,
	"grad_norm": 0.14852645993232727,
	"learning_rate": 0.00018336712564784503,
	"loss": 0.5379,
	"step": 294
	},
	{
	"epoch": 0.8054607508532423,
	"grad_norm": 0.17426247894763947,
	"learning_rate": 0.0001831910993106037,
	"loss": 0.5358,
	"step": 295
	},
	{
	"epoch": 0.8081911262798634,
	"grad_norm": 0.14848141372203827,
	"learning_rate": 0.0001830142318448525,
	"loss": 0.5377,
	"step": 296
	},
	{
	"epoch": 0.8109215017064847,
	"grad_norm": 0.14610137045383453,
	"learning_rate": 0.000182836525038863,
	"loss": 0.5331,
	"step": 297
	},
	{
	"epoch": 0.8136518771331058,
	"grad_norm": 0.1565829962491989,
	"learning_rate": 0.00018265798068939294,
	"loss": 0.5162,
	"step": 298
	},
	{
	"epoch": 0.816382252559727,
	"grad_norm": 0.1328669935464859,
	"learning_rate": 0.0001824786006016685,
	"loss": 0.5045,
	"step": 299
	},
	{
	"epoch": 0.8191126279863481,
	"grad_norm": 0.14329080283641815,
	"learning_rate": 0.00018229838658936564,
	"loss": 0.5083,
	"step": 300
	},
	{
	"epoch": 0.8218430034129692,
	"grad_norm": 0.13927890360355377,
	"learning_rate": 0.00018211734047459216,
	"loss": 0.5221,
	"step": 301
	},
	{
	"epoch": 0.8245733788395905,
	"grad_norm": 0.14164070785045624,
	"learning_rate": 0.00018193546408786898,
	"loss": 0.5243,
	"step": 302
	},
	{
	"epoch": 0.8273037542662116,
	"grad_norm": 0.14735499024391174,
	"learning_rate": 0.00018175275926811174,
	"loss": 0.5136,
	"step": 303
	},
	{
	"epoch": 0.8300341296928327,
	"grad_norm": 0.1411520093679428,
	"learning_rate": 0.00018156922786261216,
	"loss": 0.5164,
	"step": 304
	},
	{
	"epoch": 0.8327645051194539,
	"grad_norm": 0.14603470265865326,
	"learning_rate": 0.0001813848717270195,
	"loss": 0.5306,
	"step": 305
	},
	{
	"epoch": 0.8354948805460751,
	"grad_norm": 0.13602910935878754,
	"learning_rate": 0.00018119969272532166,
	"loss": 0.5014,
	"step": 306
	},
	{
	"epoch": 0.8382252559726963,
	"grad_norm": 0.15259377658367157,
	"learning_rate": 0.00018101369272982632,
	"loss": 0.5309,
	"step": 307
	},
	{
	"epoch": 0.8409556313993174,
	"grad_norm": 0.13250286877155304,
	"learning_rate": 0.00018082687362114212,
	"loss": 0.5034,
	"step": 308
	},
	{
	"epoch": 0.8436860068259385,
	"grad_norm": 0.15984928607940674,
	"learning_rate": 0.00018063923728815957,
	"loss": 0.5167,
	"step": 309
	},
	{
	"epoch": 0.8464163822525598,
	"grad_norm": 0.14640702307224274,
	"learning_rate": 0.00018045078562803203,
	"loss": 0.5361,
	"step": 310
	},
	{
	"epoch": 0.8491467576791809,
	"grad_norm": 0.1568063646554947,
	"learning_rate": 0.00018026152054615634,
	"loss": 0.5221,
	"step": 311
	},
	{
	"epoch": 0.851877133105802,
	"grad_norm": 0.14193738996982574,
	"learning_rate": 0.0001800714439561538,
	"loss": 0.5151,
	"step": 312
	},
	{
	"epoch": 0.8546075085324232,
	"grad_norm": 0.15847285091876984,
	"learning_rate": 0.00017988055777985067,
	"loss": 0.4923,
	"step": 313
	},
	{
	"epoch": 0.8573378839590444,
	"grad_norm": 0.16404108703136444,
	"learning_rate": 0.00017968886394725874,
	"loss": 0.5114,
	"step": 314
	},
	{
	"epoch": 0.8600682593856656,
	"grad_norm": 0.22749370336532593,
	"learning_rate": 0.00017949636439655592,
	"loss": 0.5225,
	"step": 315
	},
	{
	"epoch": 0.8627986348122867,
	"grad_norm": 0.17752817273139954,
	"learning_rate": 0.00017930306107406653,
	"loss": 0.5138,
	"step": 316
	},
	{
	"epoch": 0.8655290102389078,
	"grad_norm": 0.16554813086986542,
	"learning_rate": 0.00017910895593424163,
	"loss": 0.5087,
	"step": 317
	},
	{
	"epoch": 0.868259385665529,
	"grad_norm": 0.1888076812028885,
	"learning_rate": 0.00017891405093963938,
	"loss": 0.5163,
	"step": 318
	},
	{
	"epoch": 0.8709897610921502,
	"grad_norm": 0.15817560255527496,
	"learning_rate": 0.00017871834806090501,
	"loss": 0.4918,
	"step": 319
	},
	{
	"epoch": 0.8737201365187713,
	"grad_norm": 0.15167172253131866,
	"learning_rate": 0.00017852184927675112,
	"loss": 0.5248,
	"step": 320
	},
	{
	"epoch": 0.8764505119453925,
	"grad_norm": 0.15847422182559967,
	"learning_rate": 0.00017832455657393746,
	"loss": 0.5205,
	"step": 321
	},
	{
	"epoch": 0.8791808873720136,
	"grad_norm": 0.1436643898487091,
	"learning_rate": 0.00017812647194725094,
	"loss": 0.4869,
	"step": 322
	},
	{
	"epoch": 0.8819112627986349,
	"grad_norm": 0.1533607393503189,
	"learning_rate": 0.00017792759739948546,
	"loss": 0.5141,
	"step": 323
	},
	{
	"epoch": 0.884641638225256,
	"grad_norm": 0.1520746648311615,
	"learning_rate": 0.00017772793494142167,
	"loss": 0.5158,
	"step": 324
	},
	{
	"epoch": 0.8873720136518771,
	"grad_norm": 0.1398312896490097,
	"learning_rate": 0.0001775274865918066,
	"loss": 0.5126,
	"step": 325
	},
	{
	"epoch": 0.8901023890784983,
	"grad_norm": 0.14314454793930054,
	"learning_rate": 0.00017732625437733335,
	"loss": 0.5187,
	"step": 326
	},
	{
	"epoch": 0.8928327645051195,
	"grad_norm": 0.1434595286846161,
	"learning_rate": 0.00017712424033262042,
	"loss": 0.5018,
	"step": 327
	},
	{
	"epoch": 0.8955631399317406,
	"grad_norm": 0.13712714612483978,
	"learning_rate": 0.00017692144650019125,
	"loss": 0.5044,
	"step": 328
	},
	{
	"epoch": 0.8982935153583618,
	"grad_norm": 0.13560262322425842,
	"learning_rate": 0.00017671787493045356,
	"loss": 0.5153,
	"step": 329
	},
	{
	"epoch": 0.9010238907849829,
	"grad_norm": 0.15035240352153778,
	"learning_rate": 0.0001765135276816787,
	"loss": 0.5023,
	"step": 330
	},
	{
	"epoch": 0.903754266211604,
	"grad_norm": 0.14878690242767334,
	"learning_rate": 0.00017630840681998066,
	"loss": 0.4916,
	"step": 331
	},
	{
	"epoch": 0.9064846416382253,
	"grad_norm": 0.14930297434329987,
	"learning_rate": 0.00017610251441929533,
	"loss": 0.5092,
	"step": 332
	},
	{
	"epoch": 0.9092150170648464,
	"grad_norm": 0.14058218896389008,
	"learning_rate": 0.0001758958525613594,
	"loss": 0.5091,
	"step": 333
	},
	{
	"epoch": 0.9119453924914676,
	"grad_norm": 0.18505944311618805,
	"learning_rate": 0.00017568842333568952,
	"loss": 0.5047,
	"step": 334
	},
	{
	"epoch": 0.9146757679180887,
	"grad_norm": 0.16390664875507355,
	"learning_rate": 0.0001754802288395609,
	"loss": 0.5115,
	"step": 335
	},
	{
	"epoch": 0.9174061433447099,
	"grad_norm": 0.16126009821891785,
	"learning_rate": 0.00017527127117798635,
	"loss": 0.5017,
	"step": 336
	},
	{
	"epoch": 0.9201365187713311,
	"grad_norm": 0.16674400866031647,
	"learning_rate": 0.0001750615524636948,
	"loss": 0.5193,
	"step": 337
	},
	{
	"epoch": 0.9228668941979522,
	"grad_norm": 0.13656651973724365,
	"learning_rate": 0.00017485107481711012,
	"loss": 0.4909,
	"step": 338
	},
	{
	"epoch": 0.9255972696245733,
	"grad_norm": 0.15961140394210815,
	"learning_rate": 0.00017463984036632954,
	"loss": 0.5145,
	"step": 339
	},
	{
	"epoch": 0.9283276450511946,
	"grad_norm": 0.13122031092643738,
	"learning_rate": 0.00017442785124710227,
	"loss": 0.4854,
	"step": 340
	},
	{
	"epoch": 0.9310580204778157,
	"grad_norm": 0.14256154000759125,
	"learning_rate": 0.0001742151096028076,
	"loss": 0.4852,
	"step": 341
	},
	{
	"epoch": 0.9337883959044369,
	"grad_norm": 0.14853668212890625,
	"learning_rate": 0.00017400161758443375,
	"loss": 0.5202,
	"step": 342
	},
	{
	"epoch": 0.936518771331058,
	"grad_norm": 0.16885364055633545,
	"learning_rate": 0.00017378737735055562,
	"loss": 0.5136,
	"step": 343
	},
	{
	"epoch": 0.9392491467576792,
	"grad_norm": 0.1398458182811737,
	"learning_rate": 0.00017357239106731317,
	"loss": 0.4908,
	"step": 344
	},
	{
	"epoch": 0.9419795221843004,
	"grad_norm": 0.1871606856584549,
	"learning_rate": 0.00017335666090838965,
	"loss": 0.5076,
	"step": 345
	},
	{
	"epoch": 0.9447098976109215,
	"grad_norm": 0.17375800013542175,
	"learning_rate": 0.00017314018905498931,
	"loss": 0.5032,
	"step": 346
	},
	{
	"epoch": 0.9474402730375426,
	"grad_norm": 0.14859388768672943,
	"learning_rate": 0.00017292297769581571,
	"loss": 0.5119,
	"step": 347
	},
	{
	"epoch": 0.9501706484641638,
	"grad_norm": 0.18141716718673706,
	"learning_rate": 0.00017270502902704926,
	"loss": 0.4982,
	"step": 348
	},
	{
	"epoch": 0.952901023890785,
	"grad_norm": 0.17015349864959717,
	"learning_rate": 0.00017248634525232523,
	"loss": 0.4835,
	"step": 349
	},
	{
	"epoch": 0.9556313993174061,
	"grad_norm": 0.14770372211933136,
	"learning_rate": 0.00017226692858271134,
	"loss": 0.4851,
	"step": 350
	},
	{
	"epoch": 0.9583617747440273,
	"grad_norm": 0.19386035203933716,
	"learning_rate": 0.00017204678123668556,
	"loss": 0.5172,
	"step": 351
	},
	{
	"epoch": 0.9610921501706484,
	"grad_norm": 0.15225833654403687,
	"learning_rate": 0.00017182590544011347,
	"loss": 0.4972,
	"step": 352
	},
	{
	"epoch": 0.9638225255972697,
	"grad_norm": 0.1522843837738037,
	"learning_rate": 0.00017160430342622596,
	"loss": 0.493,
	"step": 353
	},
	{
	"epoch": 0.9665529010238908,
	"grad_norm": 0.13483871519565582,
	"learning_rate": 0.00017138197743559654,
	"loss": 0.4882,
	"step": 354
	},
	{
	"epoch": 0.9692832764505119,
	"grad_norm": 0.14176194369792938,
	"learning_rate": 0.00017115892971611863,
	"loss": 0.4921,
	"step": 355
	},
	{
	"epoch": 0.9720136518771331,
	"grad_norm": 0.15088023245334625,
	"learning_rate": 0.00017093516252298296,
	"loss": 0.5073,
	"step": 356
	},
	{
	"epoch": 0.9747440273037543,
	"grad_norm": 0.14758853614330292,
	"learning_rate": 0.00017071067811865476,
	"loss": 0.5106,
	"step": 357
	},
	{
	"epoch": 0.9774744027303754,
	"grad_norm": 0.14779959619045258,
	"learning_rate": 0.00017048547877285077,
	"loss": 0.5066,
	"step": 358
	},
	{
	"epoch": 0.9802047781569966,
	"grad_norm": 0.14550894498825073,
	"learning_rate": 0.00017025956676251636,
	"loss": 0.484,
	"step": 359
	},
	{
	"epoch": 0.9829351535836177,
	"grad_norm": 0.14111794531345367,
	"learning_rate": 0.00017003294437180255,
	"loss": 0.4719,
	"step": 360
	},
	{
	"epoch": 0.985665529010239,
	"grad_norm": 0.15354299545288086,
	"learning_rate": 0.00016980561389204284,
	"loss": 0.4874,
	"step": 361
	},
	{
	"epoch": 0.9883959044368601,
	"grad_norm": 0.14522601664066315,
	"learning_rate": 0.0001695775776217301,
	"loss": 0.4722,
	"step": 362
	},
	{
	"epoch": 0.9911262798634812,
	"grad_norm": 0.143354594707489,
	"learning_rate": 0.00016934883786649333,
	"loss": 0.5038,
	"step": 363
	},
	{
	"epoch": 0.9938566552901024,
	"grad_norm": 0.14298783242702484,
	"learning_rate": 0.0001691193969390742,
	"loss": 0.5007,
	"step": 364
	},
	{
	"epoch": 0.9965870307167235,
	"grad_norm": 0.14418889582157135,
	"learning_rate": 0.00016888925715930394,
	"loss": 0.5054,
	"step": 365
	},
	{
	"epoch": 0.9993174061433447,
	"grad_norm": 0.16697633266448975,
	"learning_rate": 0.0001686584208540797,
	"loss": 0.4948,
	"step": 366
	},
	{
	"epoch": 1.0020477815699658,
	"grad_norm": 0.4087926745414734,
	"learning_rate": 0.000168426890357341,
	"loss": 0.8762,
	"step": 367
	},
	{
	"epoch": 1.004778156996587,
	"grad_norm": 0.17126424610614777,
	"learning_rate": 0.00016819466801004621,
	"loss": 0.4964,
	"step": 368
	},
	{
	"epoch": 1.0075085324232083,
	"grad_norm": 0.18887566030025482,
	"learning_rate": 0.00016796175616014893,
	"loss": 0.4842,
	"step": 369
	},
	{
	"epoch": 1.0102389078498293,
	"grad_norm": 0.16404148936271667,
	"learning_rate": 0.00016772815716257412,
	"loss": 0.4761,
	"step": 370
	},
	{
	"epoch": 1.0129692832764505,
	"grad_norm": 0.2001708298921585,
	"learning_rate": 0.00016749387337919433,
	"loss": 0.5005,
	"step": 371
	},
	{
	"epoch": 1.0156996587030718,
	"grad_norm": 0.16301579773426056,
	"learning_rate": 0.0001672589071788059,
	"loss": 0.497,
	"step": 372
	},
	{
	"epoch": 1.0184300341296928,
	"grad_norm": 0.15777342021465302,
	"learning_rate": 0.0001670232609371049,
	"loss": 0.4771,
	"step": 373
	},
	{
	"epoch": 1.021160409556314,
	"grad_norm": 0.14179396629333496,
	"learning_rate": 0.00016678693703666325,
	"loss": 0.4491,
	"step": 374
	},
	{
	"epoch": 1.023890784982935,
	"grad_norm": 0.16155582666397095,
	"learning_rate": 0.00016654993786690444,
	"loss": 0.4846,
	"step": 375
	},
	{
	"epoch": 1.0266211604095563,
	"grad_norm": 0.15554013848304749,
	"learning_rate": 0.00016631226582407952,
	"loss": 0.4663,
	"step": 376
	},
	{
	"epoch": 1.0293515358361776,
	"grad_norm": 0.15946047008037567,
	"learning_rate": 0.00016607392331124282,
	"loss": 0.4978,
	"step": 377
	},
	{
	"epoch": 1.0320819112627986,
	"grad_norm": 0.14758449792861938,
	"learning_rate": 0.00016583491273822765,
	"loss": 0.4868,
	"step": 378
	},
	{
	"epoch": 1.0348122866894198,
	"grad_norm": 0.16146820783615112,
	"learning_rate": 0.0001655952365216219,
	"loss": 0.5056,
	"step": 379
	},
	{
	"epoch": 1.0375426621160408,
	"grad_norm": 0.14648115634918213,
	"learning_rate": 0.0001653548970847438,
	"loss": 0.5028,
	"step": 380
	},
	{
	"epoch": 1.040273037542662,
	"grad_norm": 0.14611080288887024,
	"learning_rate": 0.00016511389685761708,
	"loss": 0.4975,
	"step": 381
	},
	{
	"epoch": 1.0430034129692833,
	"grad_norm": 0.13934309780597687,
	"learning_rate": 0.00016487223827694672,
	"loss": 0.4881,
	"step": 382
	},
	{
	"epoch": 1.0457337883959044,
	"grad_norm": 0.1680765300989151,
	"learning_rate": 0.00016462992378609407,
	"loss": 0.4947,
	"step": 383
	},
	{
	"epoch": 1.0484641638225256,
	"grad_norm": 0.16694432497024536,
	"learning_rate": 0.00016438695583505242,
	"loss": 0.5,
	"step": 384
	},
	{
	"epoch": 1.0511945392491469,
	"grad_norm": 0.13027995824813843,
	"learning_rate": 0.00016414333688042186,
	"loss": 0.4783,
	"step": 385
	},
	{
	"epoch": 1.0539249146757679,
	"grad_norm": 0.15040864050388336,
	"learning_rate": 0.0001638990693853848,
	"loss": 0.4743,
	"step": 386
	},
	{
	"epoch": 1.0566552901023891,
	"grad_norm": 0.17924848198890686,
	"learning_rate": 0.00016365415581968084,
	"loss": 0.493,
	"step": 387
	},
	{
	"epoch": 1.0593856655290101,
	"grad_norm": 0.14788490533828735,
	"learning_rate": 0.0001634085986595819,
	"loss": 0.4932,
	"step": 388
	},
	{
	"epoch": 1.0621160409556314,
	"grad_norm": 0.1745985448360443,
	"learning_rate": 0.00016316240038786718,
	"loss": 0.5192,
	"step": 389
	},
	{
	"epoch": 1.0648464163822526,
	"grad_norm": 0.1356726437807083,
	"learning_rate": 0.00016291556349379795,
	"loss": 0.4829,
	"step": 390
	},
	{
	"epoch": 1.0675767918088737,
	"grad_norm": 0.1533443182706833,
	"learning_rate": 0.00016266809047309251,
	"loss": 0.4881,
	"step": 391
	},
	{
	"epoch": 1.070307167235495,
	"grad_norm": 0.16772933304309845,
	"learning_rate": 0.00016241998382790095,
	"loss": 0.4967,
	"step": 392
	},
	{
	"epoch": 1.073037542662116,
	"grad_norm": 0.1441749781370163,
	"learning_rate": 0.0001621712460667797,
	"loss": 0.4867,
	"step": 393
	},
	{
	"epoch": 1.0757679180887372,
	"grad_norm": 0.15063367784023285,
	"learning_rate": 0.00016192187970466644,
	"loss": 0.4916,
	"step": 394
	},
	{
	"epoch": 1.0784982935153584,
	"grad_norm": 0.14568567276000977,
	"learning_rate": 0.00016167188726285434,
	"loss": 0.488,
	"step": 395
	},
	{
	"epoch": 1.0812286689419794,
	"grad_norm": 0.14870832860469818,
	"learning_rate": 0.0001614212712689668,
	"loss": 0.4867,
	"step": 396
	},
	{
	"epoch": 1.0839590443686007,
	"grad_norm": 0.15368396043777466,
	"learning_rate": 0.00016117003425693188,
	"loss": 0.4766,
	"step": 397
	},
	{
	"epoch": 1.086689419795222,
	"grad_norm": 0.163841113448143,
	"learning_rate": 0.00016091817876695655,
	"loss": 0.4803,
	"step": 398
	},
	{
	"epoch": 1.089419795221843,
	"grad_norm": 0.17485839128494263,
	"learning_rate": 0.0001606657073455012,
	"loss": 0.4875,
	"step": 399
	},
	{
	"epoch": 1.0921501706484642,
	"grad_norm": 0.14765095710754395,
	"learning_rate": 0.00016041262254525362,
	"loss": 0.5058,
	"step": 400
	},
	{
	"epoch": 1.0948805460750852,
	"grad_norm": 0.1618352234363556,
	"learning_rate": 0.0001601589269251035,
	"loss": 0.4903,
	"step": 401
	},
	{
	"epoch": 1.0976109215017065,
	"grad_norm": 0.1456744223833084,
	"learning_rate": 0.0001599046230501163,
	"loss": 0.4822,
	"step": 402
	},
	{
	"epoch": 1.1003412969283277,
	"grad_norm": 0.15738914906978607,
	"learning_rate": 0.00015964971349150746,
	"loss": 0.4944,
	"step": 403
	},
	{
	"epoch": 1.1030716723549487,
	"grad_norm": 0.1475927084684372,
	"learning_rate": 0.0001593942008266164,
	"loss": 0.4971,
	"step": 404
	},
	{
	"epoch": 1.10580204778157,
	"grad_norm": 0.14080214500427246,
	"learning_rate": 0.00015913808763888039,
	"loss": 0.4864,
	"step": 405
	},
	{
	"epoch": 1.108532423208191,
	"grad_norm": 0.16853410005569458,
	"learning_rate": 0.00015888137651780845,
	"loss": 0.4891,
	"step": 406
	},
	{
	"epoch": 1.1112627986348123,
	"grad_norm": 0.13720810413360596,
	"learning_rate": 0.00015862407005895522,
	"loss": 0.4822,
	"step": 407
	},
	{
	"epoch": 1.1139931740614335,
	"grad_norm": 0.16895629465579987,
	"learning_rate": 0.00015836617086389468,
	"loss": 0.4598,
	"step": 408
	},
	{
	"epoch": 1.1167235494880545,
	"grad_norm": 0.15242214500904083,
	"learning_rate": 0.00015810768154019385,
	"loss": 0.4761,
	"step": 409
	},
	{
	"epoch": 1.1194539249146758,
	"grad_norm": 0.1633676141500473,
	"learning_rate": 0.00015784860470138633,
	"loss": 0.4912,
	"step": 410
	},
	{
	"epoch": 1.122184300341297,
	"grad_norm": 0.16565294563770294,
	"learning_rate": 0.00015758894296694615,
	"loss": 0.4836,
	"step": 411
	},
	{
	"epoch": 1.124914675767918,
	"grad_norm": 0.160204216837883,
	"learning_rate": 0.00015732869896226094,
	"loss": 0.4694,
	"step": 412
	},
	{
	"epoch": 1.1276450511945393,
	"grad_norm": 0.19067788124084473,
	"learning_rate": 0.00015706787531860557,
	"loss": 0.4853,
	"step": 413
	},
	{
	"epoch": 1.1303754266211605,
	"grad_norm": 0.14547327160835266,
	"learning_rate": 0.00015680647467311557,
	"loss": 0.4709,
	"step": 414
	},
	{
	"epoch": 1.1331058020477816,
	"grad_norm": 0.18047383427619934,
	"learning_rate": 0.0001565444996687605,
	"loss": 0.4738,
	"step": 415
	},
	{
	"epoch": 1.1358361774744028,
	"grad_norm": 0.16099834442138672,
	"learning_rate": 0.00015628195295431697,
	"loss": 0.4805,
	"step": 416
	},
	{
	"epoch": 1.1385665529010238,
	"grad_norm": 0.1426517814397812,
	"learning_rate": 0.00015601883718434207,
	"loss": 0.4573,
	"step": 417
	},
	{
	"epoch": 1.141296928327645,
	"grad_norm": 0.14879460632801056,
	"learning_rate": 0.00015575515501914668,
	"loss": 0.4778,
	"step": 418
	},
	{
	"epoch": 1.144027303754266,
	"grad_norm": 0.1393750160932541,
	"learning_rate": 0.0001554909091247682,
	"loss": 0.447,
	"step": 419
	},
	{
	"epoch": 1.1467576791808873,
	"grad_norm": 0.17599152028560638,
	"learning_rate": 0.00015522610217294375,
	"loss": 0.4969,
	"step": 420
	},
	{
	"epoch": 1.1494880546075086,
	"grad_norm": 0.17399606108665466,
	"learning_rate": 0.0001549607368410834,
	"loss": 0.4933,
	"step": 421
	},
	{
	"epoch": 1.1522184300341296,
	"grad_norm": 0.16555064916610718,
	"learning_rate": 0.00015469481581224272,
	"loss": 0.475,
	"step": 422
	},
	{
	"epoch": 1.1549488054607508,
	"grad_norm": 0.20848453044891357,
	"learning_rate": 0.00015442834177509582,
	"loss": 0.4872,
	"step": 423
	},
	{
	"epoch": 1.157679180887372,
	"grad_norm": 0.15129271149635315,
	"learning_rate": 0.00015416131742390827,
	"loss": 0.4963,
	"step": 424
	},
	{
	"epoch": 1.1604095563139931,
	"grad_norm": 0.1703735888004303,
	"learning_rate": 0.00015389374545850973,
	"loss": 0.479,
	"step": 425
	},
	{
	"epoch": 1.1631399317406144,
	"grad_norm": 0.152608722448349,
	"learning_rate": 0.00015362562858426654,
	"loss": 0.4831,
	"step": 426
	},
	{
	"epoch": 1.1658703071672356,
	"grad_norm": 0.14749537408351898,
	"learning_rate": 0.0001533569695120547,
	"loss": 0.4784,
	"step": 427
	},
	{
	"epoch": 1.1686006825938566,
	"grad_norm": 0.15642118453979492,
	"learning_rate": 0.0001530877709582321,
	"loss": 0.4679,
	"step": 428
	},
	{
	"epoch": 1.1713310580204779,
	"grad_norm": 0.13505250215530396,
	"learning_rate": 0.00015281803564461135,
	"loss": 0.4779,
	"step": 429
	},
	{
	"epoch": 1.174061433447099,
	"grad_norm": 0.14518040418624878,
	"learning_rate": 0.00015254776629843205,
	"loss": 0.448,
	"step": 430
	},
	{
	"epoch": 1.1767918088737201,
	"grad_norm": 0.12947289645671844,
	"learning_rate": 0.0001522769656523333,
	"loss": 0.4735,
	"step": 431
	},
	{
	"epoch": 1.1795221843003414,
	"grad_norm": 0.15066657960414886,
	"learning_rate": 0.00015200563644432612,
	"loss": 0.4791,
	"step": 432
	},
	{
	"epoch": 1.1822525597269624,
	"grad_norm": 0.13076473772525787,
	"learning_rate": 0.00015173378141776568,
	"loss": 0.4582,
	"step": 433
	},
	{
	"epoch": 1.1849829351535837,
	"grad_norm": 0.15804897248744965,
	"learning_rate": 0.00015146140332132358,
	"loss": 0.482,
	"step": 434
	},
	{
	"epoch": 1.1877133105802047,
	"grad_norm": 0.1330975741147995,
	"learning_rate": 0.00015118850490896012,
	"loss": 0.4736,
	"step": 435
	},
	{
	"epoch": 1.190443686006826,
	"grad_norm": 0.16358092427253723,
	"learning_rate": 0.00015091508893989633,
	"loss": 0.4992,
	"step": 436
	},
	{
	"epoch": 1.1931740614334472,
	"grad_norm": 0.14177009463310242,
	"learning_rate": 0.00015064115817858622,
	"loss": 0.4646,
	"step": 437
	},
	{
	"epoch": 1.1959044368600682,
	"grad_norm": 0.1569090485572815,
	"learning_rate": 0.00015036671539468878,
	"loss": 0.4683,
	"step": 438
	},
	{
	"epoch": 1.1986348122866894,
	"grad_norm": 0.15897628664970398,
	"learning_rate": 0.00015009176336303986,
	"loss": 0.4829,
	"step": 439
	},
	{
	"epoch": 1.2013651877133107,
	"grad_norm": 0.1482827216386795,
	"learning_rate": 0.00014981630486362435,
	"loss": 0.4552,
	"step": 440
	},
	{
	"epoch": 1.2040955631399317,
	"grad_norm": 0.1546843945980072,
	"learning_rate": 0.00014954034268154778,
	"loss": 0.4702,
	"step": 441
	},
	{
	"epoch": 1.206825938566553,
	"grad_norm": 0.1493782103061676,
	"learning_rate": 0.00014926387960700842,
	"loss": 0.4708,
	"step": 442
	},
	{
	"epoch": 1.209556313993174,
	"grad_norm": 0.15456125140190125,
	"learning_rate": 0.00014898691843526899,
	"loss": 0.4738,
	"step": 443
	},
	{
	"epoch": 1.2122866894197952,
	"grad_norm": 0.17920009791851044,
	"learning_rate": 0.00014870946196662822,
	"loss": 0.4616,
	"step": 444
	},
	{
	"epoch": 1.2150170648464165,
	"grad_norm": 0.15904481709003448,
	"learning_rate": 0.00014843151300639282,
	"loss": 0.4791,
	"step": 445
	},
	{
	"epoch": 1.2177474402730375,
	"grad_norm": 0.18129463493824005,
	"learning_rate": 0.00014815307436484898,
	"loss": 0.4789,
	"step": 446
	},
	{
	"epoch": 1.2204778156996587,
	"grad_norm": 0.14939218759536743,
	"learning_rate": 0.00014787414885723385,
	"loss": 0.4774,
	"step": 447
	},
	{
	"epoch": 1.2232081911262798,
	"grad_norm": 0.17625784873962402,
	"learning_rate": 0.00014759473930370736,
	"loss": 0.4785,
	"step": 448
	},
	{
	"epoch": 1.225938566552901,
	"grad_norm": 0.158066987991333,
	"learning_rate": 0.0001473148485293234,
	"loss": 0.4588,
	"step": 449
	},
	{
	"epoch": 1.2286689419795223,
	"grad_norm": 0.16107094287872314,
	"learning_rate": 0.00014703447936400134,
	"loss": 0.4734,
	"step": 450
	},
	{
	"epoch": 1.2313993174061433,
	"grad_norm": 0.1387171894311905,
	"learning_rate": 0.00014675363464249763,
	"loss": 0.4733,
	"step": 451
	},
	{
	"epoch": 1.2341296928327645,
	"grad_norm": 0.1561625450849533,
	"learning_rate": 0.00014647231720437686,
	"loss": 0.4502,
	"step": 452
	},
	{
	"epoch": 1.2368600682593858,
	"grad_norm": 0.1419583112001419,
	"learning_rate": 0.00014619052989398322,
	"loss": 0.4674,
	"step": 453
	},
	{
	"epoch": 1.2395904436860068,
	"grad_norm": 0.1578853279352188,
	"learning_rate": 0.00014590827556041158,
	"loss": 0.4635,
	"step": 454
	},
	{
	"epoch": 1.242320819112628,
	"grad_norm": 0.1325417309999466,
	"learning_rate": 0.00014562555705747892,
	"loss": 0.4557,
	"step": 455
	},
	{
	"epoch": 1.245051194539249,
	"grad_norm": 0.17075014114379883,
	"learning_rate": 0.00014534237724369534,
	"loss": 0.4928,
	"step": 456
	},
	{
	"epoch": 1.2477815699658703,
	"grad_norm": 0.136972576379776,
	"learning_rate": 0.00014505873898223496,
	"loss": 0.4699,
	"step": 457
	},
	{
	"epoch": 1.2505119453924913,
	"grad_norm": 0.16119800508022308,
	"learning_rate": 0.00014477464514090743,
	"loss": 0.4807,
	"step": 458
	},
	{
	"epoch": 1.2532423208191126,
	"grad_norm": 0.13321495056152344,
	"learning_rate": 0.00014449009859212857,
	"loss": 0.4806,
	"step": 459
	},
	{
	"epoch": 1.2559726962457338,
	"grad_norm": 0.14326980710029602,
	"learning_rate": 0.00014420510221289137,
	"loss": 0.4491,
	"step": 460
	},
	{
	"epoch": 1.2587030716723548,
	"grad_norm": 0.13098248839378357,
	"learning_rate": 0.00014391965888473703,
	"loss": 0.4653,
	"step": 461
	},
	{
	"epoch": 1.261433447098976,
	"grad_norm": 0.14561250805854797,
	"learning_rate": 0.00014363377149372584,
	"loss": 0.479,
	"step": 462
	},
	{
	"epoch": 1.2641638225255973,
	"grad_norm": 0.1377183198928833,
	"learning_rate": 0.0001433474429304077,
	"loss": 0.4607,
	"step": 463
	},
	{
	"epoch": 1.2668941979522184,
	"grad_norm": 0.14758490025997162,
	"learning_rate": 0.0001430606760897934,
	"loss": 0.472,
	"step": 464
	},
	{
	"epoch": 1.2696245733788396,
	"grad_norm": 0.15359081327915192,
	"learning_rate": 0.0001427734738713248,
	"loss": 0.469,
	"step": 465
	},
	{
	"epoch": 1.2723549488054609,
	"grad_norm": 0.16317234933376312,
	"learning_rate": 0.00014248583917884594,
	"loss": 0.4749,
	"step": 466
	},
	{
	"epoch": 1.2750853242320819,
	"grad_norm": 0.1310819685459137,
	"learning_rate": 0.00014219777492057348,
	"loss": 0.4722,
	"step": 467
	},
	{
	"epoch": 1.2778156996587031,
	"grad_norm": 0.14236976206302643,
	"learning_rate": 0.0001419092840090673,
	"loss": 0.4704,
	"step": 468
	},
	{
	"epoch": 1.2805460750853244,
	"grad_norm": 0.1274275928735733,
	"learning_rate": 0.00014162036936120115,
	"loss": 0.4687,
	"step": 469
	},
	{
	"epoch": 1.2832764505119454,
	"grad_norm": 0.13622865080833435,
	"learning_rate": 0.00014133103389813302,
	"loss": 0.4649,
	"step": 470
	},
	{
	"epoch": 1.2860068259385666,
	"grad_norm": 0.14750082790851593,
	"learning_rate": 0.0001410412805452757,
	"loss": 0.4654,
	"step": 471
	},
	{
	"epoch": 1.2887372013651877,
	"grad_norm": 0.14838138222694397,
	"learning_rate": 0.0001407511122322672,
	"loss": 0.4626,
	"step": 472
	},
	{
	"epoch": 1.291467576791809,
	"grad_norm": 0.1439974308013916,
	"learning_rate": 0.00014046053189294112,
	"loss": 0.4499,
	"step": 473
	},
	{
	"epoch": 1.29419795221843,
	"grad_norm": 0.14686186611652374,
	"learning_rate": 0.00014016954246529696,
	"loss": 0.4676,
	"step": 474
	},
	{
	"epoch": 1.2969283276450512,
	"grad_norm": 0.15870115160942078,
	"learning_rate": 0.00013987814689147041,
	"loss": 0.4448,
	"step": 475
	},
	{
	"epoch": 1.2996587030716724,
	"grad_norm": 0.16382190585136414,
	"learning_rate": 0.0001395863481177036,
	"loss": 0.4476,
	"step": 476
	},
	{
	"epoch": 1.3023890784982934,
	"grad_norm": 0.21564428508281708,
	"learning_rate": 0.00013929414909431544,
	"loss": 0.4751,
	"step": 477
	},
	{
	"epoch": 1.3051194539249147,
	"grad_norm": 0.15450774133205414,
	"learning_rate": 0.00013900155277567157,
	"loss": 0.4776,
	"step": 478
	},
	{
	"epoch": 1.307849829351536,
	"grad_norm": 0.15922100841999054,
	"learning_rate": 0.00013870856212015468,
	"loss": 0.4693,
	"step": 479
	},
	{
	"epoch": 1.310580204778157,
	"grad_norm": 0.13661661744117737,
	"learning_rate": 0.00013841518009013445,
	"loss": 0.454,
	"step": 480
	},
	{
	"epoch": 1.3133105802047782,
	"grad_norm": 0.1507856249809265,
	"learning_rate": 0.00013812140965193773,
	"loss": 0.4701,
	"step": 481
	},
	{
	"epoch": 1.3160409556313994,
	"grad_norm": 0.14306284487247467,
	"learning_rate": 0.00013782725377581848,
	"loss": 0.4842,
	"step": 482
	},
	{
	"epoch": 1.3187713310580205,
	"grad_norm": 0.15984225273132324,
	"learning_rate": 0.00013753271543592773,
	"loss": 0.4628,
	"step": 483
	},
	{
	"epoch": 1.3215017064846417,
	"grad_norm": 0.15242429077625275,
	"learning_rate": 0.00013723779761028347,
	"loss": 0.4708,
	"step": 484
	},
	{
	"epoch": 1.3242320819112627,
	"grad_norm": 0.14520719647407532,
	"learning_rate": 0.0001369425032807407,
	"loss": 0.4608,
	"step": 485
	},
	{
	"epoch": 1.326962457337884,
	"grad_norm": 0.16152748465538025,
	"learning_rate": 0.00013664683543296112,
	"loss": 0.4729,
	"step": 486
	},
	{
	"epoch": 1.329692832764505,
	"grad_norm": 0.15082891285419464,
	"learning_rate": 0.00013635079705638298,
	"loss": 0.4593,
	"step": 487
	},
	{
	"epoch": 1.3324232081911263,
	"grad_norm": 0.16038447618484497,
	"learning_rate": 0.00013605439114419094,
	"loss": 0.4718,
	"step": 488
	},
	{
	"epoch": 1.3351535836177475,
	"grad_norm": 0.1532922238111496,
	"learning_rate": 0.00013575762069328566,
	"loss": 0.4758,
	"step": 489
	},
	{
	"epoch": 1.3378839590443685,
	"grad_norm": 0.12969861924648285,
	"learning_rate": 0.00013546048870425356,
	"loss": 0.4582,
	"step": 490
	},
	{
	"epoch": 1.3406143344709898,
	"grad_norm": 0.13830237090587616,
	"learning_rate": 0.00013516299818133664,
	"loss": 0.4585,
	"step": 491
	},
	{
	"epoch": 1.343344709897611,
	"grad_norm": 0.148755744099617,
	"learning_rate": 0.00013486515213240188,
	"loss": 0.4803,
	"step": 492
	},
	{
	"epoch": 1.346075085324232,
	"grad_norm": 0.13623669743537903,
	"learning_rate": 0.0001345669535689108,
	"loss": 0.4562,
	"step": 493
	},
	{
	"epoch": 1.3488054607508533,
	"grad_norm": 0.14996616542339325,
	"learning_rate": 0.00013426840550588933,
	"loss": 0.4546,
	"step": 494
	},
	{
	"epoch": 1.3515358361774745,
	"grad_norm": 0.1468917429447174,
	"learning_rate": 0.000133969510961897,
	"loss": 0.4712,
	"step": 495
	},
	{
	"epoch": 1.3542662116040955,
	"grad_norm": 0.14525148272514343,
	"learning_rate": 0.0001336702729589965,
	"loss": 0.4614,
	"step": 496
	},
	{
	"epoch": 1.3569965870307168,
	"grad_norm": 0.13571806252002716,
	"learning_rate": 0.00013337069452272333,
	"loss": 0.4601,
	"step": 497
	},
	{
	"epoch": 1.3597269624573378,
	"grad_norm": 0.15235814452171326,
	"learning_rate": 0.00013307077868205487,
	"loss": 0.4785,
	"step": 498
	},
	{
	"epoch": 1.362457337883959,
	"grad_norm": 0.14220909774303436,
	"learning_rate": 0.00013277052846937996,
	"loss": 0.4561,
	"step": 499
	},
	{
	"epoch": 1.36518771331058,
	"grad_norm": 0.14699751138687134,
	"learning_rate": 0.00013246994692046836,
	"loss": 0.446,
	"step": 500
	},
	{
	"epoch": 1.3679180887372013,
	"grad_norm": 0.1636335849761963,
	"learning_rate": 0.00013216903707443967,
	"loss": 0.4614,
	"step": 501
	},
	{
	"epoch": 1.3706484641638226,
	"grad_norm": 0.1447010040283203,
	"learning_rate": 0.00013186780197373306,
	"loss": 0.4573,
	"step": 502
	},
	{
	"epoch": 1.3733788395904436,
	"grad_norm": 0.17758530378341675,
	"learning_rate": 0.0001315662446640761,
	"loss": 0.4701,
	"step": 503
	},
	{
	"epoch": 1.3761092150170648,
	"grad_norm": 0.1493985950946808,
	"learning_rate": 0.00013126436819445422,
	"loss": 0.4671,
	"step": 504
	},
	{
	"epoch": 1.378839590443686,
	"grad_norm": 0.16412951052188873,
	"learning_rate": 0.0001309621756170799,
	"loss": 0.4705,
	"step": 505
	},
	{
	"epoch": 1.3815699658703071,
	"grad_norm": 0.14819127321243286,
	"learning_rate": 0.00013065966998736155,
	"loss": 0.4579,
	"step": 506
	},
	{
	"epoch": 1.3843003412969284,
	"grad_norm": 0.1500328630208969,
	"learning_rate": 0.00013035685436387298,
	"loss": 0.4484,
	"step": 507
	},
	{
	"epoch": 1.3870307167235496,
	"grad_norm": 0.14388103783130646,
	"learning_rate": 0.0001300537318083221,
	"loss": 0.4325,
	"step": 508
	},
	{
	"epoch": 1.3897610921501706,
	"grad_norm": 0.17138421535491943,
	"learning_rate": 0.00012975030538552032,
	"loss": 0.4717,
	"step": 509
	},
	{
	"epoch": 1.3924914675767919,
	"grad_norm": 0.15119241178035736,
	"learning_rate": 0.00012944657816335123,
	"loss": 0.4279,
	"step": 510
	},
	{
	"epoch": 1.395221843003413,
	"grad_norm": 0.15165849030017853,
	"learning_rate": 0.00012914255321273986,
	"loss": 0.4716,
	"step": 511
	},
	{
	"epoch": 1.3979522184300341,
	"grad_norm": 0.14222781360149384,
	"learning_rate": 0.0001288382336076215,
	"loss": 0.4393,
	"step": 512
	},
	{
	"epoch": 1.4006825938566552,
	"grad_norm": 0.1435043066740036,
	"learning_rate": 0.00012853362242491053,
	"loss": 0.4589,
	"step": 513
	},
	{
	"epoch": 1.4034129692832764,
	"grad_norm": 0.15017302334308624,
	"learning_rate": 0.00012822872274446958,
	"loss": 0.4492,
	"step": 514
	},
	{
	"epoch": 1.4061433447098977,
	"grad_norm": 0.14196786284446716,
	"learning_rate": 0.00012792353764907804,
	"loss": 0.4588,
	"step": 515
	},
	{
	"epoch": 1.4088737201365187,
	"grad_norm": 0.144223153591156,
	"learning_rate": 0.0001276180702244012,
	"loss": 0.4678,
	"step": 516
	},
	{
	"epoch": 1.41160409556314,
	"grad_norm": 0.12913116812705994,
	"learning_rate": 0.0001273123235589589,
	"loss": 0.4515,
	"step": 517
	},
	{
	"epoch": 1.4143344709897612,
	"grad_norm": 0.1370343565940857,
	"learning_rate": 0.00012700630074409427,
	"loss": 0.444,
	"step": 518
	},
	{
	"epoch": 1.4170648464163822,
	"grad_norm": 0.15820138156414032,
	"learning_rate": 0.00012670000487394266,
	"loss": 0.4693,
	"step": 519
	},
	{
	"epoch": 1.4197952218430034,
	"grad_norm": 0.13780242204666138,
	"learning_rate": 0.0001263934390454001,
	"loss": 0.4621,
	"step": 520
	},
	{
	"epoch": 1.4225255972696247,
	"grad_norm": 0.13959269225597382,
	"learning_rate": 0.00012608660635809207,
	"loss": 0.448,
	"step": 521
	},
	{
	"epoch": 1.4252559726962457,
	"grad_norm": 0.14923076331615448,
	"learning_rate": 0.00012577950991434248,
	"loss": 0.4694,
	"step": 522
	},
	{
	"epoch": 1.427986348122867,
	"grad_norm": 0.14546504616737366,
	"learning_rate": 0.00012547215281914168,
	"loss": 0.4679,
	"step": 523
	},
	{
	"epoch": 1.430716723549488,
	"grad_norm": 0.13985708355903625,
	"learning_rate": 0.00012516453818011566,
	"loss": 0.469,
	"step": 524
	},
	{
	"epoch": 1.4334470989761092,
	"grad_norm": 0.15217062830924988,
	"learning_rate": 0.00012485666910749428,
	"loss": 0.4698,
	"step": 525
	},
	{
	"epoch": 1.4361774744027302,
	"grad_norm": 0.1293148696422577,
	"learning_rate": 0.00012454854871407994,
	"loss": 0.4448,
	"step": 526
	},
	{
	"epoch": 1.4389078498293515,
	"grad_norm": 0.16416317224502563,
	"learning_rate": 0.0001242401801152161,
	"loss": 0.4589,
	"step": 527
	},
	{
	"epoch": 1.4416382252559727,
	"grad_norm": 0.14671452343463898,
	"learning_rate": 0.0001239315664287558,
	"loss": 0.4498,
	"step": 528
	},
	{
	"epoch": 1.4443686006825938,
	"grad_norm": 0.17271259427070618,
	"learning_rate": 0.00012362271077503008,
	"loss": 0.4492,
	"step": 529
	},
	{
	"epoch": 1.447098976109215,
	"grad_norm": 0.15541909635066986,
	"learning_rate": 0.00012331361627681645,
	"loss": 0.4281,
	"step": 530
	},
	{
	"epoch": 1.4498293515358363,
	"grad_norm": 0.14913444221019745,
	"learning_rate": 0.00012300428605930736,
	"loss": 0.4334,
	"step": 531
	},
	{
	"epoch": 1.4525597269624573,
	"grad_norm": 0.167875275015831,
	"learning_rate": 0.00012269472325007858,
	"loss": 0.4736,
	"step": 532
	},
	{
	"epoch": 1.4552901023890785,
	"grad_norm": 0.15073426067829132,
	"learning_rate": 0.00012238493097905756,
	"loss": 0.4744,
	"step": 533
	},
	{
	"epoch": 1.4580204778156998,
	"grad_norm": 0.16593150794506073,
	"learning_rate": 0.00012207491237849172,
	"loss": 0.455,
	"step": 534
	},
	{
	"epoch": 1.4607508532423208,
	"grad_norm": 0.13389617204666138,
	"learning_rate": 0.00012176467058291699,
	"loss": 0.4394,
	"step": 535
	},
	{
	"epoch": 1.463481228668942,
	"grad_norm": 0.16609057784080505,
	"learning_rate": 0.00012145420872912585,
	"loss": 0.4562,
	"step": 536
	},
	{
	"epoch": 1.466211604095563,
	"grad_norm": 0.1394118070602417,
	"learning_rate": 0.00012114352995613582,
	"loss": 0.4451,
	"step": 537
	},
	{
	"epoch": 1.4689419795221843,
	"grad_norm": 0.17529746890068054,
	"learning_rate": 0.00012083263740515765,
	"loss": 0.4522,
	"step": 538
	},
	{
	"epoch": 1.4716723549488053,
	"grad_norm": 0.15724115073680878,
	"learning_rate": 0.00012052153421956342,
	"loss": 0.4556,
	"step": 539
	},
	{
	"epoch": 1.4744027303754266,
	"grad_norm": 0.17186792194843292,
	"learning_rate": 0.00012021022354485514,
	"loss": 0.4546,
	"step": 540
	},
	{
	"epoch": 1.4771331058020478,
	"grad_norm": 0.1301499456167221,
	"learning_rate": 0.00011989870852863254,
	"loss": 0.4431,
	"step": 541
	},
	{
	"epoch": 1.4798634812286688,
	"grad_norm": 0.1480223387479782,
	"learning_rate": 0.00011958699232056134,
	"loss": 0.4491,
	"step": 542
	},
	{
	"epoch": 1.48259385665529,
	"grad_norm": 0.13150086998939514,
	"learning_rate": 0.00011927507807234168,
	"loss": 0.4568,
	"step": 543
	},
	{
	"epoch": 1.4853242320819113,
	"grad_norm": 0.15769197046756744,
	"learning_rate": 0.00011896296893767587,
	"loss": 0.4501,
	"step": 544
	},
	{
	"epoch": 1.4880546075085324,
	"grad_norm": 0.13996848464012146,
	"learning_rate": 0.0001186506680722367,
	"loss": 0.4673,
	"step": 545
	},
	{
	"epoch": 1.4907849829351536,
	"grad_norm": 0.16406555473804474,
	"learning_rate": 0.00011833817863363564,
	"loss": 0.4634,
	"step": 546
	},
	{
	"epoch": 1.4935153583617748,
	"grad_norm": 0.14007951319217682,
	"learning_rate": 0.0001180255037813906,
	"loss": 0.4466,
	"step": 547
	},
	{
	"epoch": 1.4962457337883959,
	"grad_norm": 0.15525664389133453,
	"learning_rate": 0.00011771264667689427,
	"loss": 0.4323,
	"step": 548
	},
	{
	"epoch": 1.4989761092150171,
	"grad_norm": 0.143234983086586,
	"learning_rate": 0.00011739961048338213,
	"loss": 0.4395,
	"step": 549
	},
	{
	"epoch": 1.5017064846416384,
	"grad_norm": 0.15597446262836456,
	"learning_rate": 0.00011708639836590023,
	"loss": 0.4546,
	"step": 550
	},
	{
	"epoch": 1.5044368600682594,
	"grad_norm": 0.13816912472248077,
	"learning_rate": 0.00011677301349127348,
	"loss": 0.4586,
	"step": 551
	},
	{
	"epoch": 1.5071672354948804,
	"grad_norm": 0.16299140453338623,
	"learning_rate": 0.00011645945902807341,
	"loss": 0.4465,
	"step": 552
	},
	{
	"epoch": 1.5098976109215017,
	"grad_norm": 0.15032370388507843,
	"learning_rate": 0.00011614573814658629,
	"loss": 0.4579,
	"step": 553
	},
	{
	"epoch": 1.512627986348123,
	"grad_norm": 0.158245250582695,
	"learning_rate": 0.00011583185401878101,
	"loss": 0.4462,
	"step": 554
	},
	{
	"epoch": 1.515358361774744,
	"grad_norm": 0.16943717002868652,
	"learning_rate": 0.00011551780981827698,
	"loss": 0.4572,
	"step": 555
	},
	{
	"epoch": 1.5180887372013652,
	"grad_norm": 0.14559145271778107,
	"learning_rate": 0.00011520360872031209,
	"loss": 0.4693,
	"step": 556
	},
	{
	"epoch": 1.5208191126279864,
	"grad_norm": 0.16138285398483276,
	"learning_rate": 0.00011488925390171059,
	"loss": 0.4623,
	"step": 557
	},
	{
	"epoch": 1.5235494880546074,
	"grad_norm": 0.14859908819198608,
	"learning_rate": 0.00011457474854085096,
	"loss": 0.4684,
	"step": 558
	},
	{
	"epoch": 1.5262798634812287,
	"grad_norm": 0.15695518255233765,
	"learning_rate": 0.00011426009581763377,
	"loss": 0.4619,
	"step": 559
	},
	{
	"epoch": 1.52901023890785,
	"grad_norm": 0.14711041748523712,
	"learning_rate": 0.00011394529891344958,
	"loss": 0.4556,
	"step": 560
	},
	{
	"epoch": 1.531740614334471,
	"grad_norm": 0.15271785855293274,
	"learning_rate": 0.0001136303610111467,
	"loss": 0.4557,
	"step": 561
	},
	{
	"epoch": 1.5344709897610922,
	"grad_norm": 0.1541603058576584,
	"learning_rate": 0.00011331528529499909,
	"loss": 0.4644,
	"step": 562
	},
	{
	"epoch": 1.5372013651877134,
	"grad_norm": 0.1686154007911682,
	"learning_rate": 0.00011300007495067401,
	"loss": 0.4446,
	"step": 563
	},
	{
	"epoch": 1.5399317406143345,
	"grad_norm": 0.14042454957962036,
	"learning_rate": 0.00011268473316520007,
	"loss": 0.4275,
	"step": 564
	},
	{
	"epoch": 1.5426621160409555,
	"grad_norm": 0.14575007557868958,
	"learning_rate": 0.00011236926312693479,
	"loss": 0.4579,
	"step": 565
	},
	{
	"epoch": 1.545392491467577,
	"grad_norm": 0.16124123334884644,
	"learning_rate": 0.0001120536680255323,
	"loss": 0.441,
	"step": 566
	},
	{
	"epoch": 1.548122866894198,
	"grad_norm": 0.14471302926540375,
	"learning_rate": 0.00011173795105191145,
	"loss": 0.4543,
	"step": 567
	},
	{
	"epoch": 1.550853242320819,
	"grad_norm": 0.162650004029274,
	"learning_rate": 0.00011142211539822318,
	"loss": 0.4483,
	"step": 568
	},
	{
	"epoch": 1.5535836177474402,
	"grad_norm": 0.14518044888973236,
	"learning_rate": 0.00011110616425781833,
	"loss": 0.4386,
	"step": 569
	},
	{
	"epoch": 1.5563139931740615,
	"grad_norm": 0.1545732617378235,
	"learning_rate": 0.00011079010082521557,
	"loss": 0.4505,
	"step": 570
	},
	{
	"epoch": 1.5590443686006825,
	"grad_norm": 0.12941716611385345,
	"learning_rate": 0.00011047392829606876,
	"loss": 0.4288,
	"step": 571
	},
	{
	"epoch": 1.5617747440273038,
	"grad_norm": 0.15107029676437378,
	"learning_rate": 0.0001101576498671349,
	"loss": 0.4275,
	"step": 572
	},
	{
	"epoch": 1.564505119453925,
	"grad_norm": 0.13776972889900208,
	"learning_rate": 0.00010984126873624179,
	"loss": 0.432,
	"step": 573
	},
	{
	"epoch": 1.567235494880546,
	"grad_norm": 0.15370745956897736,
	"learning_rate": 0.00010952478810225548,
	"loss": 0.4523,
	"step": 574
	},
	{
	"epoch": 1.5699658703071673,
	"grad_norm": 0.14505314826965332,
	"learning_rate": 0.00010920821116504816,
	"loss": 0.4444,
	"step": 575
	},
	{
	"epoch": 1.5726962457337885,
	"grad_norm": 0.13053090870380402,
	"learning_rate": 0.0001088915411254657,
	"loss": 0.4492,
	"step": 576
	},
	{
	"epoch": 1.5754266211604095,
	"grad_norm": 0.15613074600696564,
	"learning_rate": 0.00010857478118529533,
	"loss": 0.465,
	"step": 577
	},
	{
	"epoch": 1.5781569965870306,
	"grad_norm": 0.164808988571167,
	"learning_rate": 0.00010825793454723325,
	"loss": 0.4498,
	"step": 578
	},
	{
	"epoch": 1.580887372013652,
	"grad_norm": 0.14633600413799286,
	"learning_rate": 0.0001079410044148522,
	"loss": 0.4416,
	"step": 579
	},
	{
	"epoch": 1.583617747440273,
	"grad_norm": 0.15268942713737488,
	"learning_rate": 0.00010762399399256917,
	"loss": 0.4431,
	"step": 580
	},
	{
	"epoch": 1.586348122866894,
	"grad_norm": 0.16290055215358734,
	"learning_rate": 0.00010730690648561292,
	"loss": 0.465,
	"step": 581
	},
	{
	"epoch": 1.5890784982935153,
	"grad_norm": 0.13567085564136505,
	"learning_rate": 0.00010698974509999158,
	"loss": 0.4397,
	"step": 582
	},
	{
	"epoch": 1.5918088737201366,
	"grad_norm": 0.14896200597286224,
	"learning_rate": 0.00010667251304246029,
	"loss": 0.4458,
	"step": 583
	},
	{
	"epoch": 1.5945392491467576,
	"grad_norm": 0.14076146483421326,
	"learning_rate": 0.00010635521352048872,
	"loss": 0.4476,
	"step": 584
	},
	{
	"epoch": 1.5972696245733788,
	"grad_norm": 0.13334687054157257,
	"learning_rate": 0.00010603784974222861,
	"loss": 0.4283,
	"step": 585
	},
	{
	"epoch": 1.6,
	"grad_norm": 0.16997142136096954,
	"learning_rate": 0.00010572042491648149,
	"loss": 0.4472,
	"step": 586
	},
	{
	"epoch": 1.6027303754266211,
	"grad_norm": 0.15508471429347992,
	"learning_rate": 0.00010540294225266607,
	"loss": 0.4518,
	"step": 587
	},
	{
	"epoch": 1.6054607508532424,
	"grad_norm": 0.1571933478116989,
	"learning_rate": 0.0001050854049607858,
	"loss": 0.4493,
	"step": 588
	},
	{
	"epoch": 1.6081911262798636,
	"grad_norm": 0.15027360618114471,
	"learning_rate": 0.00010476781625139656,
	"loss": 0.4562,
	"step": 589
	},
	{
	"epoch": 1.6109215017064846,
	"grad_norm": 0.14502452313899994,
	"learning_rate": 0.00010445017933557404,
	"loss": 0.4476,
	"step": 590
	},
	{
	"epoch": 1.6136518771331056,
	"grad_norm": 0.1403171718120575,
	"learning_rate": 0.00010413249742488131,
	"loss": 0.4154,
	"step": 591
	},
	{
	"epoch": 1.6163822525597271,
	"grad_norm": 0.14815428853034973,
	"learning_rate": 0.00010381477373133652,
	"loss": 0.4501,
	"step": 592
	},
	{
	"epoch": 1.6191126279863481,
	"grad_norm": 0.16107513010501862,
	"learning_rate": 0.00010349701146738007,
	"loss": 0.4507,
	"step": 593
	},
	{
	"epoch": 1.6218430034129692,
	"grad_norm": 0.14111128449440002,
	"learning_rate": 0.00010317921384584244,
	"loss": 0.4483,
	"step": 594
	},
	{
	"epoch": 1.6245733788395904,
	"grad_norm": 0.1680098921060562,
	"learning_rate": 0.0001028613840799117,
	"loss": 0.4584,
	"step": 595
	},
	{
	"epoch": 1.6273037542662117,
	"grad_norm": 0.13612088561058044,
	"learning_rate": 0.00010254352538310075,
	"loss": 0.4389,
	"step": 596
	},
	{
	"epoch": 1.6300341296928327,
	"grad_norm": 0.16298632323741913,
	"learning_rate": 0.00010222564096921505,
	"loss": 0.452,
	"step": 597
	},
	{
	"epoch": 1.632764505119454,
	"grad_norm": 0.14744146168231964,
	"learning_rate": 0.00010190773405232024,
	"loss": 0.4588,
	"step": 598
	},
	{
	"epoch": 1.6354948805460752,
	"grad_norm": 0.1594633013010025,
	"learning_rate": 0.00010158980784670927,
	"loss": 0.4403,
	"step": 599
	},
	{
	"epoch": 1.6382252559726962,
	"grad_norm": 0.15123943984508514,
	"learning_rate": 0.00010127186556687019,
	"loss": 0.4395,
	"step": 600
	},
	{
	"epoch": 1.6409556313993174,
	"grad_norm": 0.1518649309873581,
	"learning_rate": 0.00010095391042745361,
	"loss": 0.4265,
	"step": 601
	},
	{
	"epoch": 1.6436860068259387,
	"grad_norm": 0.1611323356628418,
	"learning_rate": 0.00010063594564324012,
	"loss": 0.444,
	"step": 602
	},
	{
	"epoch": 1.6464163822525597,
	"grad_norm": 0.14762264490127563,
	"learning_rate": 0.00010031797442910789,
	"loss": 0.4409,
	"step": 603
	},
	{
	"epoch": 1.6491467576791807,
	"grad_norm": 0.1546734869480133,
	"learning_rate": 0.0001,
	"loss": 0.4472,
	"step": 604
	},
	{
	"epoch": 1.6518771331058022,
	"grad_norm": 0.1580485701560974,
	"learning_rate": 9.968202557089212e-05,
	"loss": 0.4497,
	"step": 605
	},
	{
	"epoch": 1.6546075085324232,
	"grad_norm": 0.151153564453125,
	"learning_rate": 9.93640543567599e-05,
	"loss": 0.4495,
	"step": 606
	},
	{
	"epoch": 1.6573378839590442,
	"grad_norm": 0.14238281548023224,
	"learning_rate": 9.904608957254642e-05,
	"loss": 0.4481,
	"step": 607
	},
	{
	"epoch": 1.6600682593856655,
	"grad_norm": 0.13984693586826324,
	"learning_rate": 9.872813443312984e-05,
	"loss": 0.4262,
	"step": 608
	},
	{
	"epoch": 1.6627986348122867,
	"grad_norm": 0.14657770097255707,
	"learning_rate": 9.84101921532908e-05,
	"loss": 0.4554,
	"step": 609
	},
	{
	"epoch": 1.6655290102389078,
	"grad_norm": 0.15702606737613678,
	"learning_rate": 9.809226594767978e-05,
	"loss": 0.4553,
	"step": 610
	},
	{
	"epoch": 1.668259385665529,
	"grad_norm": 0.15596133470535278,
	"learning_rate": 9.777435903078494e-05,
	"loss": 0.4353,
	"step": 611
	},
	{
	"epoch": 1.6709897610921502,
	"grad_norm": 0.16193975508213043,
	"learning_rate": 9.745647461689931e-05,
	"loss": 0.4313,
	"step": 612
	},
	{
	"epoch": 1.6737201365187713,
	"grad_norm": 0.15690681338310242,
	"learning_rate": 9.713861592008833e-05,
	"loss": 0.4498,
	"step": 613
	},
	{
	"epoch": 1.6764505119453925,
	"grad_norm": 0.15277935564517975,
	"learning_rate": 9.682078615415754e-05,
	"loss": 0.4358,
	"step": 614
	},
	{
	"epoch": 1.6791808873720138,
	"grad_norm": 0.1447397917509079,
	"learning_rate": 9.650298853261997e-05,
	"loss": 0.4264,
	"step": 615
	},
	{
	"epoch": 1.6819112627986348,
	"grad_norm": 0.14292937517166138,
	"learning_rate": 9.61852262686635e-05,
	"loss": 0.4521,
	"step": 616
	},
	{
	"epoch": 1.6846416382252558,
	"grad_norm": 0.15041732788085938,
	"learning_rate": 9.586750257511867e-05,
	"loss": 0.445,
	"step": 617
	},
	{
	"epoch": 1.6873720136518773,
	"grad_norm": 0.14610610902309418,
	"learning_rate": 9.5549820664426e-05,
	"loss": 0.4493,
	"step": 618
	},
	{
	"epoch": 1.6901023890784983,
	"grad_norm": 0.1494503617286682,
	"learning_rate": 9.523218374860348e-05,
	"loss": 0.4473,
	"step": 619
	},
	{
	"epoch": 1.6928327645051193,
	"grad_norm": 0.14970283210277557,
	"learning_rate": 9.491459503921421e-05,
	"loss": 0.453,
	"step": 620
	},
	{
	"epoch": 1.6955631399317406,
	"grad_norm": 0.15402431786060333,
	"learning_rate": 9.459705774733396e-05,
	"loss": 0.4435,
	"step": 621
	},
	{
	"epoch": 1.6982935153583618,
	"grad_norm": 0.1583550125360489,
	"learning_rate": 9.427957508351852e-05,
	"loss": 0.4379,
	"step": 622
	},
	{
	"epoch": 1.7010238907849828,
	"grad_norm": 0.15820656716823578,
	"learning_rate": 9.396215025777139e-05,
	"loss": 0.4416,
	"step": 623
	},
	{
	"epoch": 1.703754266211604,
	"grad_norm": 0.14503423869609833,
	"learning_rate": 9.364478647951133e-05,
	"loss": 0.434,
	"step": 624
	},
	{
	"epoch": 1.7064846416382253,
	"grad_norm": 0.1509648561477661,
	"learning_rate": 9.332748695753973e-05,
	"loss": 0.4516,
	"step": 625
	},
	{
	"epoch": 1.7092150170648464,
	"grad_norm": 0.15958918631076813,
	"learning_rate": 9.301025490000841e-05,
	"loss": 0.4504,
	"step": 626
	},
	{
	"epoch": 1.7119453924914676,
	"grad_norm": 0.1522430032491684,
	"learning_rate": 9.269309351438711e-05,
	"loss": 0.4339,
	"step": 627
	},
	{
	"epoch": 1.7146757679180888,
	"grad_norm": 0.14348183572292328,
	"learning_rate": 9.237600600743085e-05,
	"loss": 0.4236,
	"step": 628
	},
	{
	"epoch": 1.7174061433447099,
	"grad_norm": 0.1558932512998581,
	"learning_rate": 9.20589955851478e-05,
	"loss": 0.4514,
	"step": 629
	},
	{
	"epoch": 1.7201365187713311,
	"grad_norm": 0.13524165749549866,
	"learning_rate": 9.174206545276677e-05,
	"loss": 0.4426,
	"step": 630
	},
	{
	"epoch": 1.7228668941979524,
	"grad_norm": 0.1464926302433014,
	"learning_rate": 9.142521881470469e-05,
	"loss": 0.4397,
	"step": 631
	},
	{
	"epoch": 1.7255972696245734,
	"grad_norm": 0.14485323429107666,
	"learning_rate": 9.11084588745343e-05,
	"loss": 0.4497,
	"step": 632
	},
	{
	"epoch": 1.7283276450511944,
	"grad_norm": 0.139760822057724,
	"learning_rate": 9.07917888349519e-05,
	"loss": 0.4465,
	"step": 633
	},
	{
	"epoch": 1.7310580204778157,
	"grad_norm": 0.16193385422229767,
	"learning_rate": 9.047521189774455e-05,
	"loss": 0.4377,
	"step": 634
	},
	{
	"epoch": 1.733788395904437,
	"grad_norm": 0.17076647281646729,
	"learning_rate": 9.015873126375822e-05,
	"loss": 0.4632,
	"step": 635
	},
	{
	"epoch": 1.736518771331058,
	"grad_norm": 0.14881980419158936,
	"learning_rate": 8.984235013286511e-05,
	"loss": 0.4401,
	"step": 636
	},
	{
	"epoch": 1.7392491467576792,
	"grad_norm": 0.15158087015151978,
	"learning_rate": 8.952607170393125e-05,
	"loss": 0.4292,
	"step": 637
	},
	{
	"epoch": 1.7419795221843004,
	"grad_norm": 0.14353424310684204,
	"learning_rate": 8.920989917478447e-05,
	"loss": 0.4439,
	"step": 638
	},
	{
	"epoch": 1.7447098976109214,
	"grad_norm": 0.13718026876449585,
	"learning_rate": 8.88938357421817e-05,
	"loss": 0.4267,
	"step": 639
	},
	{
	"epoch": 1.7474402730375427,
	"grad_norm": 0.15788930654525757,
	"learning_rate": 8.857788460177686e-05,
	"loss": 0.4241,
	"step": 640
	},
	{
	"epoch": 1.750170648464164,
	"grad_norm": 0.14324265718460083,
	"learning_rate": 8.826204894808855e-05,
	"loss": 0.4472,
	"step": 641
	},
	{
	"epoch": 1.752901023890785,
	"grad_norm": 0.15020030736923218,
	"learning_rate": 8.79463319744677e-05,
	"loss": 0.4339,
	"step": 642
	},
	{
	"epoch": 1.7556313993174062,
	"grad_norm": 0.13715054094791412,
	"learning_rate": 8.763073687306524e-05,
	"loss": 0.4429,
	"step": 643
	},
	{
	"epoch": 1.7583617747440274,
	"grad_norm": 0.16862636804580688,
	"learning_rate": 8.731526683479992e-05,
	"loss": 0.4509,
	"step": 644
	},
	{
	"epoch": 1.7610921501706485,
	"grad_norm": 0.14781633019447327,
	"learning_rate": 8.6999925049326e-05,
	"loss": 0.4319,
	"step": 645
	},
	{
	"epoch": 1.7638225255972695,
	"grad_norm": 0.15873977541923523,
	"learning_rate": 8.668471470500095e-05,
	"loss": 0.4486,
	"step": 646
	},
	{
	"epoch": 1.7665529010238907,
	"grad_norm": 0.1477411389350891,
	"learning_rate": 8.63696389888533e-05,
	"loss": 0.4461,
	"step": 647
	},
	{
	"epoch": 1.769283276450512,
	"grad_norm": 0.1804722100496292,
	"learning_rate": 8.605470108655045e-05,
	"loss": 0.4402,
	"step": 648
	},
	{
	"epoch": 1.772013651877133,
	"grad_norm": 0.15354932844638824,
	"learning_rate": 8.573990418236625e-05,
	"loss": 0.4564,
	"step": 649
	},
	{
	"epoch": 1.7747440273037542,
	"grad_norm": 0.1494126319885254,
	"learning_rate": 8.542525145914905e-05,
	"loss": 0.4377,
	"step": 650
	},
	{
	"epoch": 1.7774744027303755,
	"grad_norm": 0.14122453331947327,
	"learning_rate": 8.511074609828944e-05,
	"loss": 0.4361,
	"step": 651
	},
	{
	"epoch": 1.7802047781569965,
	"grad_norm": 0.16938751935958862,
	"learning_rate": 8.479639127968792e-05,
	"loss": 0.4593,
	"step": 652
	},
	{
	"epoch": 1.7829351535836178,
	"grad_norm": 0.1502314805984497,
	"learning_rate": 8.448219018172303e-05,
	"loss": 0.4336,
	"step": 653
	},
	{
	"epoch": 1.785665529010239,
	"grad_norm": 0.180609330534935,
	"learning_rate": 8.4168145981219e-05,
	"loss": 0.45,
	"step": 654
	},
	{
	"epoch": 1.78839590443686,
	"grad_norm": 0.1395808309316635,
	"learning_rate": 8.385426185341374e-05,
	"loss": 0.4188,
	"step": 655
	},
	{
	"epoch": 1.7911262798634813,
	"grad_norm": 0.1596853882074356,
	"learning_rate": 8.35405409719266e-05,
	"loss": 0.452,
	"step": 656
	},
	{
	"epoch": 1.7938566552901025,
	"grad_norm": 0.15330305695533752,
	"learning_rate": 8.322698650872656e-05,
	"loss": 0.4355,
	"step": 657
	},
	{
	"epoch": 1.7965870307167235,
	"grad_norm": 0.1545482873916626,
	"learning_rate": 8.291360163409978e-05,
	"loss": 0.4366,
	"step": 658
	},
	{
	"epoch": 1.7993174061433446,
	"grad_norm": 0.13950030505657196,
	"learning_rate": 8.260038951661787e-05,
	"loss": 0.4169,
	"step": 659
	},
	{
	"epoch": 1.802047781569966,
	"grad_norm": 0.14191307127475739,
	"learning_rate": 8.228735332310575e-05,
	"loss": 0.4471,
	"step": 660
	},
	{
	"epoch": 1.804778156996587,
	"grad_norm": 0.14557993412017822,
	"learning_rate": 8.197449621860943e-05,
	"loss": 0.4028,
	"step": 661
	},
	{
	"epoch": 1.807508532423208,
	"grad_norm": 0.13985979557037354,
	"learning_rate": 8.16618213663644e-05,
	"loss": 0.4293,
	"step": 662
	},
	{
	"epoch": 1.8102389078498293,
	"grad_norm": 0.1420183628797531,
	"learning_rate": 8.134933192776333e-05,
	"loss": 0.4313,
	"step": 663
	},
	{
	"epoch": 1.8129692832764506,
	"grad_norm": 0.1498919129371643,
	"learning_rate": 8.103703106232416e-05,
	"loss": 0.4315,
	"step": 664
	},
	{
	"epoch": 1.8156996587030716,
	"grad_norm": 0.1570868194103241,
	"learning_rate": 8.072492192765833e-05,
	"loss": 0.4348,
	"step": 665
	},
	{
	"epoch": 1.8184300341296928,
	"grad_norm": 0.15423277020454407,
	"learning_rate": 8.041300767943867e-05,
	"loss": 0.44,
	"step": 666
	},
	{
	"epoch": 1.821160409556314,
	"grad_norm": 0.13805197179317474,
	"learning_rate": 8.010129147136749e-05,
	"loss": 0.4317,
	"step": 667
	},
	{
	"epoch": 1.823890784982935,
	"grad_norm": 0.14628642797470093,
	"learning_rate": 7.978977645514487e-05,
	"loss": 0.4379,
	"step": 668
	},
	{
	"epoch": 1.8266211604095564,
	"grad_norm": 0.15710268914699554,
	"learning_rate": 7.947846578043659e-05,
	"loss": 0.4557,
	"step": 669
	},
	{
	"epoch": 1.8293515358361776,
	"grad_norm": 0.15317128598690033,
	"learning_rate": 7.916736259484239e-05,
	"loss": 0.4506,
	"step": 670
	},
	{
	"epoch": 1.8320819112627986,
	"grad_norm": 0.15919502079486847,
	"learning_rate": 7.88564700438642e-05,
	"loss": 0.435,
	"step": 671
	},
	{
	"epoch": 1.8348122866894196,
	"grad_norm": 0.1551041305065155,
	"learning_rate": 7.854579127087417e-05,
	"loss": 0.4283,
	"step": 672
	},
	{
	"epoch": 1.8375426621160411,
	"grad_norm": 0.16782739758491516,
	"learning_rate": 7.823532941708303e-05,
	"loss": 0.4507,
	"step": 673
	},
	{
	"epoch": 1.8402730375426621,
	"grad_norm": 0.15251149237155914,
	"learning_rate": 7.792508762150833e-05,
	"loss": 0.4255,
	"step": 674
	},
	{
	"epoch": 1.8430034129692832,
	"grad_norm": 0.17279517650604248,
	"learning_rate": 7.761506902094248e-05,
	"loss": 0.4255,
	"step": 675
	},
	{
	"epoch": 1.8457337883959044,
	"grad_norm": 0.13385094702243805,
	"learning_rate": 7.730527674992143e-05,
	"loss": 0.4229,
	"step": 676
	},
	{
	"epoch": 1.8484641638225257,
	"grad_norm": 0.15888231992721558,
	"learning_rate": 7.699571394069269e-05,
	"loss": 0.4335,
	"step": 677
	},
	{
	"epoch": 1.8511945392491467,
	"grad_norm": 0.1549587994813919,
	"learning_rate": 7.668638372318359e-05,
	"loss": 0.4083,
	"step": 678
	},
	{
	"epoch": 1.853924914675768,
	"grad_norm": 0.16143332421779633,
	"learning_rate": 7.637728922496996e-05,
	"loss": 0.4367,
	"step": 679
	},
	{
	"epoch": 1.8566552901023892,
	"grad_norm": 0.13735996186733246,
	"learning_rate": 7.606843357124426e-05,
	"loss": 0.4296,
	"step": 680
	},
	{
	"epoch": 1.8593856655290102,
	"grad_norm": 0.14317500591278076,
	"learning_rate": 7.575981988478392e-05,
	"loss": 0.4419,
	"step": 681
	},
	{
	"epoch": 1.8621160409556314,
	"grad_norm": 0.14451129734516144,
	"learning_rate": 7.54514512859201e-05,
	"loss": 0.4389,
	"step": 682
	},
	{
	"epoch": 1.8648464163822527,
	"grad_norm": 0.14233650267124176,
	"learning_rate": 7.514333089250577e-05,
	"loss": 0.4258,
	"step": 683
	},
	{
	"epoch": 1.8675767918088737,
	"grad_norm": 0.14275044202804565,
	"learning_rate": 7.483546181988436e-05,
	"loss": 0.4456,
	"step": 684
	},
	{
	"epoch": 1.8703071672354947,
	"grad_norm": 0.15278606116771698,
	"learning_rate": 7.452784718085833e-05,
	"loss": 0.4344,
	"step": 685
	},
	{
	"epoch": 1.8730375426621162,
	"grad_norm": 0.13797658681869507,
	"learning_rate": 7.422049008565757e-05,
	"loss": 0.4392,
	"step": 686
	},
	{
	"epoch": 1.8757679180887372,
	"grad_norm": 0.1518598347902298,
	"learning_rate": 7.391339364190794e-05,
	"loss": 0.431,
	"step": 687
	},
	{
	"epoch": 1.8784982935153582,
	"grad_norm": 0.1477964073419571,
	"learning_rate": 7.360656095459995e-05,
	"loss": 0.4449,
	"step": 688
	},
	{
	"epoch": 1.8812286689419795,
	"grad_norm": 0.14295299351215363,
	"learning_rate": 7.329999512605738e-05,
	"loss": 0.4384,
	"step": 689
	},
	{
	"epoch": 1.8839590443686007,
	"grad_norm": 0.1487056165933609,
	"learning_rate": 7.299369925590574e-05,
	"loss": 0.4311,
	"step": 690
	},
	{
	"epoch": 1.8866894197952218,
	"grad_norm": 0.14108945429325104,
	"learning_rate": 7.268767644104112e-05,
	"loss": 0.4239,
	"step": 691
	},
	{
	"epoch": 1.889419795221843,
	"grad_norm": 0.15047363936901093,
	"learning_rate": 7.238192977559884e-05,
	"loss": 0.4427,
	"step": 692
	},
	{
	"epoch": 1.8921501706484642,
	"grad_norm": 0.14634403586387634,
	"learning_rate": 7.2076462350922e-05,
	"loss": 0.4416,
	"step": 693
	},
	{
	"epoch": 1.8948805460750853,
	"grad_norm": 0.15272392332553864,
	"learning_rate": 7.177127725553045e-05,
	"loss": 0.43,
	"step": 694
	},
	{
	"epoch": 1.8976109215017065,
	"grad_norm": 0.15047992765903473,
	"learning_rate": 7.146637757508949e-05,
	"loss": 0.4436,
	"step": 695
	},
	{
	"epoch": 1.9003412969283278,
	"grad_norm": 0.14413118362426758,
	"learning_rate": 7.116176639237852e-05,
	"loss": 0.4169,
	"step": 696
	},
	{
	"epoch": 1.9030716723549488,
	"grad_norm": 0.1437167525291443,
	"learning_rate": 7.085744678726013e-05,
	"loss": 0.4389,
	"step": 697
	},
	{
	"epoch": 1.9058020477815698,
	"grad_norm": 0.14662359654903412,
	"learning_rate": 7.05534218366488e-05,
	"loss": 0.435,
	"step": 698
	},
	{
	"epoch": 1.9085324232081913,
	"grad_norm": 0.14521794021129608,
	"learning_rate": 7.024969461447972e-05,
	"loss": 0.4505,
	"step": 699
	},
	{
	"epoch": 1.9112627986348123,
	"grad_norm": 0.14155706763267517,
	"learning_rate": 6.994626819167789e-05,
	"loss": 0.4301,
	"step": 700
	},
	{
	"epoch": 1.9139931740614333,
	"grad_norm": 0.14663158357143402,
	"learning_rate": 6.964314563612708e-05,
	"loss": 0.4262,
	"step": 701
	},
	{
	"epoch": 1.9167235494880546,
	"grad_norm": 0.15034128725528717,
	"learning_rate": 6.934033001263847e-05,
	"loss": 0.4424,
	"step": 702
	},
	{
	"epoch": 1.9194539249146758,
	"grad_norm": 0.15093255043029785,
	"learning_rate": 6.903782438292015e-05,
	"loss": 0.4425,
	"step": 703
	},
	{
	"epoch": 1.9221843003412968,
	"grad_norm": 0.1556250900030136,
	"learning_rate": 6.873563180554583e-05,
	"loss": 0.4225,
	"step": 704
	},
	{
	"epoch": 1.924914675767918,
	"grad_norm": 0.16173475980758667,
	"learning_rate": 6.843375533592395e-05,
	"loss": 0.4228,
	"step": 705
	},
	{
	"epoch": 1.9276450511945393,
	"grad_norm": 0.1532420516014099,
	"learning_rate": 6.813219802626698e-05,
	"loss": 0.432,
	"step": 706
	},
	{
	"epoch": 1.9303754266211604,
	"grad_norm": 0.1596469283103943,
	"learning_rate": 6.783096292556035e-05,
	"loss": 0.4232,
	"step": 707
	},
	{
	"epoch": 1.9331058020477816,
	"grad_norm": 0.16538076102733612,
	"learning_rate": 6.753005307953167e-05,
	"loss": 0.45,
	"step": 708
	},
	{
	"epoch": 1.9358361774744028,
	"grad_norm": 0.15187640488147736,
	"learning_rate": 6.722947153062003e-05,
	"loss": 0.4442,
	"step": 709
	},
	{
	"epoch": 1.9385665529010239,
	"grad_norm": 0.14827731251716614,
	"learning_rate": 6.692922131794517e-05,
	"loss": 0.414,
	"step": 710
	},
	{
	"epoch": 1.9412969283276449,
	"grad_norm": 0.16438645124435425,
	"learning_rate": 6.662930547727668e-05,
	"loss": 0.419,
	"step": 711
	},
	{
	"epoch": 1.9440273037542664,
	"grad_norm": 0.15135832130908966,
	"learning_rate": 6.632972704100349e-05,
	"loss": 0.4155,
	"step": 712
	},
	{
	"epoch": 1.9467576791808874,
	"grad_norm": 0.15094083547592163,
	"learning_rate": 6.603048903810305e-05,
	"loss": 0.4258,
	"step": 713
	},
	{
	"epoch": 1.9494880546075084,
	"grad_norm": 0.14968033134937286,
	"learning_rate": 6.57315944941107e-05,
	"loss": 0.4395,
	"step": 714
	},
	{
	"epoch": 1.9522184300341296,
	"grad_norm": 0.16519851982593536,
	"learning_rate": 6.54330464310892e-05,
	"loss": 0.4406,
	"step": 715
	},
	{
	"epoch": 1.954948805460751,
	"grad_norm": 0.13855180144309998,
	"learning_rate": 6.513484786759818e-05,
	"loss": 0.43,
	"step": 716
	},
	{
	"epoch": 1.957679180887372,
	"grad_norm": 0.1570328176021576,
	"learning_rate": 6.483700181866337e-05,
	"loss": 0.4288,
	"step": 717
	},
	{
	"epoch": 1.9604095563139932,
	"grad_norm": 0.14928270876407623,
	"learning_rate": 6.453951129574644e-05,
	"loss": 0.4224,
	"step": 718
	},
	{
	"epoch": 1.9631399317406144,
	"grad_norm": 0.16348999738693237,
	"learning_rate": 6.42423793067144e-05,
	"loss": 0.4379,
	"step": 719
	},
	{
	"epoch": 1.9658703071672354,
	"grad_norm": 0.14947615563869476,
	"learning_rate": 6.39456088558091e-05,
	"loss": 0.445,
	"step": 720
	},
	{
	"epoch": 1.9686006825938567,
	"grad_norm": 0.14767783880233765,
	"learning_rate": 6.3649202943617e-05,
	"loss": 0.4388,
	"step": 721
	},
	{
	"epoch": 1.971331058020478,
	"grad_norm": 0.14223739504814148,
	"learning_rate": 6.33531645670389e-05,
	"loss": 0.433,
	"step": 722
	},
	{
	"epoch": 1.974061433447099,
	"grad_norm": 0.1595824956893921,
	"learning_rate": 6.305749671925931e-05,
	"loss": 0.4325,
	"step": 723
	},
	{
	"epoch": 1.9767918088737202,
	"grad_norm": 0.14971914887428284,
	"learning_rate": 6.276220238971652e-05,
	"loss": 0.4336,
	"step": 724
	},
	{
	"epoch": 1.9795221843003414,
	"grad_norm": 0.15426860749721527,
	"learning_rate": 6.24672845640723e-05,
	"loss": 0.433,
	"step": 725
	},
	{
	"epoch": 1.9822525597269625,
	"grad_norm": 0.1470557302236557,
	"learning_rate": 6.217274622418153e-05,
	"loss": 0.435,
	"step": 726
	},
	{
	"epoch": 1.9849829351535835,
	"grad_norm": 0.1564924120903015,
	"learning_rate": 6.187859034806224e-05,
	"loss": 0.4371,
	"step": 727
	},
	{
	"epoch": 1.9877133105802047,
	"grad_norm": 0.16420651972293854,
	"learning_rate": 6.158481990986557e-05,
	"loss": 0.4478,
	"step": 728
	},
	{
	"epoch": 1.990443686006826,
	"grad_norm": 0.15158484876155853,
	"learning_rate": 6.129143787984533e-05,
	"loss": 0.4267,
	"step": 729
	},
	{
	"epoch": 1.993174061433447,
	"grad_norm": 0.15700684487819672,
	"learning_rate": 6.099844722432843e-05,
	"loss": 0.4293,
	"step": 730
	},
	{
	"epoch": 1.9959044368600682,
	"grad_norm": 0.14848262071609497,
	"learning_rate": 6.070585090568459e-05,
	"loss": 0.4339,
	"step": 731
	},
	{
	"epoch": 1.9986348122866895,
	"grad_norm": 0.15596534311771393,
	"learning_rate": 6.0413651882296406e-05,
	"loss": 0.4245,
	"step": 732
	},
	{
	"epoch": 2.0013651877133105,
	"grad_norm": 0.3338797092437744,
	"learning_rate": 6.012185310852962e-05,
	"loss": 0.7156,
	"step": 733
	},
	{
	"epoch": 2.0040955631399315,
	"grad_norm": 0.1896699219942093,
	"learning_rate": 5.983045753470308e-05,
	"loss": 0.4228,
	"step": 734
	},
	{
	"epoch": 2.006825938566553,
	"grad_norm": 0.20619529485702515,
	"learning_rate": 5.953946810705888e-05,
	"loss": 0.4244,
	"step": 735
	},
	{
	"epoch": 2.009556313993174,
	"grad_norm": 0.17412033677101135,
	"learning_rate": 5.924888776773281e-05,
	"loss": 0.4186,
	"step": 736
	},
	{
	"epoch": 2.012286689419795,
	"grad_norm": 0.1896408498287201,
	"learning_rate": 5.8958719454724346e-05,
	"loss": 0.4259,
	"step": 737
	},
	{
	"epoch": 2.0150170648464165,
	"grad_norm": 0.16549214720726013,
	"learning_rate": 5.8668966101867005e-05,
	"loss": 0.3967,
	"step": 738
	},
	{
	"epoch": 2.0177474402730375,
	"grad_norm": 0.16784432530403137,
	"learning_rate": 5.837963063879884e-05,
	"loss": 0.4347,
	"step": 739
	},
	{
	"epoch": 2.0204778156996586,
	"grad_norm": 0.18551002442836761,
	"learning_rate": 5.809071599093272e-05,
	"loss": 0.4407,
	"step": 740
	},
	{
	"epoch": 2.02320819112628,
	"grad_norm": 0.17308658361434937,
	"learning_rate": 5.780222507942654e-05,
	"loss": 0.4343,
	"step": 741
	},
	{
	"epoch": 2.025938566552901,
	"grad_norm": 0.17182452976703644,
	"learning_rate": 5.751416082115408e-05,
	"loss": 0.422,
	"step": 742
	},
	{
	"epoch": 2.028668941979522,
	"grad_norm": 0.17356833815574646,
	"learning_rate": 5.722652612867523e-05,
	"loss": 0.4168,
	"step": 743
	},
	{
	"epoch": 2.0313993174061435,
	"grad_norm": 0.17314977943897247,
	"learning_rate": 5.6939323910206645e-05,
	"loss": 0.4285,
	"step": 744
	},
	{
	"epoch": 2.0341296928327646,
	"grad_norm": 0.1868155151605606,
	"learning_rate": 5.6652557069592304e-05,
	"loss": 0.4211,
	"step": 745
	},
	{
	"epoch": 2.0368600682593856,
	"grad_norm": 0.1524539738893509,
	"learning_rate": 5.63662285062742e-05,
	"loss": 0.4233,
	"step": 746
	},
	{
	"epoch": 2.0395904436860066,
	"grad_norm": 0.17929619550704956,
	"learning_rate": 5.608034111526298e-05,
	"loss": 0.4305,
	"step": 747
	},
	{
	"epoch": 2.042320819112628,
	"grad_norm": 0.1604132503271103,
	"learning_rate": 5.579489778710867e-05,
	"loss": 0.4214,
	"step": 748
	},
	{
	"epoch": 2.045051194539249,
	"grad_norm": 0.16710160672664642,
	"learning_rate": 5.550990140787147e-05,
	"loss": 0.4369,
	"step": 749
	},
	{
	"epoch": 2.04778156996587,
	"grad_norm": 0.16114738583564758,
	"learning_rate": 5.522535485909257e-05,
	"loss": 0.4216,
	"step": 750
	},
	{
	"epoch": 2.0505119453924916,
	"grad_norm": 0.16698378324508667,
	"learning_rate": 5.494126101776505e-05,
	"loss": 0.4323,
	"step": 751
	},
	{
	"epoch": 2.0532423208191126,
	"grad_norm": 0.174140065908432,
	"learning_rate": 5.4657622756304704e-05,
	"loss": 0.4135,
	"step": 752
	},
	{
	"epoch": 2.0559726962457336,
	"grad_norm": 0.1496962457895279,
	"learning_rate": 5.437444294252107e-05,
	"loss": 0.438,
	"step": 753
	},
	{
	"epoch": 2.058703071672355,
	"grad_norm": 0.1660911738872528,
	"learning_rate": 5.409172443958843e-05,
	"loss": 0.4262,
	"step": 754
	},
	{
	"epoch": 2.061433447098976,
	"grad_norm": 0.18081265687942505,
	"learning_rate": 5.380947010601681e-05,
	"loss": 0.4172,
	"step": 755
	},
	{
	"epoch": 2.064163822525597,
	"grad_norm": 0.14415475726127625,
	"learning_rate": 5.3527682795623146e-05,
	"loss": 0.4181,
	"step": 756
	},
	{
	"epoch": 2.0668941979522186,
	"grad_norm": 0.20684713125228882,
	"learning_rate": 5.324636535750238e-05,
	"loss": 0.4291,
	"step": 757
	},
	{
	"epoch": 2.0696245733788396,
	"grad_norm": 0.17085103690624237,
	"learning_rate": 5.296552063599868e-05,
	"loss": 0.4372,
	"step": 758
	},
	{
	"epoch": 2.0723549488054607,
	"grad_norm": 0.17061397433280945,
	"learning_rate": 5.2685151470676653e-05,
	"loss": 0.4247,
	"step": 759
	},
	{
	"epoch": 2.0750853242320817,
	"grad_norm": 0.17692053318023682,
	"learning_rate": 5.240526069629265e-05,
	"loss": 0.4261,
	"step": 760
	},
	{
	"epoch": 2.077815699658703,
	"grad_norm": 0.15353117883205414,
	"learning_rate": 5.212585114276614e-05,
	"loss": 0.4272,
	"step": 761
	},
	{
	"epoch": 2.080546075085324,
	"grad_norm": 0.17618128657341003,
	"learning_rate": 5.1846925635151045e-05,
	"loss": 0.4206,
	"step": 762
	},
	{
	"epoch": 2.083276450511945,
	"grad_norm": 0.154897078871727,
	"learning_rate": 5.156848699360719e-05,
	"loss": 0.4086,
	"step": 763
	},
	{
	"epoch": 2.0860068259385667,
	"grad_norm": 0.15779747068881989,
	"learning_rate": 5.129053803337181e-05,
	"loss": 0.4073,
	"step": 764
	},
	{
	"epoch": 2.0887372013651877,
	"grad_norm": 0.186599463224411,
	"learning_rate": 5.101308156473104e-05,
	"loss": 0.4204,
	"step": 765
	},
	{
	"epoch": 2.0914675767918087,
	"grad_norm": 0.15039357542991638,
	"learning_rate": 5.073612039299157e-05,
	"loss": 0.4277,
	"step": 766
	},
	{
	"epoch": 2.09419795221843,
	"grad_norm": 0.16809961199760437,
	"learning_rate": 5.0459657318452224e-05,
	"loss": 0.4312,
	"step": 767
	},
	{
	"epoch": 2.096928327645051,
	"grad_norm": 0.18085776269435883,
	"learning_rate": 5.0183695136375664e-05,
	"loss": 0.4233,
	"step": 768
	},
	{
	"epoch": 2.0996587030716722,
	"grad_norm": 0.15629561245441437,
	"learning_rate": 4.9908236636960126e-05,
	"loss": 0.4252,
	"step": 769
	},
	{
	"epoch": 2.1023890784982937,
	"grad_norm": 0.18560980260372162,
	"learning_rate": 4.963328460531127e-05,
	"loss": 0.4112,
	"step": 770
	},
	{
	"epoch": 2.1051194539249147,
	"grad_norm": 0.16012516617774963,
	"learning_rate": 4.935884182141377e-05,
	"loss": 0.4108,
	"step": 771
	},
	{
	"epoch": 2.1078498293515358,
	"grad_norm": 0.1524171382188797,
	"learning_rate": 4.908491106010368e-05,
	"loss": 0.4183,
	"step": 772
	},
	{
	"epoch": 2.1105802047781568,
	"grad_norm": 0.173212930560112,
	"learning_rate": 4.8811495091039926e-05,
	"loss": 0.4058,
	"step": 773
	},
	{
	"epoch": 2.1133105802047782,
	"grad_norm": 0.1540430635213852,
	"learning_rate": 4.8538596678676406e-05,
	"loss": 0.4016,
	"step": 774
	},
	{
	"epoch": 2.1160409556313993,
	"grad_norm": 0.1477975845336914,
	"learning_rate": 4.826621858223431e-05,
	"loss": 0.3975,
	"step": 775
	},
	{
	"epoch": 2.1187713310580203,
	"grad_norm": 0.17888864874839783,
	"learning_rate": 4.79943635556739e-05,
	"loss": 0.4208,
	"step": 776
	},
	{
	"epoch": 2.1215017064846418,
	"grad_norm": 0.15199348330497742,
	"learning_rate": 4.7723034347666696e-05,
	"loss": 0.4304,
	"step": 777
	},
	{
	"epoch": 2.124232081911263,
	"grad_norm": 0.15406261384487152,
	"learning_rate": 4.745223370156797e-05,
	"loss": 0.439,
	"step": 778
	},
	{
	"epoch": 2.126962457337884,
	"grad_norm": 0.1821894645690918,
	"learning_rate": 4.71819643553887e-05,
	"loss": 0.4298,
	"step": 779
	},
	{
	"epoch": 2.1296928327645053,
	"grad_norm": 0.15180355310440063,
	"learning_rate": 4.691222904176791e-05,
	"loss": 0.4136,
	"step": 780
	},
	{
	"epoch": 2.1324232081911263,
	"grad_norm": 0.17140239477157593,
	"learning_rate": 4.6643030487945326e-05,
	"loss": 0.4292,
	"step": 781
	},
	{
	"epoch": 2.1351535836177473,
	"grad_norm": 0.15253609418869019,
	"learning_rate": 4.6374371415733496e-05,
	"loss": 0.4393,
	"step": 782
	},
	{
	"epoch": 2.137883959044369,
	"grad_norm": 0.15631216764450073,
	"learning_rate": 4.6106254541490325e-05,
	"loss": 0.4162,
	"step": 783
	},
	{
	"epoch": 2.14061433447099,
	"grad_norm": 0.18031221628189087,
	"learning_rate": 4.583868257609171e-05,
	"loss": 0.415,
	"step": 784
	},
	{
	"epoch": 2.143344709897611,
	"grad_norm": 0.15720027685165405,
	"learning_rate": 4.55716582249042e-05,
	"loss": 0.4288,
	"step": 785
	},
	{
	"epoch": 2.146075085324232,
	"grad_norm": 0.15618009865283966,
	"learning_rate": 4.530518418775733e-05,
	"loss": 0.4238,
	"step": 786
	},
	{
	"epoch": 2.1488054607508533,
	"grad_norm": 0.16991287469863892,
	"learning_rate": 4.50392631589166e-05,
	"loss": 0.4321,
	"step": 787
	},
	{
	"epoch": 2.1515358361774743,
	"grad_norm": 0.16138002276420593,
	"learning_rate": 4.477389782705628e-05,
	"loss": 0.4172,
	"step": 788
	},
	{
	"epoch": 2.1542662116040954,
	"grad_norm": 0.15910767018795013,
	"learning_rate": 4.450909087523186e-05,
	"loss": 0.4149,
	"step": 789
	},
	{
	"epoch": 2.156996587030717,
	"grad_norm": 0.15495507419109344,
	"learning_rate": 4.424484498085335e-05,
	"loss": 0.4351,
	"step": 790
	},
	{
	"epoch": 2.159726962457338,
	"grad_norm": 0.15659591555595398,
	"learning_rate": 4.398116281565794e-05,
	"loss": 0.4254,
	"step": 791
	},
	{
	"epoch": 2.162457337883959,
	"grad_norm": 0.16147974133491516,
	"learning_rate": 4.371804704568309e-05,
	"loss": 0.4265,
	"step": 792
	},
	{
	"epoch": 2.1651877133105804,
	"grad_norm": 0.14304347336292267,
	"learning_rate": 4.345550033123954e-05,
	"loss": 0.4211,
	"step": 793
	},
	{
	"epoch": 2.1679180887372014,
	"grad_norm": 0.15479592978954315,
	"learning_rate": 4.3193525326884435e-05,
	"loss": 0.4002,
	"step": 794
	},
	{
	"epoch": 2.1706484641638224,
	"grad_norm": 0.1610931009054184,
	"learning_rate": 4.293212468139447e-05,
	"loss": 0.4156,
	"step": 795
	},
	{
	"epoch": 2.173378839590444,
	"grad_norm": 0.16558706760406494,
	"learning_rate": 4.267130103773911e-05,
	"loss": 0.4285,
	"step": 796
	},
	{
	"epoch": 2.176109215017065,
	"grad_norm": 0.16455373167991638,
	"learning_rate": 4.241105703305388e-05,
	"loss": 0.4058,
	"step": 797
	},
	{
	"epoch": 2.178839590443686,
	"grad_norm": 0.15886934101581573,
	"learning_rate": 4.215139529861367e-05,
	"loss": 0.4311,
	"step": 798
	},
	{
	"epoch": 2.181569965870307,
	"grad_norm": 0.15277110040187836,
	"learning_rate": 4.189231845980618e-05,
	"loss": 0.4176,
	"step": 799
	},
	{
	"epoch": 2.1843003412969284,
	"grad_norm": 0.15656784176826477,
	"learning_rate": 4.163382913610533e-05,
	"loss": 0.4205,
	"step": 800
	},
	{
	"epoch": 2.1870307167235494,
	"grad_norm": 0.1564100980758667,
	"learning_rate": 4.1375929941044786e-05,
	"loss": 0.4313,
	"step": 801
	},
	{
	"epoch": 2.1897610921501705,
	"grad_norm": 0.14734816551208496,
	"learning_rate": 4.111862348219158e-05,
	"loss": 0.4258,
	"step": 802
	},
	{
	"epoch": 2.192491467576792,
	"grad_norm": 0.15179724991321564,
	"learning_rate": 4.086191236111964e-05,
	"loss": 0.4372,
	"step": 803
	},
	{
	"epoch": 2.195221843003413,
	"grad_norm": 0.16015468537807465,
	"learning_rate": 4.060579917338362e-05,
	"loss": 0.4104,
	"step": 804
	},
	{
	"epoch": 2.197952218430034,
	"grad_norm": 0.1574854701757431,
	"learning_rate": 4.0350286508492554e-05,
	"loss": 0.4068,
	"step": 805
	},
	{
	"epoch": 2.2006825938566554,
	"grad_norm": 0.14426739513874054,
	"learning_rate": 4.009537694988372e-05,
	"loss": 0.3904,
	"step": 806
	},
	{
	"epoch": 2.2034129692832765,
	"grad_norm": 0.14949829876422882,
	"learning_rate": 3.9841073074896517e-05,
	"loss": 0.3934,
	"step": 807
	},
	{
	"epoch": 2.2061433447098975,
	"grad_norm": 0.1610165387392044,
	"learning_rate": 3.958737745474638e-05,
	"loss": 0.4207,
	"step": 808
	},
	{
	"epoch": 2.208873720136519,
	"grad_norm": 0.15804022550582886,
	"learning_rate": 3.933429265449882e-05,
	"loss": 0.3968,
	"step": 809
	},
	{
	"epoch": 2.21160409556314,
	"grad_norm": 0.15507763624191284,
	"learning_rate": 3.9081821233043436e-05,
	"loss": 0.4322,
	"step": 810
	},
	{
	"epoch": 2.214334470989761,
	"grad_norm": 0.16331470012664795,
	"learning_rate": 3.8829965743068174e-05,
	"loss": 0.4317,
	"step": 811
	},
	{
	"epoch": 2.217064846416382,
	"grad_norm": 0.14913159608840942,
	"learning_rate": 3.857872873103322e-05,
	"loss": 0.4098,
	"step": 812
	},
	{
	"epoch": 2.2197952218430035,
	"grad_norm": 0.15193897485733032,
	"learning_rate": 3.832811273714569e-05,
	"loss": 0.4319,
	"step": 813
	},
	{
	"epoch": 2.2225255972696245,
	"grad_norm": 0.15656188130378723,
	"learning_rate": 3.807812029533362e-05,
	"loss": 0.3962,
	"step": 814
	},
	{
	"epoch": 2.2252559726962455,
	"grad_norm": 0.1457897126674652,
	"learning_rate": 3.7828753933220295e-05,
	"loss": 0.4044,
	"step": 815
	},
	{
	"epoch": 2.227986348122867,
	"grad_norm": 0.15471549332141876,
	"learning_rate": 3.758001617209906e-05,
	"loss": 0.4251,
	"step": 816
	},
	{
	"epoch": 2.230716723549488,
	"grad_norm": 0.16151392459869385,
	"learning_rate": 3.733190952690753e-05,
	"loss": 0.4278,
	"step": 817
	},
	{
	"epoch": 2.233447098976109,
	"grad_norm": 0.15231560170650482,
	"learning_rate": 3.708443650620206e-05,
	"loss": 0.4286,
	"step": 818
	},
	{
	"epoch": 2.2361774744027305,
	"grad_norm": 0.13981011509895325,
	"learning_rate": 3.683759961213282e-05,
	"loss": 0.4127,
	"step": 819
	},
	{
	"epoch": 2.2389078498293515,
	"grad_norm": 0.16484162211418152,
	"learning_rate": 3.6591401340418116e-05,
	"loss": 0.4399,
	"step": 820
	},
	{
	"epoch": 2.2416382252559726,
	"grad_norm": 0.15228329598903656,
	"learning_rate": 3.634584418031915e-05,
	"loss": 0.4247,
	"step": 821
	},
	{
	"epoch": 2.244368600682594,
	"grad_norm": 0.14780929684638977,
	"learning_rate": 3.6100930614615205e-05,
	"loss": 0.4324,
	"step": 822
	},
	{
	"epoch": 2.247098976109215,
	"grad_norm": 0.1611994057893753,
	"learning_rate": 3.585666311957817e-05,
	"loss": 0.4263,
	"step": 823
	},
	{
	"epoch": 2.249829351535836,
	"grad_norm": 0.16382110118865967,
	"learning_rate": 3.561304416494762e-05,
	"loss": 0.4332,
	"step": 824
	},
	{
	"epoch": 2.252559726962457,
	"grad_norm": 0.15646643936634064,
	"learning_rate": 3.53700762139059e-05,
	"loss": 0.4132,
	"step": 825
	},
	{
	"epoch": 2.2552901023890786,
	"grad_norm": 0.16182062029838562,
	"learning_rate": 3.512776172305331e-05,
	"loss": 0.4199,
	"step": 826
	},
	{
	"epoch": 2.2580204778156996,
	"grad_norm": 0.15407824516296387,
	"learning_rate": 3.4886103142382945e-05,
	"loss": 0.4087,
	"step": 827
	},
	{
	"epoch": 2.260750853242321,
	"grad_norm": 0.1593010425567627,
	"learning_rate": 3.46451029152562e-05,
	"loss": 0.416,
	"step": 828
	},
	{
	"epoch": 2.263481228668942,
	"grad_norm": 0.15628038346767426,
	"learning_rate": 3.440476347837811e-05,
	"loss": 0.4296,
	"step": 829
	},
	{
	"epoch": 2.266211604095563,
	"grad_norm": 0.1571022868156433,
	"learning_rate": 3.41650872617724e-05,
	"loss": 0.419,
	"step": 830
	},
	{
	"epoch": 2.268941979522184,
	"grad_norm": 0.15472716093063354,
	"learning_rate": 3.392607668875718e-05,
	"loss": 0.4151,
	"step": 831
	},
	{
	"epoch": 2.2716723549488056,
	"grad_norm": 0.1419110745191574,
	"learning_rate": 3.36877341759205e-05,
	"loss": 0.3991,
	"step": 832
	},
	{
	"epoch": 2.2744027303754266,
	"grad_norm": 0.15272633731365204,
	"learning_rate": 3.345006213309557e-05,
	"loss": 0.3965,
	"step": 833
	},
	{
	"epoch": 2.2771331058020476,
	"grad_norm": 0.15313783288002014,
	"learning_rate": 3.321306296333673e-05,
	"loss": 0.4022,
	"step": 834
	},
	{
	"epoch": 2.279863481228669,
	"grad_norm": 0.14547322690486908,
	"learning_rate": 3.29767390628951e-05,
	"loss": 0.4127,
	"step": 835
	},
	{
	"epoch": 2.28259385665529,
	"grad_norm": 0.15116067230701447,
	"learning_rate": 3.274109282119413e-05,
	"loss": 0.4086,
	"step": 836
	},
	{
	"epoch": 2.285324232081911,
	"grad_norm": 0.1611739993095398,
	"learning_rate": 3.250612662080567e-05,
	"loss": 0.4261,
	"step": 837
	},
	{
	"epoch": 2.288054607508532,
	"grad_norm": 0.1671733260154724,
	"learning_rate": 3.227184283742591e-05,
	"loss": 0.4244,
	"step": 838
	},
	{
	"epoch": 2.2907849829351536,
	"grad_norm": 0.16232764720916748,
	"learning_rate": 3.2038243839851075e-05,
	"loss": 0.4118,
	"step": 839
	},
	{
	"epoch": 2.2935153583617747,
	"grad_norm": 0.1595815271139145,
	"learning_rate": 3.180533198995379e-05,
	"loss": 0.4333,
	"step": 840
	},
	{
	"epoch": 2.296245733788396,
	"grad_norm": 0.14766521751880646,
	"learning_rate": 3.1573109642659024e-05,
	"loss": 0.4066,
	"step": 841
	},
	{
	"epoch": 2.298976109215017,
	"grad_norm": 0.16081617772579193,
	"learning_rate": 3.134157914592032e-05,
	"loss": 0.4032,
	"step": 842
	},
	{
	"epoch": 2.301706484641638,
	"grad_norm": 0.15681862831115723,
	"learning_rate": 3.111074284069606e-05,
	"loss": 0.4256,
	"step": 843
	},
	{
	"epoch": 2.304436860068259,
	"grad_norm": 0.15217512845993042,
	"learning_rate": 3.088060306092582e-05,
	"loss": 0.4142,
	"step": 844
	},
	{
	"epoch": 2.3071672354948807,
	"grad_norm": 0.15541419386863708,
	"learning_rate": 3.065116213350671e-05,
	"loss": 0.4246,
	"step": 845
	},
	{
	"epoch": 2.3098976109215017,
	"grad_norm": 0.16863110661506653,
	"learning_rate": 3.042242237826991e-05,
	"loss": 0.4167,
	"step": 846
	},
	{
	"epoch": 2.3126279863481227,
	"grad_norm": 0.15719062089920044,
	"learning_rate": 3.0194386107957173e-05,
	"loss": 0.4272,
	"step": 847
	},
	{
	"epoch": 2.315358361774744,
	"grad_norm": 0.14599116146564484,
	"learning_rate": 2.9967055628197472e-05,
	"loss": 0.3826,
	"step": 848
	},
	{
	"epoch": 2.318088737201365,
	"grad_norm": 0.16562673449516296,
	"learning_rate": 2.974043323748367e-05,
	"loss": 0.4246,
	"step": 849
	},
	{
	"epoch": 2.3208191126279862,
	"grad_norm": 0.16425776481628418,
	"learning_rate": 2.951452122714926e-05,
	"loss": 0.4182,
	"step": 850
	},
	{
	"epoch": 2.3235494880546073,
	"grad_norm": 0.15654055774211884,
	"learning_rate": 2.9289321881345254e-05,
	"loss": 0.4349,
	"step": 851
	},
	{
	"epoch": 2.3262798634812287,
	"grad_norm": 0.15405891835689545,
	"learning_rate": 2.9064837477017048e-05,
	"loss": 0.4045,
	"step": 852
	},
	{
	"epoch": 2.3290102389078498,
	"grad_norm": 0.16419056057929993,
	"learning_rate": 2.88410702838814e-05,
	"loss": 0.4174,
	"step": 853
	},
	{
	"epoch": 2.331740614334471,
	"grad_norm": 0.16045036911964417,
	"learning_rate": 2.861802256440348e-05,
	"loss": 0.4185,
	"step": 854
	},
	{
	"epoch": 2.3344709897610922,
	"grad_norm": 0.15399502217769623,
	"learning_rate": 2.8395696573774032e-05,
	"loss": 0.4228,
	"step": 855
	},
	{
	"epoch": 2.3372013651877133,
	"grad_norm": 0.1503557562828064,
	"learning_rate": 2.8174094559886534e-05,
	"loss": 0.4146,
	"step": 856
	},
	{
	"epoch": 2.3399317406143343,
	"grad_norm": 0.16562052071094513,
	"learning_rate": 2.7953218763314458e-05,
	"loss": 0.4127,
	"step": 857
	},
	{
	"epoch": 2.3426621160409558,
	"grad_norm": 0.15860068798065186,
	"learning_rate": 2.773307141728867e-05,
	"loss": 0.4221,
	"step": 858
	},
	{
	"epoch": 2.345392491467577,
	"grad_norm": 0.16427016258239746,
	"learning_rate": 2.7513654747674788e-05,
	"loss": 0.4202,
	"step": 859
	},
	{
	"epoch": 2.348122866894198,
	"grad_norm": 0.15462426841259003,
	"learning_rate": 2.729497097295075e-05,
	"loss": 0.3986,
	"step": 860
	},
	{
	"epoch": 2.3508532423208193,
	"grad_norm": 0.150539368391037,
	"learning_rate": 2.7077022304184295e-05,
	"loss": 0.4112,
	"step": 861
	},
	{
	"epoch": 2.3535836177474403,
	"grad_norm": 0.14942197501659393,
	"learning_rate": 2.685981094501069e-05,
	"loss": 0.4173,
	"step": 862
	},
	{
	"epoch": 2.3563139931740613,
	"grad_norm": 0.15172016620635986,
	"learning_rate": 2.6643339091610377e-05,
	"loss": 0.4273,
	"step": 863
	},
	{
	"epoch": 2.359044368600683,
	"grad_norm": 0.15123523771762848,
	"learning_rate": 2.6427608932686843e-05,
	"loss": 0.4104,
	"step": 864
	},
	{
	"epoch": 2.361774744027304,
	"grad_norm": 0.15563499927520752,
	"learning_rate": 2.621262264944444e-05,
	"loss": 0.4215,
	"step": 865
	},
	{
	"epoch": 2.364505119453925,
	"grad_norm": 0.15889500081539154,
	"learning_rate": 2.599838241556626e-05,
	"loss": 0.4058,
	"step": 866
	},
	{
	"epoch": 2.3672354948805463,
	"grad_norm": 0.1514395922422409,
	"learning_rate": 2.5784890397192398e-05,
	"loss": 0.4058,
	"step": 867
	},
	{
	"epoch": 2.3699658703071673,
	"grad_norm": 0.15626998245716095,
	"learning_rate": 2.5572148752897795e-05,
	"loss": 0.4092,
	"step": 868
	},
	{
	"epoch": 2.3726962457337883,
	"grad_norm": 0.15478669106960297,
	"learning_rate": 2.5360159633670457e-05,
	"loss": 0.424,
	"step": 869
	},
	{
	"epoch": 2.3754266211604094,
	"grad_norm": 0.14791764318943024,
	"learning_rate": 2.514892518288988e-05,
	"loss": 0.4294,
	"step": 870
	},
	{
	"epoch": 2.378156996587031,
	"grad_norm": 0.15729525685310364,
	"learning_rate": 2.4938447536305243e-05,
	"loss": 0.4204,
	"step": 871
	},
	{
	"epoch": 2.380887372013652,
	"grad_norm": 0.1549883335828781,
	"learning_rate": 2.472872882201368e-05,
	"loss": 0.4302,
	"step": 872
	},
	{
	"epoch": 2.383617747440273,
	"grad_norm": 0.15608322620391846,
	"learning_rate": 2.451977116043911e-05,
	"loss": 0.4208,
	"step": 873
	},
	{
	"epoch": 2.3863481228668944,
	"grad_norm": 0.15836332738399506,
	"learning_rate": 2.431157666431052e-05,
	"loss": 0.4141,
	"step": 874
	},
	{
	"epoch": 2.3890784982935154,
	"grad_norm": 0.15775950253009796,
	"learning_rate": 2.410414743864059e-05,
	"loss": 0.4142,
	"step": 875
	},
	{
	"epoch": 2.3918088737201364,
	"grad_norm": 0.1490509808063507,
	"learning_rate": 2.3897485580704682e-05,
	"loss": 0.4079,
	"step": 876
	},
	{
	"epoch": 2.394539249146758,
	"grad_norm": 0.16001944243907928,
	"learning_rate": 2.3691593180019366e-05,
	"loss": 0.4276,
	"step": 877
	},
	{
	"epoch": 2.397269624573379,
	"grad_norm": 0.14967067539691925,
	"learning_rate": 2.3486472318321307e-05,
	"loss": 0.4045,
	"step": 878
	},
	{
	"epoch": 2.4,
	"grad_norm": 0.14551271498203278,
	"learning_rate": 2.3282125069546433e-05,
	"loss": 0.4169,
	"step": 879
	},
	{
	"epoch": 2.4027303754266214,
	"grad_norm": 0.15170224010944366,
	"learning_rate": 2.3078553499808797e-05,
	"loss": 0.4166,
	"step": 880
	},
	{
	"epoch": 2.4054607508532424,
	"grad_norm": 0.15381450951099396,
	"learning_rate": 2.2875759667379614e-05,
	"loss": 0.4046,
	"step": 881
	},
	{
	"epoch": 2.4081911262798634,
	"grad_norm": 0.15133820474147797,
	"learning_rate": 2.267374562266662e-05,
	"loss": 0.4053,
	"step": 882
	},
	{
	"epoch": 2.4109215017064844,
	"grad_norm": 0.15135996043682098,
	"learning_rate": 2.2472513408193384e-05,
	"loss": 0.4145,
	"step": 883
	},
	{
	"epoch": 2.413651877133106,
	"grad_norm": 0.1551310420036316,
	"learning_rate": 2.227206505857834e-05,
	"loss": 0.4107,
	"step": 884
	},
	{
	"epoch": 2.416382252559727,
	"grad_norm": 0.15202271938323975,
	"learning_rate": 2.207240260051453e-05,
	"loss": 0.409,
	"step": 885
	},
	{
	"epoch": 2.419112627986348,
	"grad_norm": 0.15912394225597382,
	"learning_rate": 2.1873528052749092e-05,
	"loss": 0.4293,
	"step": 886
	},
	{
	"epoch": 2.4218430034129694,
	"grad_norm": 0.164555624127388,
	"learning_rate": 2.167544342606256e-05,
	"loss": 0.4153,
	"step": 887
	},
	{
	"epoch": 2.4245733788395905,
	"grad_norm": 0.1536960005760193,
	"learning_rate": 2.1478150723248857e-05,
	"loss": 0.4161,
	"step": 888
	},
	{
	"epoch": 2.4273037542662115,
	"grad_norm": 0.1572561115026474,
	"learning_rate": 2.1281651939094992e-05,
	"loss": 0.3975,
	"step": 889
	},
	{
	"epoch": 2.430034129692833,
	"grad_norm": 0.14872194826602936,
	"learning_rate": 2.1085949060360654e-05,
	"loss": 0.4028,
	"step": 890
	},
	{
	"epoch": 2.432764505119454,
	"grad_norm": 0.15553632378578186,
	"learning_rate": 2.089104406575837e-05,
	"loss": 0.4113,
	"step": 891
	},
	{
	"epoch": 2.435494880546075,
	"grad_norm": 0.15172426402568817,
	"learning_rate": 2.0696938925933506e-05,
	"loss": 0.407,
	"step": 892
	},
	{
	"epoch": 2.4382252559726965,
	"grad_norm": 0.1533356010913849,
	"learning_rate": 2.0503635603444094e-05,
	"loss": 0.4225,
	"step": 893
	},
	{
	"epoch": 2.4409556313993175,
	"grad_norm": 0.15870912373065948,
	"learning_rate": 2.0311136052741277e-05,
	"loss": 0.4176,
	"step": 894
	},
	{
	"epoch": 2.4436860068259385,
	"grad_norm": 0.15616737306118011,
	"learning_rate": 2.0119442220149353e-05,
	"loss": 0.4158,
	"step": 895
	},
	{
	"epoch": 2.4464163822525595,
	"grad_norm": 0.15833789110183716,
	"learning_rate": 1.9928556043846214e-05,
	"loss": 0.403,
	"step": 896
	},
	{
	"epoch": 2.449146757679181,
	"grad_norm": 0.1554342359304428,
	"learning_rate": 1.9738479453843682e-05,
	"loss": 0.396,
	"step": 897
	},
	{
	"epoch": 2.451877133105802,
	"grad_norm": 0.17515774071216583,
	"learning_rate": 1.9549214371968004e-05,
	"loss": 0.4096,
	"step": 898
	},
	{
	"epoch": 2.454607508532423,
	"grad_norm": 0.15816594660282135,
	"learning_rate": 1.936076271184044e-05,
	"loss": 0.4232,
	"step": 899
	},
	{
	"epoch": 2.4573378839590445,
	"grad_norm": 0.1528582125902176,
	"learning_rate": 1.9173126378857907e-05,
	"loss": 0.4145,
	"step": 900
	},
	{
	"epoch": 2.4600682593856655,
	"grad_norm": 0.16006483137607574,
	"learning_rate": 1.898630727017371e-05,
	"loss": 0.4201,
	"step": 901
	},
	{
	"epoch": 2.4627986348122866,
	"grad_norm": 0.15996922552585602,
	"learning_rate": 1.8800307274678364e-05,
	"loss": 0.4056,
	"step": 902
	},
	{
	"epoch": 2.465529010238908,
	"grad_norm": 0.1555391103029251,
	"learning_rate": 1.861512827298051e-05,
	"loss": 0.4315,
	"step": 903
	},
	{
	"epoch": 2.468259385665529,
	"grad_norm": 0.15501669049263,
	"learning_rate": 1.8430772137387853e-05,
	"loss": 0.4159,
	"step": 904
	},
	{
	"epoch": 2.47098976109215,
	"grad_norm": 0.1523975431919098,
	"learning_rate": 1.8247240731888294e-05,
	"loss": 0.4004,
	"step": 905
	},
	{
	"epoch": 2.4737201365187715,
	"grad_norm": 0.1715194135904312,
	"learning_rate": 1.806453591213103e-05,
	"loss": 0.4124,
	"step": 906
	},
	{
	"epoch": 2.4764505119453926,
	"grad_norm": 0.15396980941295624,
	"learning_rate": 1.788265952540784e-05,
	"loss": 0.4094,
	"step": 907
	},
	{
	"epoch": 2.4791808873720136,
	"grad_norm": 0.1634356677532196,
	"learning_rate": 1.7701613410634365e-05,
	"loss": 0.4257,
	"step": 908
	},
	{
	"epoch": 2.4819112627986346,
	"grad_norm": 0.1548430621623993,
	"learning_rate": 1.752139939833154e-05,
	"loss": 0.3942,
	"step": 909
	},
	{
	"epoch": 2.484641638225256,
	"grad_norm": 0.15945452451705933,
	"learning_rate": 1.734201931060706e-05,
	"loss": 0.3979,
	"step": 910
	},
	{
	"epoch": 2.487372013651877,
	"grad_norm": 0.16635702550411224,
	"learning_rate": 1.7163474961137028e-05,
	"loss": 0.422,
	"step": 911
	},
	{
	"epoch": 2.490102389078498,
	"grad_norm": 0.16245630383491516,
	"learning_rate": 1.6985768155147496e-05,
	"loss": 0.4126,
	"step": 912
	},
	{
	"epoch": 2.4928327645051196,
	"grad_norm": 0.14662671089172363,
	"learning_rate": 1.6808900689396336e-05,
	"loss": 0.4062,
	"step": 913
	},
	{
	"epoch": 2.4955631399317406,
	"grad_norm": 0.1555013507604599,
	"learning_rate": 1.663287435215498e-05,
	"loss": 0.4101,
	"step": 914
	},
	{
	"epoch": 2.4982935153583616,
	"grad_norm": 0.14830157160758972,
	"learning_rate": 1.645769092319045e-05,
	"loss": 0.3843,
	"step": 915
	},
	{
	"epoch": 2.5010238907849827,
	"grad_norm": 0.16135641932487488,
	"learning_rate": 1.6283352173747145e-05,
	"loss": 0.4229,
	"step": 916
	},
	{
	"epoch": 2.503754266211604,
	"grad_norm": 0.15229038894176483,
	"learning_rate": 1.6109859866529255e-05,
	"loss": 0.4209,
	"step": 917
	},
	{
	"epoch": 2.506484641638225,
	"grad_norm": 0.15836934745311737,
	"learning_rate": 1.5937215755682665e-05,
	"loss": 0.422,
	"step": 918
	},
	{
	"epoch": 2.5092150170648466,
	"grad_norm": 0.1639019101858139,
	"learning_rate": 1.5765421586777284e-05,
	"loss": 0.4206,
	"step": 919
	},
	{
	"epoch": 2.5119453924914676,
	"grad_norm": 0.15451960265636444,
	"learning_rate": 1.5594479096789537e-05,
	"loss": 0.411,
	"step": 920
	},
	{
	"epoch": 2.5146757679180887,
	"grad_norm": 0.16513267159461975,
	"learning_rate": 1.5424390014084644e-05,
	"loss": 0.4324,
	"step": 921
	},
	{
	"epoch": 2.5174061433447097,
	"grad_norm": 0.15432654321193695,
	"learning_rate": 1.5255156058399122e-05,
	"loss": 0.4074,
	"step": 922
	},
	{
	"epoch": 2.520136518771331,
	"grad_norm": 0.16064870357513428,
	"learning_rate": 1.5086778940823543e-05,
	"loss": 0.417,
	"step": 923
	},
	{
	"epoch": 2.522866894197952,
	"grad_norm": 0.16009055078029633,
	"learning_rate": 1.4919260363785215e-05,
	"loss": 0.4128,
	"step": 924
	},
	{
	"epoch": 2.5255972696245736,
	"grad_norm": 0.1598517745733261,
	"learning_rate": 1.4752602021030792e-05,
	"loss": 0.4191,
	"step": 925
	},
	{
	"epoch": 2.5283276450511947,
	"grad_norm": 0.15252196788787842,
	"learning_rate": 1.4586805597609331e-05,
	"loss": 0.4124,
	"step": 926
	},
	{
	"epoch": 2.5310580204778157,
	"grad_norm": 0.1643335521221161,
	"learning_rate": 1.442187276985526e-05,
	"loss": 0.4207,
	"step": 927
	},
	{
	"epoch": 2.5337883959044367,
	"grad_norm": 0.15445098280906677,
	"learning_rate": 1.4257805205371234e-05,
	"loss": 0.3993,
	"step": 928
	},
	{
	"epoch": 2.536518771331058,
	"grad_norm": 0.15789660811424255,
	"learning_rate": 1.4094604563011472e-05,
	"loss": 0.4103,
	"step": 929
	},
	{
	"epoch": 2.539249146757679,
	"grad_norm": 0.15002034604549408,
	"learning_rate": 1.3932272492864984e-05,
	"loss": 0.4042,
	"step": 930
	},
	{
	"epoch": 2.5419795221843002,
	"grad_norm": 0.16790151596069336,
	"learning_rate": 1.3770810636238684e-05,
	"loss": 0.4245,
	"step": 931
	},
	{
	"epoch": 2.5447098976109217,
	"grad_norm": 0.1551153063774109,
	"learning_rate": 1.3610220625641002e-05,
	"loss": 0.4145,
	"step": 932
	},
	{
	"epoch": 2.5474402730375427,
	"grad_norm": 0.15363937616348267,
	"learning_rate": 1.3450504084765381e-05,
	"loss": 0.384,
	"step": 933
	},
	{
	"epoch": 2.5501706484641637,
	"grad_norm": 0.15322524309158325,
	"learning_rate": 1.3291662628473633e-05,
	"loss": 0.4042,
	"step": 934
	},
	{
	"epoch": 2.5529010238907848,
	"grad_norm": 0.1649988293647766,
	"learning_rate": 1.313369786277987e-05,
	"loss": 0.4236,
	"step": 935
	},
	{
	"epoch": 2.5556313993174062,
	"grad_norm": 0.150667205452919,
	"learning_rate": 1.2976611384834148e-05,
	"loss": 0.3908,
	"step": 936
	},
	{
	"epoch": 2.5583617747440273,
	"grad_norm": 0.16129009425640106,
	"learning_rate": 1.2820404782906315e-05,
	"loss": 0.4167,
	"step": 937
	},
	{
	"epoch": 2.5610921501706487,
	"grad_norm": 0.16965742409229279,
	"learning_rate": 1.2665079636369969e-05,
	"loss": 0.4145,
	"step": 938
	},
	{
	"epoch": 2.5638225255972698,
	"grad_norm": 0.14878158271312714,
	"learning_rate": 1.2510637515686496e-05,
	"loss": 0.4236,
	"step": 939
	},
	{
	"epoch": 2.5665529010238908,
	"grad_norm": 0.14919213950634003,
	"learning_rate": 1.2357079982389197e-05,
	"loss": 0.396,
	"step": 940
	},
	{
	"epoch": 2.569283276450512,
	"grad_norm": 0.1563798040151596,
	"learning_rate": 1.2204408589067462e-05,
	"loss": 0.3992,
	"step": 941
	},
	{
	"epoch": 2.5720136518771333,
	"grad_norm": 0.15581347048282623,
	"learning_rate": 1.2052624879351104e-05,
	"loss": 0.4261,
	"step": 942
	},
	{
	"epoch": 2.5747440273037543,
	"grad_norm": 0.15438248217105865,
	"learning_rate": 1.190173038789476e-05,
	"loss": 0.4013,
	"step": 943
	},
	{
	"epoch": 2.5774744027303753,
	"grad_norm": 0.15199199318885803,
	"learning_rate": 1.1751726640362349e-05,
	"loss": 0.4089,
	"step": 944
	},
	{
	"epoch": 2.580204778156997,
	"grad_norm": 0.1516939401626587,
	"learning_rate": 1.1602615153411667e-05,
	"loss": 0.4008,
	"step": 945
	},
	{
	"epoch": 2.582935153583618,
	"grad_norm": 0.15474575757980347,
	"learning_rate": 1.1454397434679021e-05,
	"loss": 0.4115,
	"step": 946
	},
	{
	"epoch": 2.585665529010239,
	"grad_norm": 0.15419447422027588,
	"learning_rate": 1.1307074982764022e-05,
	"loss": 0.4187,
	"step": 947
	},
	{
	"epoch": 2.58839590443686,
	"grad_norm": 0.15413175523281097,
	"learning_rate": 1.116064928721442e-05,
	"loss": 0.4191,
	"step": 948
	},
	{
	"epoch": 2.5911262798634813,
	"grad_norm": 0.1559099704027176,
	"learning_rate": 1.1015121828511032e-05,
	"loss": 0.4136,
	"step": 949
	},
	{
	"epoch": 2.5938566552901023,
	"grad_norm": 0.16210560500621796,
	"learning_rate": 1.0870494078052796e-05,
	"loss": 0.4204,
	"step": 950
	},
	{
	"epoch": 2.596587030716724,
	"grad_norm": 0.15410131216049194,
	"learning_rate": 1.0726767498141877e-05,
	"loss": 0.4098,
	"step": 951
	},
	{
	"epoch": 2.599317406143345,
	"grad_norm": 0.14466793835163116,
	"learning_rate": 1.0583943541968856e-05,
	"loss": 0.3832,
	"step": 952
	},
	{
	"epoch": 2.602047781569966,
	"grad_norm": 0.1512717753648758,
	"learning_rate": 1.044202365359811e-05,
	"loss": 0.4132,
	"step": 953
	},
	{
	"epoch": 2.604778156996587,
	"grad_norm": 0.1530720293521881,
	"learning_rate": 1.0301009267953143e-05,
	"loss": 0.4165,
	"step": 954
	},
	{
	"epoch": 2.6075085324232083,
	"grad_norm": 0.16781674325466156,
	"learning_rate": 1.0160901810802115e-05,
	"loss": 0.4203,
	"step": 955
	},
	{
	"epoch": 2.6102389078498294,
	"grad_norm": 0.14876051247119904,
	"learning_rate": 1.0021702698743407e-05,
	"loss": 0.4168,
	"step": 956
	},
	{
	"epoch": 2.6129692832764504,
	"grad_norm": 0.15021638572216034,
	"learning_rate": 9.883413339191294e-06,
	"loss": 0.4173,
	"step": 957
	},
	{
	"epoch": 2.615699658703072,
	"grad_norm": 0.159826397895813,
	"learning_rate": 9.746035130361742e-06,
	"loss": 0.4279,
	"step": 958
	},
	{
	"epoch": 2.618430034129693,
	"grad_norm": 0.156574085354805,
	"learning_rate": 9.609569461258262e-06,
	"loss": 0.4277,
	"step": 959
	},
	{
	"epoch": 2.621160409556314,
	"grad_norm": 0.157151460647583,
	"learning_rate": 9.474017711657834e-06,
	"loss": 0.412,
	"step": 960
	},
	{
	"epoch": 2.623890784982935,
	"grad_norm": 0.14887213706970215,
	"learning_rate": 9.339381252097e-06,
	"loss": 0.4012,
	"step": 961
	},
	{
	"epoch": 2.6266211604095564,
	"grad_norm": 0.15450581908226013,
	"learning_rate": 9.205661443857994e-06,
	"loss": 0.4077,
	"step": 962
	},
	{
	"epoch": 2.6293515358361774,
	"grad_norm": 0.14593878388404846,
	"learning_rate": 9.072859638954955e-06,
	"loss": 0.4064,
	"step": 963
	},
	{
	"epoch": 2.632081911262799,
	"grad_norm": 0.156602144241333,
	"learning_rate": 8.940977180120247e-06,
	"loss": 0.4267,
	"step": 964
	},
	{
	"epoch": 2.63481228668942,
	"grad_norm": 0.16739366948604584,
	"learning_rate": 8.810015400790994e-06,
	"loss": 0.4176,
	"step": 965
	},
	{
	"epoch": 2.637542662116041,
	"grad_norm": 0.15772853791713715,
	"learning_rate": 8.67997562509546e-06,
	"loss": 0.424,
	"step": 966
	},
	{
	"epoch": 2.640273037542662,
	"grad_norm": 0.15860068798065186,
	"learning_rate": 8.550859167839664e-06,
	"loss": 0.4167,
	"step": 967
	},
	{
	"epoch": 2.6430034129692834,
	"grad_norm": 0.15044620633125305,
	"learning_rate": 8.422667334494249e-06,
	"loss": 0.3916,
	"step": 968
	},
	{
	"epoch": 2.6457337883959045,
	"grad_norm": 0.150175541639328,
	"learning_rate": 8.295401421181125e-06,
	"loss": 0.3953,
	"step": 969
	},
	{
	"epoch": 2.6484641638225255,
	"grad_norm": 0.14696063101291656,
	"learning_rate": 8.169062714660346e-06,
	"loss": 0.4115,
	"step": 970
	},
	{
	"epoch": 2.651194539249147,
	"grad_norm": 0.1482568085193634,
	"learning_rate": 8.043652492317256e-06,
	"loss": 0.4017,
	"step": 971
	},
	{
	"epoch": 2.653924914675768,
	"grad_norm": 0.15316608548164368,
	"learning_rate": 7.919172022149456e-06,
	"loss": 0.4176,
	"step": 972
	},
	{
	"epoch": 2.656655290102389,
	"grad_norm": 0.15325787663459778,
	"learning_rate": 7.795622562753957e-06,
	"loss": 0.413,
	"step": 973
	},
	{
	"epoch": 2.65938566552901,
	"grad_norm": 0.16979162395000458,
	"learning_rate": 7.673005363314579e-06,
	"loss": 0.4244,
	"step": 974
	},
	{
	"epoch": 2.6621160409556315,
	"grad_norm": 0.16061224043369293,
	"learning_rate": 7.551321663589228e-06,
	"loss": 0.4082,
	"step": 975
	},
	{
	"epoch": 2.6648464163822525,
	"grad_norm": 0.1511377990245819,
	"learning_rate": 7.430572693897342e-06,
	"loss": 0.4047,
	"step": 976
	},
	{
	"epoch": 2.667576791808874,
	"grad_norm": 0.1549064815044403,
	"learning_rate": 7.310759675107515e-06,
	"loss": 0.4181,
	"step": 977
	},
	{
	"epoch": 2.670307167235495,
	"grad_norm": 0.15855662524700165,
	"learning_rate": 7.191883818625189e-06,
	"loss": 0.4242,
	"step": 978
	},
	{
	"epoch": 2.673037542662116,
	"grad_norm": 0.16046655178070068,
	"learning_rate": 7.073946326380243e-06,
	"loss": 0.4077,
	"step": 979
	},
	{
	"epoch": 2.675767918088737,
	"grad_norm": 0.1561538577079773,
	"learning_rate": 6.956948390814977e-06,
	"loss": 0.4117,
	"step": 980
	},
	{
	"epoch": 2.6784982935153585,
	"grad_norm": 0.16078175604343414,
	"learning_rate": 6.840891194872112e-06,
	"loss": 0.4342,
	"step": 981
	},
	{
	"epoch": 2.6812286689419795,
	"grad_norm": 0.15352275967597961,
	"learning_rate": 6.725775911982601e-06,
	"loss": 0.402,
	"step": 982
	},
	{
	"epoch": 2.6839590443686006,
	"grad_norm": 0.15391647815704346,
	"learning_rate": 6.6116037060539704e-06,
	"loss": 0.4095,
	"step": 983
	},
	{
	"epoch": 2.686689419795222,
	"grad_norm": 0.15556836128234863,
	"learning_rate": 6.498375731458528e-06,
	"loss": 0.4063,
	"step": 984
	},
	{
	"epoch": 2.689419795221843,
	"grad_norm": 0.15222905576229095,
	"learning_rate": 6.386093133021554e-06,
	"loss": 0.4139,
	"step": 985
	},
	{
	"epoch": 2.692150170648464,
	"grad_norm": 0.15189246833324432,
	"learning_rate": 6.274757046009871e-06,
	"loss": 0.4195,
	"step": 986
	},
	{
	"epoch": 2.694880546075085,
	"grad_norm": 0.15635477006435394,
	"learning_rate": 6.164368596120351e-06,
	"loss": 0.4137,
	"step": 987
	},
	{
	"epoch": 2.6976109215017066,
	"grad_norm": 0.15748678147792816,
	"learning_rate": 6.054928899468426e-06,
	"loss": 0.396,
	"step": 988
	},
	{
	"epoch": 2.7003412969283276,
	"grad_norm": 0.1585109978914261,
	"learning_rate": 5.946439062576903e-06,
	"loss": 0.4111,
	"step": 989
	},
	{
	"epoch": 2.703071672354949,
	"grad_norm": 0.15509194135665894,
	"learning_rate": 5.83890018236476e-06,
	"loss": 0.4259,
	"step": 990
	},
	{
	"epoch": 2.70580204778157,
	"grad_norm": 0.15443935990333557,
	"learning_rate": 5.732313346136031e-06,
	"loss": 0.4138,
	"step": 991
	},
	{
	"epoch": 2.708532423208191,
	"grad_norm": 0.1619240939617157,
	"learning_rate": 5.626679631568832e-06,
	"loss": 0.4091,
	"step": 992
	},
	{
	"epoch": 2.711262798634812,
	"grad_norm": 0.1597377061843872,
	"learning_rate": 5.522000106704439e-06,
	"loss": 0.4193,
	"step": 993
	},
	{
	"epoch": 2.7139931740614336,
	"grad_norm": 0.15020039677619934,
	"learning_rate": 5.418275829936537e-06,
	"loss": 0.4216,
	"step": 994
	},
	{
	"epoch": 2.7167235494880546,
	"grad_norm": 0.15186108648777008,
	"learning_rate": 5.315507850000456e-06,
	"loss": 0.4057,
	"step": 995
	},
	{
	"epoch": 2.7194539249146756,
	"grad_norm": 0.1630185842514038,
	"learning_rate": 5.2136972059626314e-06,
	"loss": 0.4141,
	"step": 996
	},
	{
	"epoch": 2.722184300341297,
	"grad_norm": 0.1610775589942932,
	"learning_rate": 5.112844927210048e-06,
	"loss": 0.4025,
	"step": 997
	},
	{
	"epoch": 2.724914675767918,
	"grad_norm": 0.15820352733135223,
	"learning_rate": 5.012952033439844e-06,
	"loss": 0.4197,
	"step": 998
	},
	{
	"epoch": 2.727645051194539,
	"grad_norm": 0.1567496657371521,
	"learning_rate": 4.914019534649039e-06,
	"loss": 0.4215,
	"step": 999
	},
	{
	"epoch": 2.73037542662116,
	"grad_norm": 0.1540801227092743,
	"learning_rate": 4.816048431124265e-06,
	"loss": 0.4237,
	"step": 1000
	},
	{
	"epoch": 2.7331058020477816,
	"grad_norm": 0.15339985489845276,
	"learning_rate": 4.719039713431694e-06,
	"loss": 0.4127,
	"step": 1001
	},
	{
	"epoch": 2.7358361774744027,
	"grad_norm": 0.1545177400112152,
	"learning_rate": 4.622994362406996e-06,
	"loss": 0.424,
	"step": 1002
	},
	{
	"epoch": 2.738566552901024,
	"grad_norm": 0.15001171827316284,
	"learning_rate": 4.527913349145441e-06,
	"loss": 0.4199,
	"step": 1003
	},
	{
	"epoch": 2.741296928327645,
	"grad_norm": 0.15352268517017365,
	"learning_rate": 4.433797634992077e-06,
	"loss": 0.3991,
	"step": 1004
	},
	{
	"epoch": 2.744027303754266,
	"grad_norm": 0.1552933305501938,
	"learning_rate": 4.340648171531992e-06,
	"loss": 0.4173,
	"step": 1005
	},
	{
	"epoch": 2.746757679180887,
	"grad_norm": 0.1541508436203003,
	"learning_rate": 4.248465900580734e-06,
	"loss": 0.4231,
	"step": 1006
	},
	{
	"epoch": 2.7494880546075087,
	"grad_norm": 0.15616647899150848,
	"learning_rate": 4.1572517541747294e-06,
	"loss": 0.4295,
	"step": 1007
	},
	{
	"epoch": 2.7522184300341297,
	"grad_norm": 0.14823675155639648,
	"learning_rate": 4.0670066545619225e-06,
	"loss": 0.403,
	"step": 1008
	},
	{
	"epoch": 2.7549488054607507,
	"grad_norm": 0.15372464060783386,
	"learning_rate": 3.977731514192385e-06,
	"loss": 0.4184,
	"step": 1009
	},
	{
	"epoch": 2.757679180887372,
	"grad_norm": 0.15206997096538544,
	"learning_rate": 3.889427235709153e-06,
	"loss": 0.4119,
	"step": 1010
	},
	{
	"epoch": 2.760409556313993,
	"grad_norm": 0.15120883285999298,
	"learning_rate": 3.802094711939075e-06,
	"loss": 0.413,
	"step": 1011
	},
	{
	"epoch": 2.7631399317406142,
	"grad_norm": 0.16259510815143585,
	"learning_rate": 3.7157348258837652e-06,
	"loss": 0.4253,
	"step": 1012
	},
	{
	"epoch": 2.7658703071672353,
	"grad_norm": 0.15381862223148346,
	"learning_rate": 3.6303484507106966e-06,
	"loss": 0.4162,
	"step": 1013
	},
	{
	"epoch": 2.7686006825938567,
	"grad_norm": 0.15234719216823578,
	"learning_rate": 3.5459364497443694e-06,
	"loss": 0.3893,
	"step": 1014
	},
	{
	"epoch": 2.7713310580204777,
	"grad_norm": 0.1562722623348236,
	"learning_rate": 3.4624996764575977e-06,
	"loss": 0.3979,
	"step": 1015
	},
	{
	"epoch": 2.774061433447099,
	"grad_norm": 0.15057340264320374,
	"learning_rate": 3.3800389744628404e-06,
	"loss": 0.3861,
	"step": 1016
	},
	{
	"epoch": 2.7767918088737202,
	"grad_norm": 0.1512756198644638,
	"learning_rate": 3.298555177503726e-06,
	"loss": 0.4135,
	"step": 1017
	},
	{
	"epoch": 2.7795221843003413,
	"grad_norm": 0.1467510461807251,
	"learning_rate": 3.2180491094465415e-06,
	"loss": 0.4133,
	"step": 1018
	},
	{
	"epoch": 2.7822525597269623,
	"grad_norm": 0.1518513709306717,
	"learning_rate": 3.1385215842720027e-06,
	"loss": 0.4032,
	"step": 1019
	},
	{
	"epoch": 2.7849829351535837,
	"grad_norm": 0.15052153170108795,
	"learning_rate": 3.059973406066963e-06,
	"loss": 0.4157,
	"step": 1020
	},
	{
	"epoch": 2.7877133105802048,
	"grad_norm": 0.15297654271125793,
	"learning_rate": 2.9824053690162723e-06,
	"loss": 0.4153,
	"step": 1021
	},
	{
	"epoch": 2.790443686006826,
	"grad_norm": 0.15268519520759583,
	"learning_rate": 2.905818257394799e-06,
	"loss": 0.4083,
	"step": 1022
	},
	{
	"epoch": 2.7931740614334473,
	"grad_norm": 0.1531766653060913,
	"learning_rate": 2.8302128455594656e-06,
	"loss": 0.3943,
	"step": 1023
	},
	{
	"epoch": 2.7959044368600683,
	"grad_norm": 0.1546778380870819,
	"learning_rate": 2.7555898979413797e-06,
	"loss": 0.4091,
	"step": 1024
	},
	{
	"epoch": 2.7986348122866893,
	"grad_norm": 0.15699811279773712,
	"learning_rate": 2.6819501690382277e-06,
	"loss": 0.4249,
	"step": 1025
	},
	{
	"epoch": 2.8013651877133103,
	"grad_norm": 0.15149515867233276,
	"learning_rate": 2.609294403406537e-06,
	"loss": 0.4064,
	"step": 1026
	},
	{
	"epoch": 2.804095563139932,
	"grad_norm": 0.15525740385055542,
	"learning_rate": 2.537623335654127e-06,
	"loss": 0.4086,
	"step": 1027
	},
	{
	"epoch": 2.806825938566553,
	"grad_norm": 0.15066871047019958,
	"learning_rate": 2.4669376904328247e-06,
	"loss": 0.4046,
	"step": 1028
	},
	{
	"epoch": 2.8095563139931743,
	"grad_norm": 0.1503557562828064,
	"learning_rate": 2.397238182430994e-06,
	"loss": 0.4007,
	"step": 1029
	},
	{
	"epoch": 2.8122866894197953,
	"grad_norm": 0.15836714208126068,
	"learning_rate": 2.3285255163663532e-06,
	"loss": 0.4297,
	"step": 1030
	},
	{
	"epoch": 2.8150170648464163,
	"grad_norm": 0.1532403528690338,
	"learning_rate": 2.2608003869788786e-06,
	"loss": 0.4096,
	"step": 1031
	},
	{
	"epoch": 2.8177474402730374,
	"grad_norm": 0.1478443145751953,
	"learning_rate": 2.1940634790238e-06,
	"loss": 0.3819,
	"step": 1032
	},
	{
	"epoch": 2.820477815699659,
	"grad_norm": 0.1495964378118515,
	"learning_rate": 2.128315467264552e-06,
	"loss": 0.4086,
	"step": 1033
	},
	{
	"epoch": 2.82320819112628,
	"grad_norm": 0.15715493261814117,
	"learning_rate": 2.063557016466111e-06,
	"loss": 0.403,
	"step": 1034
	},
	{
	"epoch": 2.825938566552901,
	"grad_norm": 0.14644889533519745,
	"learning_rate": 1.999788781388201e-06,
	"loss": 0.405,
	"step": 1035
	},
	{
	"epoch": 2.8286689419795223,
	"grad_norm": 0.15347984433174133,
	"learning_rate": 1.9370114067785994e-06,
	"loss": 0.4198,
	"step": 1036
	},
	{
	"epoch": 2.8313993174061434,
	"grad_norm": 0.14934033155441284,
	"learning_rate": 1.8752255273667752e-06,
	"loss": 0.4078,
	"step": 1037
	},
	{
	"epoch": 2.8341296928327644,
	"grad_norm": 0.1513030081987381,
	"learning_rate": 1.8144317678573497e-06,
	"loss": 0.4165,
	"step": 1038
	},
	{
	"epoch": 2.8368600682593854,
	"grad_norm": 0.15821826457977295,
	"learning_rate": 1.754630742923813e-06,
	"loss": 0.4213,
	"step": 1039
	},
	{
	"epoch": 2.839590443686007,
	"grad_norm": 0.1506132036447525,
	"learning_rate": 1.6958230572023503e-06,
	"loss": 0.4058,
	"step": 1040
	},
	{
	"epoch": 2.842320819112628,
	"grad_norm": 0.15292277932167053,
	"learning_rate": 1.6380093052856483e-06,
	"loss": 0.4219,
	"step": 1041
	},
	{
	"epoch": 2.8450511945392494,
	"grad_norm": 0.15926344692707062,
	"learning_rate": 1.5811900717169538e-06,
	"loss": 0.4144,
	"step": 1042
	},
	{
	"epoch": 2.8477815699658704,
	"grad_norm": 0.15735220909118652,
	"learning_rate": 1.525365930984146e-06,
	"loss": 0.3986,
	"step": 1043
	},
	{
	"epoch": 2.8505119453924914,
	"grad_norm": 0.15544278919696808,
	"learning_rate": 1.4705374475138978e-06,
	"loss": 0.4151,
	"step": 1044
	},
	{
	"epoch": 2.8532423208191124,
	"grad_norm": 0.14867156744003296,
	"learning_rate": 1.416705175666e-06,
	"loss": 0.4039,
	"step": 1045
	},
	{
	"epoch": 2.855972696245734,
	"grad_norm": 0.15105663239955902,
	"learning_rate": 1.3638696597277679e-06,
	"loss": 0.4022,
	"step": 1046
	},
	{
	"epoch": 2.858703071672355,
	"grad_norm": 0.15061454474925995,
	"learning_rate": 1.3120314339084783e-06,
	"loss": 0.3928,
	"step": 1047
	},
	{
	"epoch": 2.861433447098976,
	"grad_norm": 0.15541335940361023,
	"learning_rate": 1.2611910223340407e-06,
	"loss": 0.4108,
	"step": 1048
	},
	{
	"epoch": 2.8641638225255974,
	"grad_norm": 0.15430454909801483,
	"learning_rate": 1.2113489390416566e-06,
	"loss": 0.4142,
	"step": 1049
	},
	{
	"epoch": 2.8668941979522184,
	"grad_norm": 0.1592140942811966,
	"learning_rate": 1.1625056879746133e-06,
	"loss": 0.4123,
	"step": 1050
	},
	{
	"epoch": 2.8696245733788395,
	"grad_norm": 0.15730910003185272,
	"learning_rate": 1.1146617629772315e-06,
	"loss": 0.4047,
	"step": 1051
	},
	{
	"epoch": 2.8723549488054605,
	"grad_norm": 0.15714263916015625,
	"learning_rate": 1.0678176477898372e-06,
	"loss": 0.4258,
	"step": 1052
	},
	{
	"epoch": 2.875085324232082,
	"grad_norm": 0.15376383066177368,
	"learning_rate": 1.0219738160438753e-06,
	"loss": 0.4098,
	"step": 1053
	},
	{
	"epoch": 2.877815699658703,
	"grad_norm": 0.15481893718242645,
	"learning_rate": 9.771307312571254e-07,
	"loss": 0.408,
	"step": 1054
	},
	{
	"epoch": 2.8805460750853245,
	"grad_norm": 0.15664780139923096,
	"learning_rate": 9.332888468290169e-07,
	"loss": 0.3983,
	"step": 1055
	},
	{
	"epoch": 2.8832764505119455,
	"grad_norm": 0.15640319883823395,
	"learning_rate": 8.90448606036054e-07,
	"loss": 0.4215,
	"step": 1056
	},
	{
	"epoch": 2.8860068259385665,
	"grad_norm": 0.15267756581306458,
	"learning_rate": 8.486104420272977e-07,
	"loss": 0.401,
	"step": 1057
	},
	{
	"epoch": 2.8887372013651875,
	"grad_norm": 0.15257929265499115,
	"learning_rate": 8.077747778200473e-07,
	"loss": 0.4145,
	"step": 1058
	},
	{
	"epoch": 2.891467576791809,
	"grad_norm": 0.1439686268568039,
	"learning_rate": 7.679420262954984e-07,
	"loss": 0.3806,
	"step": 1059
	},
	{
	"epoch": 2.89419795221843,
	"grad_norm": 0.15835194289684296,
	"learning_rate": 7.291125901946027e-07,
	"loss": 0.4156,
	"step": 1060
	},
	{
	"epoch": 2.896928327645051,
	"grad_norm": 0.15433841943740845,
	"learning_rate": 6.912868621140045e-07,
	"loss": 0.4198,
	"step": 1061
	},
	{
	"epoch": 2.8996587030716725,
	"grad_norm": 0.15369294583797455,
	"learning_rate": 6.544652245020433e-07,
	"loss": 0.4086,
	"step": 1062
	},
	{
	"epoch": 2.9023890784982935,
	"grad_norm": 0.16046328842639923,
	"learning_rate": 6.18648049654913e-07,
	"loss": 0.4046,
	"step": 1063
	},
	{
	"epoch": 2.9051194539249146,
	"grad_norm": 0.15750819444656372,
	"learning_rate": 5.838356997128869e-07,
	"loss": 0.4245,
	"step": 1064
	},
	{
	"epoch": 2.9078498293515356,
	"grad_norm": 0.16013328731060028,
	"learning_rate": 5.500285266566319e-07,
	"loss": 0.4128,
	"step": 1065
	},
	{
	"epoch": 2.910580204778157,
	"grad_norm": 0.15908414125442505,
	"learning_rate": 5.172268723036999e-07,
	"loss": 0.4256,
	"step": 1066
	},
	{
	"epoch": 2.913310580204778,
	"grad_norm": 0.15130603313446045,
	"learning_rate": 4.854310683050312e-07,
	"loss": 0.4253,
	"step": 1067
	},
	{
	"epoch": 2.9160409556313995,
	"grad_norm": 0.1521066129207611,
	"learning_rate": 4.546414361416229e-07,
	"loss": 0.4131,
	"step": 1068
	},
	{
	"epoch": 2.9187713310580206,
	"grad_norm": 0.15544620156288147,
	"learning_rate": 4.2485828712126583e-07,
	"loss": 0.4088,
	"step": 1069
	},
	{
	"epoch": 2.9215017064846416,
	"grad_norm": 0.1541679948568344,
	"learning_rate": 3.96081922375402e-07,
	"loss": 0.4083,
	"step": 1070
	},
	{
	"epoch": 2.9242320819112626,
	"grad_norm": 0.15230417251586914,
	"learning_rate": 3.6831263285608266e-07,
	"loss": 0.4067,
	"step": 1071
	},
	{
	"epoch": 2.926962457337884,
	"grad_norm": 0.15117131173610687,
	"learning_rate": 3.415506993330153e-07,
	"loss": 0.4138,
	"step": 1072
	},
	{
	"epoch": 2.929692832764505,
	"grad_norm": 0.1522316336631775,
	"learning_rate": 3.1579639239074365e-07,
	"loss": 0.4052,
	"step": 1073
	},
	{
	"epoch": 2.932423208191126,
	"grad_norm": 0.1582721471786499,
	"learning_rate": 2.9104997242590527e-07,
	"loss": 0.4056,
	"step": 1074
	},
	{
	"epoch": 2.9351535836177476,
	"grad_norm": 0.1515754610300064,
	"learning_rate": 2.673116896445671e-07,
	"loss": 0.4054,
	"step": 1075
	},
	{
	"epoch": 2.9378839590443686,
	"grad_norm": 0.15097399055957794,
	"learning_rate": 2.4458178405974975e-07,
	"loss": 0.4036,
	"step": 1076
	},
	{
	"epoch": 2.9406143344709896,
	"grad_norm": 0.1505846381187439,
	"learning_rate": 2.2286048548897376e-07,
	"loss": 0.4269,
	"step": 1077
	},
	{
	"epoch": 2.9433447098976107,
	"grad_norm": 0.14722640812397003,
	"learning_rate": 2.0214801355192824e-07,
	"loss": 0.3938,
	"step": 1078
	},
	{
	"epoch": 2.946075085324232,
	"grad_norm": 0.15851718187332153,
	"learning_rate": 1.824445776682504e-07,
	"loss": 0.4163,
	"step": 1079
	},
	{
	"epoch": 2.948805460750853,
	"grad_norm": 0.1539052575826645,
	"learning_rate": 1.6375037705543826e-07,
	"loss": 0.4119,
	"step": 1080
	},
	{
	"epoch": 2.9515358361774746,
	"grad_norm": 0.15709060430526733,
	"learning_rate": 1.4606560072679687e-07,
	"loss": 0.4125,
	"step": 1081
	},
	{
	"epoch": 2.9542662116040956,
	"grad_norm": 0.14519765973091125,
	"learning_rate": 1.2939042748955077e-07,
	"loss": 0.399,
	"step": 1082
	},
	{
	"epoch": 2.9569965870307167,
	"grad_norm": 0.1516365110874176,
	"learning_rate": 1.1372502594303446e-07,
	"loss": 0.4181,
	"step": 1083
	},
	{
	"epoch": 2.9597269624573377,
	"grad_norm": 0.15326349437236786,
	"learning_rate": 9.906955447697153e-08,
	"loss": 0.4174,
	"step": 1084
	},
	{
	"epoch": 2.962457337883959,
	"grad_norm": 0.15291821956634521,
	"learning_rate": 8.542416126989805e-08,
	"loss": 0.4166,
	"step": 1085
	},
	{
	"epoch": 2.96518771331058,
	"grad_norm": 0.14680640399456024,
	"learning_rate": 7.27889842876417e-08,
	"loss": 0.4111,
	"step": 1086
	},
	{
	"epoch": 2.967918088737201,
	"grad_norm": 0.15194863080978394,
	"learning_rate": 6.116415128194497e-08,
	"loss": 0.4054,
	"step": 1087
	},
	{
	"epoch": 2.9706484641638227,
	"grad_norm": 0.160339817404747,
	"learning_rate": 5.054977978916631e-08,
	"loss": 0.4133,
	"step": 1088
	},
	{
	"epoch": 2.9733788395904437,
	"grad_norm": 0.15164430439472198,
	"learning_rate": 4.094597712908099e-08,
	"loss": 0.4134,
	"step": 1089
	},
	{
	"epoch": 2.9761092150170647,
	"grad_norm": 0.14412933588027954,
	"learning_rate": 3.2352840403804264e-08,
	"loss": 0.3914,
	"step": 1090
	},
	{
	"epoch": 2.9788395904436857,
	"grad_norm": 0.15993493795394897,
	"learning_rate": 2.477045649681431e-08,
	"loss": 0.4335,
	"step": 1091
	},
	{
	"epoch": 2.981569965870307,
	"grad_norm": 0.1526769995689392,
	"learning_rate": 1.81989020720974e-08,
	"loss": 0.4129,
	"step": 1092
	},
	{
	"epoch": 2.9843003412969282,
	"grad_norm": 0.14546047151088715,
	"learning_rate": 1.2638243573293018e-08,
	"loss": 0.3924,
	"step": 1093
	},
	{
	"epoch": 2.9870307167235497,
	"grad_norm": 0.150767520070076,
	"learning_rate": 8.088537223116532e-09,
	"loss": 0.3982,
	"step": 1094
	},
	{
	"epoch": 2.9897610921501707,
	"grad_norm": 0.15718306601047516,
	"learning_rate": 4.549829022748586e-09,
	"loss": 0.418,
	"step": 1095
	},
	{
	"epoch": 2.9924914675767917,
	"grad_norm": 0.14539772272109985,
	"learning_rate": 2.0221547513243897e-09,
	"loss": 0.4034,
	"step": 1096
	},
	{
	"epoch": 2.9952218430034128,
	"grad_norm": 0.15158307552337646,
	"learning_rate": 5.055399656894721e-10,
	"loss": 0.4038,
	"step": 1097
	},
	{
	"epoch": 2.9979522184300342,
	"grad_norm": 0.148399218916893,
	"learning_rate": 0.0,
	"loss": 0.4121,
	"step": 1098
	}
	],
	"logging_steps": 1,
	"max_steps": 1098,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 5.529684799263867e+17,
	"train_batch_size": 8,
	"trial_name": null,
	"trial_params": null
	}