{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.25,
  "eval_steps": 500,
  "global_step": 25000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1e-05,
      "grad_norm": 1.0852325698269518,
      "learning_rate": 3e-06,
      "loss": 10.8502,
      "step": 1
    },
    {
      "epoch": 2e-05,
      "grad_norm": 1.0764721550065255,
      "learning_rate": 6e-06,
      "loss": 10.85,
      "step": 2
    },
    {
      "epoch": 3e-05,
      "grad_norm": 1.0926147944947429,
      "learning_rate": 9e-06,
      "loss": 10.8498,
      "step": 3
    },
    {
      "epoch": 4e-05,
      "grad_norm": 1.0859033473331443,
      "learning_rate": 1.2e-05,
      "loss": 10.8492,
      "step": 4
    },
    {
      "epoch": 5e-05,
      "grad_norm": 1.0906737956052586,
      "learning_rate": 1.5e-05,
      "loss": 10.8465,
      "step": 5
    },
    {
      "epoch": 6e-05,
      "grad_norm": 1.0895692490682567,
      "learning_rate": 1.8e-05,
      "loss": 10.8458,
      "step": 6
    },
    {
      "epoch": 7e-05,
      "grad_norm": 1.0913822945668856,
      "learning_rate": 2.1000000000000002e-05,
      "loss": 10.8367,
      "step": 7
    },
    {
      "epoch": 8e-05,
      "grad_norm": 1.076648393918225,
      "learning_rate": 2.4e-05,
      "loss": 10.8153,
      "step": 8
    },
    {
      "epoch": 9e-05,
      "grad_norm": 1.0616757084723456,
      "learning_rate": 2.7e-05,
      "loss": 10.8111,
      "step": 9
    },
    {
      "epoch": 0.0001,
      "grad_norm": 1.0802856236028353,
      "learning_rate": 3e-05,
      "loss": 10.7995,
      "step": 10
    },
    {
      "epoch": 0.00011,
      "grad_norm": 1.066268838859797,
      "learning_rate": 3.2999999999999996e-05,
      "loss": 10.7856,
      "step": 11
    },
    {
      "epoch": 0.00012,
      "grad_norm": 1.0695024318288597,
      "learning_rate": 3.6e-05,
      "loss": 10.7751,
      "step": 12
    },
    {
      "epoch": 0.00013,
      "grad_norm": 1.0509312651691944,
      "learning_rate": 3.9e-05,
      "loss": 10.7566,
      "step": 13
    },
    {
      "epoch": 0.00014,
      "grad_norm": 1.0428486200491074,
      "learning_rate": 4.2000000000000004e-05,
      "loss": 10.7464,
      "step": 14
    },
    {
      "epoch": 0.00015,
      "grad_norm": 1.0323891787095008,
      "learning_rate": 4.4999999999999996e-05,
      "loss": 10.7372,
      "step": 15
    },
    {
      "epoch": 0.00016,
      "grad_norm": 1.0039368052557194,
      "learning_rate": 4.8e-05,
      "loss": 10.7203,
      "step": 16
    },
    {
      "epoch": 0.00017,
      "grad_norm": 0.973581565324899,
      "learning_rate": 5.1000000000000006e-05,
      "loss": 10.7061,
      "step": 17
    },
    {
      "epoch": 0.00018,
      "grad_norm": 0.9665094864989575,
      "learning_rate": 5.4e-05,
      "loss": 10.6912,
      "step": 18
    },
    {
      "epoch": 0.00019,
      "grad_norm": 0.9428758378586757,
      "learning_rate": 5.7e-05,
      "loss": 10.6779,
      "step": 19
    },
    {
      "epoch": 0.0002,
      "grad_norm": 0.9431515825607029,
      "learning_rate": 6e-05,
      "loss": 10.6671,
      "step": 20
    },
    {
      "epoch": 0.00021,
      "grad_norm": 0.9246458493532533,
      "learning_rate": 6.3e-05,
      "loss": 10.6537,
      "step": 21
    },
    {
      "epoch": 0.00022,
      "grad_norm": 0.9206934968159175,
      "learning_rate": 6.599999999999999e-05,
      "loss": 10.639,
      "step": 22
    },
    {
      "epoch": 0.00023,
      "grad_norm": 0.9152225030599742,
      "learning_rate": 6.9e-05,
      "loss": 10.6272,
      "step": 23
    },
    {
      "epoch": 0.00024,
      "grad_norm": 0.9101679877876945,
      "learning_rate": 7.2e-05,
      "loss": 10.6142,
      "step": 24
    },
    {
      "epoch": 0.00025,
      "grad_norm": 0.9099590574781056,
      "learning_rate": 7.500000000000001e-05,
      "loss": 10.601,
      "step": 25
    },
    {
      "epoch": 0.00026,
      "grad_norm": 0.9093682715479869,
      "learning_rate": 7.8e-05,
      "loss": 10.5871,
      "step": 26
    },
    {
      "epoch": 0.00027,
      "grad_norm": 0.9036175189442642,
      "learning_rate": 8.1e-05,
      "loss": 10.5753,
      "step": 27
    },
    {
      "epoch": 0.00028,
      "grad_norm": 0.9033718484336727,
      "learning_rate": 8.400000000000001e-05,
      "loss": 10.5624,
      "step": 28
    },
    {
      "epoch": 0.00029,
      "grad_norm": 0.9067415030491869,
      "learning_rate": 8.7e-05,
      "loss": 10.5477,
      "step": 29
    },
    {
      "epoch": 0.0003,
      "grad_norm": 0.9152440544157157,
      "learning_rate": 8.999999999999999e-05,
      "loss": 10.5333,
      "step": 30
    },
    {
      "epoch": 0.00031,
      "grad_norm": 0.9122648708383427,
      "learning_rate": 9.3e-05,
      "loss": 10.5179,
      "step": 31
    },
    {
      "epoch": 0.00032,
      "grad_norm": 0.9091031697803933,
      "learning_rate": 9.6e-05,
      "loss": 10.5045,
      "step": 32
    },
    {
      "epoch": 0.00033,
      "grad_norm": 0.9095468136863347,
      "learning_rate": 9.900000000000001e-05,
      "loss": 10.4877,
      "step": 33
    },
    {
      "epoch": 0.00034,
      "grad_norm": 0.9108234370612854,
      "learning_rate": 0.00010200000000000001,
      "loss": 10.4718,
      "step": 34
    },
    {
      "epoch": 0.00035,
      "grad_norm": 0.9162402865630151,
      "learning_rate": 0.00010500000000000002,
      "loss": 10.4546,
      "step": 35
    },
    {
      "epoch": 0.00036,
      "grad_norm": 0.9114412465753974,
      "learning_rate": 0.000108,
      "loss": 10.4378,
      "step": 36
    },
    {
      "epoch": 0.00037,
      "grad_norm": 0.9012823116898077,
      "learning_rate": 0.000111,
      "loss": 10.4207,
      "step": 37
    },
    {
      "epoch": 0.00038,
      "grad_norm": 0.905600573119951,
      "learning_rate": 0.000114,
      "loss": 10.4015,
      "step": 38
    },
    {
      "epoch": 0.00039,
      "grad_norm": 0.9073115291741589,
      "learning_rate": 0.000117,
      "loss": 10.3827,
      "step": 39
    },
    {
      "epoch": 0.0004,
      "grad_norm": 0.9112034567745622,
      "learning_rate": 0.00012,
      "loss": 10.361,
      "step": 40
    },
    {
      "epoch": 0.00041,
      "grad_norm": 0.9001962110912249,
      "learning_rate": 0.000123,
      "loss": 10.3422,
      "step": 41
    },
    {
      "epoch": 0.00042,
      "grad_norm": 0.9057302480900619,
      "learning_rate": 0.000126,
      "loss": 10.3219,
      "step": 42
    },
    {
      "epoch": 0.00043,
      "grad_norm": 0.9101639037851382,
      "learning_rate": 0.000129,
      "loss": 10.299,
      "step": 43
    },
    {
      "epoch": 0.00044,
      "grad_norm": 0.9097658850257722,
      "learning_rate": 0.00013199999999999998,
      "loss": 10.2779,
      "step": 44
    },
    {
      "epoch": 0.00045,
      "grad_norm": 0.9112197620092461,
      "learning_rate": 0.000135,
      "loss": 10.255,
      "step": 45
    },
    {
      "epoch": 0.00046,
      "grad_norm": 0.9030509759254304,
      "learning_rate": 0.000138,
      "loss": 10.2333,
      "step": 46
    },
    {
      "epoch": 0.00047,
      "grad_norm": 0.909935878054922,
      "learning_rate": 0.000141,
      "loss": 10.2068,
      "step": 47
    },
    {
      "epoch": 0.00048,
      "grad_norm": 0.9080711409550536,
      "learning_rate": 0.000144,
      "loss": 10.1836,
      "step": 48
    },
    {
      "epoch": 0.00049,
      "grad_norm": 0.9075427394108988,
      "learning_rate": 0.000147,
      "loss": 10.1601,
      "step": 49
    },
    {
      "epoch": 0.0005,
      "grad_norm": 0.9115402833822758,
      "learning_rate": 0.00015000000000000001,
      "loss": 10.1348,
      "step": 50
    },
    {
      "epoch": 0.00051,
      "grad_norm": 0.9123864422518003,
      "learning_rate": 0.000153,
      "loss": 10.1081,
      "step": 51
    },
    {
      "epoch": 0.00052,
      "grad_norm": 0.9087603210312261,
      "learning_rate": 0.000156,
      "loss": 10.082,
      "step": 52
    },
    {
      "epoch": 0.00053,
      "grad_norm": 0.9004321599195088,
      "learning_rate": 0.000159,
      "loss": 10.0586,
      "step": 53
    },
    {
      "epoch": 0.00054,
      "grad_norm": 0.9194408947742885,
      "learning_rate": 0.000162,
      "loss": 10.0265,
      "step": 54
    },
    {
      "epoch": 0.00055,
      "grad_norm": 0.9109092479970925,
      "learning_rate": 0.000165,
      "loss": 10.0024,
      "step": 55
    },
    {
      "epoch": 0.00056,
      "grad_norm": 0.9034410075551015,
      "learning_rate": 0.00016800000000000002,
      "loss": 9.9748,
      "step": 56
    },
    {
      "epoch": 0.00057,
      "grad_norm": 0.9088310165095549,
      "learning_rate": 0.000171,
      "loss": 9.9476,
      "step": 57
    },
    {
      "epoch": 0.00058,
      "grad_norm": 0.9072222387188987,
      "learning_rate": 0.000174,
      "loss": 9.9231,
      "step": 58
    },
    {
      "epoch": 0.00059,
      "grad_norm": 0.9117524209160145,
      "learning_rate": 0.000177,
      "loss": 9.8912,
      "step": 59
    },
    {
      "epoch": 0.0006,
      "grad_norm": 0.8993095807646821,
      "learning_rate": 0.00017999999999999998,
      "loss": 9.865,
      "step": 60
    },
    {
      "epoch": 0.00061,
      "grad_norm": 0.8983569610899129,
      "learning_rate": 0.000183,
      "loss": 9.8375,
      "step": 61
    },
    {
      "epoch": 0.00062,
      "grad_norm": 0.9044118676660488,
      "learning_rate": 0.000186,
      "loss": 9.8067,
      "step": 62
    },
    {
      "epoch": 0.00063,
      "grad_norm": 0.8968556961344649,
      "learning_rate": 0.000189,
      "loss": 9.7793,
      "step": 63
    },
    {
      "epoch": 0.00064,
      "grad_norm": 0.8932322761861354,
      "learning_rate": 0.000192,
      "loss": 9.7493,
      "step": 64
    },
    {
      "epoch": 0.00065,
      "grad_norm": 0.9012805896747853,
      "learning_rate": 0.00019500000000000002,
      "loss": 9.7174,
      "step": 65
    },
    {
      "epoch": 0.00066,
      "grad_norm": 0.8945673092169469,
      "learning_rate": 0.00019800000000000002,
      "loss": 9.6935,
      "step": 66
    },
    {
      "epoch": 0.00067,
      "grad_norm": 0.8966961567883143,
      "learning_rate": 0.000201,
      "loss": 9.6641,
      "step": 67
    },
    {
      "epoch": 0.00068,
      "grad_norm": 0.9035315852540858,
      "learning_rate": 0.00020400000000000003,
      "loss": 9.632,
      "step": 68
    },
    {
      "epoch": 0.00069,
      "grad_norm": 0.8884844509246163,
      "learning_rate": 0.00020700000000000002,
      "loss": 9.6048,
      "step": 69
    },
    {
      "epoch": 0.0007,
      "grad_norm": 0.9014609193010997,
      "learning_rate": 0.00021000000000000004,
      "loss": 9.5684,
      "step": 70
    },
    {
      "epoch": 0.00071,
      "grad_norm": 0.8912146000887589,
      "learning_rate": 0.00021299999999999997,
      "loss": 9.5412,
      "step": 71
    },
    {
      "epoch": 0.00072,
      "grad_norm": 0.8946751138251678,
      "learning_rate": 0.000216,
      "loss": 9.5146,
      "step": 72
    },
    {
      "epoch": 0.00073,
      "grad_norm": 0.8936103707135835,
      "learning_rate": 0.00021899999999999998,
      "loss": 9.4811,
      "step": 73
    },
    {
      "epoch": 0.00074,
      "grad_norm": 0.8944690762762719,
      "learning_rate": 0.000222,
      "loss": 9.4547,
      "step": 74
    },
    {
      "epoch": 0.00075,
      "grad_norm": 0.8901019220356112,
      "learning_rate": 0.000225,
      "loss": 9.419,
      "step": 75
    },
    {
      "epoch": 0.00076,
      "grad_norm": 0.8989916490115168,
      "learning_rate": 0.000228,
      "loss": 9.3919,
      "step": 76
    },
    {
      "epoch": 0.00077,
      "grad_norm": 0.8950786665970848,
      "learning_rate": 0.000231,
      "loss": 9.3618,
      "step": 77
    },
    {
      "epoch": 0.00078,
      "grad_norm": 0.8906614388540859,
      "learning_rate": 0.000234,
      "loss": 9.3311,
      "step": 78
    },
    {
      "epoch": 0.00079,
      "grad_norm": 0.8922201842477633,
      "learning_rate": 0.00023700000000000001,
      "loss": 9.2961,
      "step": 79
    },
    {
      "epoch": 0.0008,
      "grad_norm": 0.8944676014783551,
      "learning_rate": 0.00024,
      "loss": 9.261,
      "step": 80
    },
    {
      "epoch": 0.00081,
      "grad_norm": 0.8994604493514015,
      "learning_rate": 0.00024300000000000002,
      "loss": 9.2301,
      "step": 81
    },
    {
      "epoch": 0.00082,
      "grad_norm": 0.9081887685982744,
      "learning_rate": 0.000246,
      "loss": 9.2056,
      "step": 82
    },
    {
      "epoch": 0.00083,
      "grad_norm": 0.8969756349033113,
      "learning_rate": 0.00024900000000000004,
      "loss": 9.1708,
      "step": 83
    },
    {
      "epoch": 0.00084,
      "grad_norm": 0.9002173540875773,
      "learning_rate": 0.000252,
      "loss": 9.1399,
      "step": 84
    },
    {
      "epoch": 0.00085,
      "grad_norm": 0.8962626923808309,
      "learning_rate": 0.000255,
      "loss": 9.1164,
      "step": 85
    },
    {
      "epoch": 0.00086,
      "grad_norm": 0.8974847972599506,
      "learning_rate": 0.000258,
      "loss": 9.0803,
      "step": 86
    },
    {
      "epoch": 0.00087,
      "grad_norm": 0.8897217554173651,
      "learning_rate": 0.000261,
      "loss": 9.0575,
      "step": 87
    },
    {
      "epoch": 0.00088,
      "grad_norm": 0.89571441212455,
      "learning_rate": 0.00026399999999999997,
      "loss": 9.0214,
      "step": 88
    },
    {
      "epoch": 0.00089,
      "grad_norm": 0.8904446195114993,
      "learning_rate": 0.000267,
      "loss": 8.9949,
      "step": 89
    },
    {
      "epoch": 0.0009,
      "grad_norm": 0.8898588618543369,
      "learning_rate": 0.00027,
      "loss": 8.9673,
      "step": 90
    },
    {
      "epoch": 0.00091,
      "grad_norm": 0.8846782407589194,
      "learning_rate": 0.000273,
      "loss": 8.9371,
      "step": 91
    },
    {
      "epoch": 0.00092,
      "grad_norm": 0.8861052380663628,
      "learning_rate": 0.000276,
      "loss": 8.9059,
      "step": 92
    },
    {
      "epoch": 0.00093,
      "grad_norm": 0.887769377915217,
      "learning_rate": 0.000279,
      "loss": 8.8757,
      "step": 93
    },
    {
      "epoch": 0.00094,
      "grad_norm": 0.8876037385438905,
      "learning_rate": 0.000282,
      "loss": 8.8493,
      "step": 94
    },
    {
      "epoch": 0.00095,
      "grad_norm": 0.8845306977273465,
      "learning_rate": 0.000285,
      "loss": 8.8182,
      "step": 95
    },
    {
      "epoch": 0.00096,
      "grad_norm": 0.8942873956095471,
      "learning_rate": 0.000288,
      "loss": 8.7858,
      "step": 96
    },
    {
      "epoch": 0.00097,
      "grad_norm": 0.885093824966415,
      "learning_rate": 0.000291,
      "loss": 8.7614,
      "step": 97
    },
    {
      "epoch": 0.00098,
      "grad_norm": 0.8868745046767721,
      "learning_rate": 0.000294,
      "loss": 8.7241,
      "step": 98
    },
    {
      "epoch": 0.00099,
      "grad_norm": 0.8870170622728157,
      "learning_rate": 0.000297,
      "loss": 8.7038,
      "step": 99
    },
    {
      "epoch": 0.001,
      "grad_norm": 0.8872899849688387,
      "learning_rate": 0.00030000000000000003,
      "loss": 8.6777,
      "step": 100
    },
    {
      "epoch": 0.00101,
      "grad_norm": 0.8900580366729636,
      "learning_rate": 0.00030300000000000005,
      "loss": 8.6439,
      "step": 101
    },
    {
      "epoch": 0.00102,
      "grad_norm": 0.8782792949253078,
      "learning_rate": 0.000306,
      "loss": 8.624,
      "step": 102
    },
    {
      "epoch": 0.00103,
      "grad_norm": 0.8831155019167556,
      "learning_rate": 0.000309,
      "loss": 8.5998,
      "step": 103
    },
    {
      "epoch": 0.00104,
      "grad_norm": 0.8733679539923869,
      "learning_rate": 0.000312,
      "loss": 8.5783,
      "step": 104
    },
    {
      "epoch": 0.00105,
      "grad_norm": 0.8744723269171101,
      "learning_rate": 0.000315,
      "loss": 8.5519,
      "step": 105
    },
    {
      "epoch": 0.00106,
      "grad_norm": 0.8685646437973352,
      "learning_rate": 0.000318,
      "loss": 8.5224,
      "step": 106
    },
    {
      "epoch": 0.00107,
      "grad_norm": 0.8618721810364073,
      "learning_rate": 0.000321,
      "loss": 8.498,
      "step": 107
    },
    {
      "epoch": 0.00108,
      "grad_norm": 0.8685042548991871,
      "learning_rate": 0.000324,
      "loss": 8.4695,
      "step": 108
    },
    {
      "epoch": 0.00109,
      "grad_norm": 0.8610554719114292,
      "learning_rate": 0.000327,
      "loss": 8.4497,
      "step": 109
    },
    {
      "epoch": 0.0011,
      "grad_norm": 0.8688551326351219,
      "learning_rate": 0.00033,
      "loss": 8.4258,
      "step": 110
    },
    {
      "epoch": 0.00111,
      "grad_norm": 0.8807293418345888,
      "learning_rate": 0.000333,
      "loss": 8.3951,
      "step": 111
    },
    {
      "epoch": 0.00112,
      "grad_norm": 0.9486358341091515,
      "learning_rate": 0.00033600000000000004,
      "loss": 8.3744,
      "step": 112
    },
    {
      "epoch": 0.00113,
      "grad_norm": 1.0731259256559935,
      "learning_rate": 0.000339,
      "loss": 8.3381,
      "step": 113
    },
    {
      "epoch": 0.00114,
      "grad_norm": 0.8765287858307944,
      "learning_rate": 0.000342,
      "loss": 8.3246,
      "step": 114
    },
    {
      "epoch": 0.00115,
      "grad_norm": 0.888519882671605,
      "learning_rate": 0.00034500000000000004,
      "loss": 8.3,
      "step": 115
    },
    {
      "epoch": 0.00116,
      "grad_norm": 0.8624494481396647,
      "learning_rate": 0.000348,
      "loss": 8.2774,
      "step": 116
    },
    {
      "epoch": 0.00117,
      "grad_norm": 0.8669997406867073,
      "learning_rate": 0.000351,
      "loss": 8.2472,
      "step": 117
    },
    {
      "epoch": 0.00118,
      "grad_norm": 0.8453477351571969,
      "learning_rate": 0.000354,
      "loss": 8.2308,
      "step": 118
    },
    {
      "epoch": 0.00119,
      "grad_norm": 0.8521071532477,
      "learning_rate": 0.000357,
      "loss": 8.2189,
      "step": 119
    },
    {
      "epoch": 0.0012,
      "grad_norm": 0.8273790560738514,
      "learning_rate": 0.00035999999999999997,
      "loss": 8.1923,
      "step": 120
    },
    {
      "epoch": 0.00121,
      "grad_norm": 0.8393654427306546,
      "learning_rate": 0.000363,
      "loss": 8.1655,
      "step": 121
    },
    {
      "epoch": 0.00122,
      "grad_norm": 0.8059519568643615,
      "learning_rate": 0.000366,
      "loss": 8.1479,
      "step": 122
    },
    {
      "epoch": 0.00123,
      "grad_norm": 0.8189584135948407,
      "learning_rate": 0.000369,
      "loss": 8.1261,
      "step": 123
    },
    {
      "epoch": 0.00124,
      "grad_norm": 0.8067888726335559,
      "learning_rate": 0.000372,
      "loss": 8.1038,
      "step": 124
    },
    {
      "epoch": 0.00125,
      "grad_norm": 0.8198363821900397,
      "learning_rate": 0.000375,
      "loss": 8.0789,
      "step": 125
    },
    {
      "epoch": 0.00126,
      "grad_norm": 0.7909108940386875,
      "learning_rate": 0.000378,
      "loss": 8.0658,
      "step": 126
    },
    {
      "epoch": 0.00127,
      "grad_norm": 0.7843545168994203,
      "learning_rate": 0.000381,
      "loss": 8.0421,
      "step": 127
    },
    {
      "epoch": 0.00128,
      "grad_norm": 0.8003774808810522,
      "learning_rate": 0.000384,
      "loss": 8.0238,
      "step": 128
    },
    {
      "epoch": 0.00129,
      "grad_norm": 0.7638903889263088,
      "learning_rate": 0.00038700000000000003,
      "loss": 8.0106,
      "step": 129
    },
    {
      "epoch": 0.0013,
      "grad_norm": 0.7931725212286161,
      "learning_rate": 0.00039000000000000005,
      "loss": 7.9852,
      "step": 130
    },
    {
      "epoch": 0.00131,
      "grad_norm": 0.851354650276413,
      "learning_rate": 0.000393,
      "loss": 7.955,
      "step": 131
    },
    {
      "epoch": 0.00132,
      "grad_norm": 0.9256168821467203,
      "learning_rate": 0.00039600000000000003,
      "loss": 7.9489,
      "step": 132
    },
    {
      "epoch": 0.00133,
      "grad_norm": 1.005976872013275,
      "learning_rate": 0.00039900000000000005,
      "loss": 7.9296,
      "step": 133
    },
    {
      "epoch": 0.00134,
      "grad_norm": 1.1153886851453207,
      "learning_rate": 0.000402,
      "loss": 7.9211,
      "step": 134
    },
    {
      "epoch": 0.00135,
      "grad_norm": 0.8471639827806281,
      "learning_rate": 0.00040500000000000003,
      "loss": 7.8895,
      "step": 135
    },
    {
      "epoch": 0.00136,
      "grad_norm": 0.8134678474512278,
      "learning_rate": 0.00040800000000000005,
      "loss": 7.8718,
      "step": 136
    },
    {
      "epoch": 0.00137,
      "grad_norm": 0.8615881294022464,
      "learning_rate": 0.000411,
      "loss": 7.8543,
      "step": 137
    },
    {
      "epoch": 0.00138,
      "grad_norm": 0.7278802715029148,
      "learning_rate": 0.00041400000000000003,
      "loss": 7.8276,
      "step": 138
    },
    {
      "epoch": 0.00139,
      "grad_norm": 0.7307783350096743,
      "learning_rate": 0.00041700000000000005,
      "loss": 7.8159,
      "step": 139
    },
    {
      "epoch": 0.0014,
      "grad_norm": 0.7140372083943641,
      "learning_rate": 0.00042000000000000007,
      "loss": 7.7973,
      "step": 140
    },
    {
      "epoch": 0.00141,
      "grad_norm": 0.7575231935901182,
      "learning_rate": 0.000423,
      "loss": 7.7858,
      "step": 141
    },
    {
      "epoch": 0.00142,
      "grad_norm": 0.701678588628802,
      "learning_rate": 0.00042599999999999995,
      "loss": 7.7727,
      "step": 142
    },
    {
      "epoch": 0.00143,
      "grad_norm": 0.6771555412467659,
      "learning_rate": 0.00042899999999999997,
      "loss": 7.7449,
      "step": 143
    },
    {
      "epoch": 0.00144,
      "grad_norm": 0.6615684105299617,
      "learning_rate": 0.000432,
      "loss": 7.7264,
      "step": 144
    },
    {
      "epoch": 0.00145,
      "grad_norm": 0.7168928847645009,
      "learning_rate": 0.000435,
      "loss": 7.7227,
      "step": 145
    },
    {
      "epoch": 0.00146,
      "grad_norm": 0.6785803038006687,
      "learning_rate": 0.00043799999999999997,
      "loss": 7.6843,
      "step": 146
    },
    {
      "epoch": 0.00147,
      "grad_norm": 0.674569045840065,
      "learning_rate": 0.000441,
      "loss": 7.6778,
      "step": 147
    },
    {
      "epoch": 0.00148,
      "grad_norm": 0.6220171351450662,
      "learning_rate": 0.000444,
      "loss": 7.6627,
      "step": 148
    },
    {
      "epoch": 0.00149,
      "grad_norm": 0.6302944022326216,
      "learning_rate": 0.00044699999999999997,
      "loss": 7.6572,
      "step": 149
    },
    {
      "epoch": 0.0015,
      "grad_norm": 0.7028824721139024,
      "learning_rate": 0.00045,
      "loss": 7.6276,
      "step": 150
    },
    {
      "epoch": 0.00151,
      "grad_norm": 0.8627581414583672,
      "learning_rate": 0.000453,
      "loss": 7.6119,
      "step": 151
    },
    {
      "epoch": 0.00152,
      "grad_norm": 0.9443366186732501,
      "learning_rate": 0.000456,
      "loss": 7.6082,
      "step": 152
    },
    {
      "epoch": 0.00153,
      "grad_norm": 0.8165953782619042,
      "learning_rate": 0.000459,
      "loss": 7.593,
      "step": 153
    },
    {
      "epoch": 0.00154,
      "grad_norm": 0.7443216104832789,
      "learning_rate": 0.000462,
      "loss": 7.5691,
      "step": 154
    },
    {
      "epoch": 0.00155,
      "grad_norm": 0.8234472492039366,
      "learning_rate": 0.000465,
      "loss": 7.5599,
      "step": 155
    },
    {
      "epoch": 0.00156,
      "grad_norm": 0.6944689992522087,
      "learning_rate": 0.000468,
      "loss": 7.5279,
      "step": 156
    },
    {
      "epoch": 0.00157,
      "grad_norm": 0.6018571838743536,
      "learning_rate": 0.000471,
      "loss": 7.5212,
      "step": 157
    },
    {
      "epoch": 0.00158,
      "grad_norm": 0.6619988530948462,
      "learning_rate": 0.00047400000000000003,
      "loss": 7.5046,
      "step": 158
    },
    {
      "epoch": 0.00159,
      "grad_norm": 0.6265648411112951,
      "learning_rate": 0.000477,
      "loss": 7.4897,
      "step": 159
    },
    {
      "epoch": 0.0016,
      "grad_norm": 0.6464511310681584,
      "learning_rate": 0.00048,
      "loss": 7.4689,
      "step": 160
    },
    {
      "epoch": 0.00161,
      "grad_norm": 0.6320665002117156,
      "learning_rate": 0.00048300000000000003,
      "loss": 7.4463,
      "step": 161
    },
    {
      "epoch": 0.00162,
      "grad_norm": 0.5803650259635545,
      "learning_rate": 0.00048600000000000005,
      "loss": 7.4486,
      "step": 162
    },
    {
      "epoch": 0.00163,
      "grad_norm": 0.5737128458576856,
      "learning_rate": 0.0004890000000000001,
      "loss": 7.4298,
      "step": 163
    },
    {
      "epoch": 0.00164,
      "grad_norm": 0.7222142092183056,
      "learning_rate": 0.000492,
      "loss": 7.4324,
      "step": 164
    },
    {
      "epoch": 0.00165,
      "grad_norm": 0.6646597416212581,
      "learning_rate": 0.000495,
      "loss": 7.4085,
      "step": 165
    },
    {
      "epoch": 0.00166,
      "grad_norm": 0.5400418940901194,
      "learning_rate": 0.0004980000000000001,
      "loss": 7.3779,
      "step": 166
    },
    {
      "epoch": 0.00167,
      "grad_norm": 0.509415377419389,
      "learning_rate": 0.000501,
      "loss": 7.3695,
      "step": 167
    },
    {
      "epoch": 0.00168,
      "grad_norm": 0.6649011711530104,
      "learning_rate": 0.000504,
      "loss": 7.3502,
      "step": 168
    },
    {
      "epoch": 0.00169,
      "grad_norm": 0.47037629648013785,
      "learning_rate": 0.0005070000000000001,
      "loss": 7.3396,
      "step": 169
    },
    {
      "epoch": 0.0017,
      "grad_norm": 0.5403588419672608,
      "learning_rate": 0.00051,
      "loss": 7.3246,
      "step": 170
    },
    {
      "epoch": 0.00171,
      "grad_norm": 0.5602918800765249,
      "learning_rate": 0.000513,
      "loss": 7.3054,
      "step": 171
    },
    {
      "epoch": 0.00172,
      "grad_norm": 0.673389772217806,
      "learning_rate": 0.000516,
      "loss": 7.3094,
      "step": 172
    },
    {
      "epoch": 0.00173,
      "grad_norm": 1.0978277339576752,
      "learning_rate": 0.0005189999999999999,
      "loss": 7.2985,
      "step": 173
    },
    {
      "epoch": 0.00174,
      "grad_norm": 1.0527490659306924,
      "learning_rate": 0.000522,
      "loss": 7.3309,
      "step": 174
    },
    {
      "epoch": 0.00175,
      "grad_norm": 0.447215175816087,
      "learning_rate": 0.000525,
      "loss": 7.2686,
      "step": 175
    },
    {
      "epoch": 0.00176,
      "grad_norm": 1.434718568428607,
      "learning_rate": 0.0005279999999999999,
      "loss": 7.2722,
      "step": 176
    },
    {
      "epoch": 0.00177,
      "grad_norm": 0.515979004078738,
      "learning_rate": 0.000531,
      "loss": 7.2504,
      "step": 177
    },
    {
      "epoch": 0.00178,
      "grad_norm": 0.6894449502470986,
      "learning_rate": 0.000534,
      "loss": 7.2483,
      "step": 178
    },
    {
      "epoch": 0.00179,
      "grad_norm": 0.564765632510306,
      "learning_rate": 0.000537,
      "loss": 7.2342,
      "step": 179
    },
    {
      "epoch": 0.0018,
      "grad_norm": 0.5678924126144868,
      "learning_rate": 0.00054,
      "loss": 7.2065,
      "step": 180
    },
    {
      "epoch": 0.00181,
      "grad_norm": 0.592977565407316,
      "learning_rate": 0.000543,
      "loss": 7.1937,
      "step": 181
    },
    {
      "epoch": 0.00182,
      "grad_norm": 0.562527508746574,
      "learning_rate": 0.000546,
      "loss": 7.1887,
      "step": 182
    },
    {
      "epoch": 0.00183,
      "grad_norm": 0.44528510710573116,
      "learning_rate": 0.000549,
      "loss": 7.1767,
      "step": 183
    },
    {
      "epoch": 0.00184,
      "grad_norm": 0.44473177727238294,
      "learning_rate": 0.000552,
      "loss": 7.1633,
      "step": 184
    },
    {
      "epoch": 0.00185,
      "grad_norm": 0.4257521538615004,
      "learning_rate": 0.000555,
      "loss": 7.1411,
      "step": 185
    },
    {
      "epoch": 0.00186,
      "grad_norm": 0.4409972547719895,
      "learning_rate": 0.000558,
      "loss": 7.1489,
      "step": 186
    },
    {
      "epoch": 0.00187,
      "grad_norm": 0.390495735764414,
      "learning_rate": 0.000561,
      "loss": 7.1234,
      "step": 187
    },
    {
      "epoch": 0.00188,
      "grad_norm": 0.3832212119451659,
      "learning_rate": 0.000564,
      "loss": 7.1014,
      "step": 188
    },
    {
      "epoch": 0.00189,
      "grad_norm": 0.41263318289872636,
      "learning_rate": 0.000567,
      "loss": 7.097,
      "step": 189
    },
    {
      "epoch": 0.0019,
      "grad_norm": 0.42879499878411836,
      "learning_rate": 0.00057,
      "loss": 7.0798,
      "step": 190
    },
    {
      "epoch": 0.00191,
      "grad_norm": 0.45074305144161203,
      "learning_rate": 0.000573,
      "loss": 7.0705,
      "step": 191
    },
    {
      "epoch": 0.00192,
      "grad_norm": 0.49519390713379596,
      "learning_rate": 0.000576,
      "loss": 7.0632,
      "step": 192
    },
    {
      "epoch": 0.00193,
      "grad_norm": 0.5696097064113711,
      "learning_rate": 0.000579,
      "loss": 7.0638,
      "step": 193
    },
    {
      "epoch": 0.00194,
      "grad_norm": 0.534428946566245,
      "learning_rate": 0.000582,
      "loss": 7.057,
      "step": 194
    },
    {
      "epoch": 0.00195,
      "grad_norm": 0.490207896666695,
      "learning_rate": 0.000585,
      "loss": 7.0356,
      "step": 195
    },
    {
      "epoch": 0.00196,
      "grad_norm": 0.4167189031811958,
      "learning_rate": 0.000588,
      "loss": 7.0223,
      "step": 196
    },
    {
      "epoch": 0.00197,
      "grad_norm": 0.48007746323055095,
      "learning_rate": 0.000591,
      "loss": 7.0154,
      "step": 197
    },
    {
      "epoch": 0.00198,
      "grad_norm": 0.876048914805006,
      "learning_rate": 0.000594,
      "loss": 7.0165,
      "step": 198
    },
    {
      "epoch": 0.00199,
      "grad_norm": 1.370346112091805,
      "learning_rate": 0.0005970000000000001,
      "loss": 7.0136,
      "step": 199
    },
    {
      "epoch": 0.002,
      "grad_norm": 0.6762012224362532,
      "learning_rate": 0.0006000000000000001,
      "loss": 6.9949,
      "step": 200
    },
    {
      "epoch": 0.00201,
      "grad_norm": 0.7937633271746596,
      "learning_rate": 0.000603,
      "loss": 6.9721,
      "step": 201
    },
    {
      "epoch": 0.00202,
      "grad_norm": 0.8964746740403945,
      "learning_rate": 0.0006060000000000001,
      "loss": 6.9666,
      "step": 202
    },
    {
      "epoch": 0.00203,
      "grad_norm": 0.7263793741406347,
      "learning_rate": 0.0006090000000000001,
      "loss": 6.9637,
      "step": 203
    },
    {
      "epoch": 0.00204,
      "grad_norm": 0.6263036991113456,
      "learning_rate": 0.000612,
      "loss": 6.9498,
      "step": 204
    },
    {
      "epoch": 0.00205,
      "grad_norm": 0.6750439819273765,
      "learning_rate": 0.000615,
      "loss": 6.936,
      "step": 205
    },
    {
      "epoch": 0.00206,
      "grad_norm": 0.5237479635289414,
      "learning_rate": 0.000618,
      "loss": 6.9276,
      "step": 206
    },
    {
      "epoch": 0.00207,
      "grad_norm": 0.44905870783635865,
      "learning_rate": 0.000621,
      "loss": 6.9149,
      "step": 207
    },
    {
      "epoch": 0.00208,
      "grad_norm": 0.5974162102879337,
      "learning_rate": 0.000624,
      "loss": 6.9145,
      "step": 208
    },
    {
      "epoch": 0.00209,
      "grad_norm": 0.5078613737809945,
      "learning_rate": 0.000627,
      "loss": 6.8932,
      "step": 209
    },
    {
      "epoch": 0.0021,
      "grad_norm": 0.6578576689890827,
      "learning_rate": 0.00063,
      "loss": 6.8833,
      "step": 210
    },
    {
      "epoch": 0.00211,
      "grad_norm": 0.7468480045410584,
      "learning_rate": 0.000633,
      "loss": 6.8851,
      "step": 211
    },
    {
      "epoch": 0.00212,
      "grad_norm": 0.768921505492079,
      "learning_rate": 0.000636,
      "loss": 6.8736,
      "step": 212
    },
    {
      "epoch": 0.00213,
      "grad_norm": 0.43674889526480726,
      "learning_rate": 0.000639,
      "loss": 6.8482,
      "step": 213
    },
    {
      "epoch": 0.00214,
      "grad_norm": 0.44770222580869473,
      "learning_rate": 0.000642,
      "loss": 6.8466,
      "step": 214
    },
    {
      "epoch": 0.00215,
      "grad_norm": 0.4987908268903726,
      "learning_rate": 0.000645,
      "loss": 6.8422,
      "step": 215
    },
    {
      "epoch": 0.00216,
      "grad_norm": 0.3778866757099475,
      "learning_rate": 0.000648,
      "loss": 6.8342,
      "step": 216
    },
    {
      "epoch": 0.00217,
      "grad_norm": 0.503647499102225,
      "learning_rate": 0.000651,
      "loss": 6.8089,
      "step": 217
    },
    {
      "epoch": 0.00218,
      "grad_norm": 0.5993267866741838,
      "learning_rate": 0.000654,
      "loss": 6.8135,
      "step": 218
    },
    {
      "epoch": 0.00219,
      "grad_norm": 0.8234186906536067,
      "learning_rate": 0.000657,
      "loss": 6.7909,
      "step": 219
    },
    {
      "epoch": 0.0022,
      "grad_norm": 0.8231498183396392,
      "learning_rate": 0.00066,
      "loss": 6.8151,
      "step": 220
    },
    {
      "epoch": 0.00221,
      "grad_norm": 0.6347034416977888,
      "learning_rate": 0.0006630000000000001,
      "loss": 6.782,
      "step": 221
    },
    {
      "epoch": 0.00222,
      "grad_norm": 0.6018163518897156,
      "learning_rate": 0.000666,
      "loss": 6.7718,
      "step": 222
    },
    {
      "epoch": 0.00223,
      "grad_norm": 0.6049102827685588,
      "learning_rate": 0.000669,
      "loss": 6.7674,
      "step": 223
    },
    {
      "epoch": 0.00224,
      "grad_norm": 0.4980503627910826,
      "learning_rate": 0.0006720000000000001,
      "loss": 6.7608,
      "step": 224
    },
    {
      "epoch": 0.00225,
      "grad_norm": 0.5566517086736245,
      "learning_rate": 0.000675,
      "loss": 6.7493,
      "step": 225
    },
    {
      "epoch": 0.00226,
      "grad_norm": 0.4371230113022049,
      "learning_rate": 0.000678,
      "loss": 6.7428,
      "step": 226
    },
    {
      "epoch": 0.00227,
      "grad_norm": 0.40182304387748197,
      "learning_rate": 0.0006810000000000001,
      "loss": 6.7143,
      "step": 227
    },
    {
      "epoch": 0.00228,
      "grad_norm": 0.5112677711899808,
      "learning_rate": 0.000684,
      "loss": 6.7211,
      "step": 228
    },
    {
      "epoch": 0.00229,
      "grad_norm": 0.4048973279712385,
      "learning_rate": 0.000687,
      "loss": 6.7135,
      "step": 229
    },
    {
      "epoch": 0.0023,
      "grad_norm": 0.42618577183903295,
      "learning_rate": 0.0006900000000000001,
      "loss": 6.691,
      "step": 230
    },
    {
      "epoch": 0.00231,
      "grad_norm": 0.4712439901030759,
      "learning_rate": 0.000693,
      "loss": 6.6777,
      "step": 231
    },
    {
      "epoch": 0.00232,
      "grad_norm": 0.5546996117138073,
      "learning_rate": 0.000696,
      "loss": 6.6902,
      "step": 232
    },
    {
      "epoch": 0.00233,
      "grad_norm": 0.6251585292690813,
      "learning_rate": 0.0006990000000000001,
      "loss": 6.6788,
      "step": 233
    },
    {
      "epoch": 0.00234,
      "grad_norm": 0.7044048270867854,
      "learning_rate": 0.000702,
      "loss": 6.6708,
      "step": 234
    },
    {
      "epoch": 0.00235,
      "grad_norm": 0.7650244494427701,
      "learning_rate": 0.000705,
      "loss": 6.6578,
      "step": 235
    },
    {
      "epoch": 0.00236,
      "grad_norm": 1.0242839403066175,
      "learning_rate": 0.000708,
      "loss": 6.6514,
      "step": 236
    },
    {
      "epoch": 0.00237,
      "grad_norm": 1.5412268350529807,
      "learning_rate": 0.0007109999999999999,
      "loss": 6.6806,
      "step": 237
    },
    {
      "epoch": 0.00238,
      "grad_norm": 0.7226146264721942,
      "learning_rate": 0.000714,
      "loss": 6.645,
      "step": 238
    },
    {
      "epoch": 0.00239,
      "grad_norm": 0.8688282422006292,
      "learning_rate": 0.000717,
      "loss": 6.6357,
      "step": 239
    },
    {
      "epoch": 0.0024,
      "grad_norm": 0.7827205547140218,
      "learning_rate": 0.0007199999999999999,
      "loss": 6.6472,
      "step": 240
    },
    {
      "epoch": 0.00241,
      "grad_norm": 0.9034551371971334,
      "learning_rate": 0.000723,
      "loss": 6.6355,
      "step": 241
    },
    {
      "epoch": 0.00242,
      "grad_norm": 0.9465232376141441,
      "learning_rate": 0.000726,
      "loss": 6.6133,
      "step": 242
    },
    {
      "epoch": 0.00243,
      "grad_norm": 0.8587844668110328,
      "learning_rate": 0.000729,
      "loss": 6.6044,
      "step": 243
    },
    {
      "epoch": 0.00244,
      "grad_norm": 0.7399571613579958,
      "learning_rate": 0.000732,
      "loss": 6.6023,
      "step": 244
    },
    {
      "epoch": 0.00245,
      "grad_norm": 0.574923768026061,
      "learning_rate": 0.000735,
      "loss": 6.5879,
      "step": 245
    },
    {
      "epoch": 0.00246,
      "grad_norm": 0.605974416339262,
      "learning_rate": 0.000738,
      "loss": 6.5742,
      "step": 246
    },
    {
      "epoch": 0.00247,
      "grad_norm": 0.5710304673194679,
      "learning_rate": 0.000741,
      "loss": 6.5636,
      "step": 247
    },
    {
      "epoch": 0.00248,
      "grad_norm": 0.6965257499480564,
      "learning_rate": 0.000744,
      "loss": 6.5776,
      "step": 248
    },
    {
      "epoch": 0.00249,
      "grad_norm": 0.6456846673011167,
      "learning_rate": 0.000747,
      "loss": 6.5561,
      "step": 249
    },
    {
      "epoch": 0.0025,
      "grad_norm": 0.5753461631243048,
      "learning_rate": 0.00075,
      "loss": 6.5548,
      "step": 250
    },
    {
      "epoch": 0.00251,
      "grad_norm": 0.42353871858505077,
      "learning_rate": 0.000753,
      "loss": 6.5158,
      "step": 251
    },
    {
      "epoch": 0.00252,
      "grad_norm": 0.49557105625334785,
      "learning_rate": 0.000756,
      "loss": 6.5269,
      "step": 252
    },
    {
      "epoch": 0.00253,
      "grad_norm": 0.5766748014233265,
      "learning_rate": 0.000759,
      "loss": 6.518,
      "step": 253
    },
    {
      "epoch": 0.00254,
      "grad_norm": 0.5067846710386406,
      "learning_rate": 0.000762,
      "loss": 6.5107,
      "step": 254
    },
    {
      "epoch": 0.00255,
      "grad_norm": 0.47970421756406123,
      "learning_rate": 0.0007650000000000001,
      "loss": 6.5096,
      "step": 255
    },
    {
      "epoch": 0.00256,
      "grad_norm": 0.5320243074578035,
      "learning_rate": 0.000768,
      "loss": 6.4888,
      "step": 256
    },
    {
      "epoch": 0.00257,
      "grad_norm": 0.6544133785890598,
      "learning_rate": 0.000771,
      "loss": 6.4948,
      "step": 257
    },
    {
      "epoch": 0.00258,
      "grad_norm": 0.7416037390762996,
      "learning_rate": 0.0007740000000000001,
      "loss": 6.4783,
      "step": 258
    },
    {
      "epoch": 0.00259,
      "grad_norm": 0.6373527730888897,
      "learning_rate": 0.000777,
      "loss": 6.4749,
      "step": 259
    },
    {
      "epoch": 0.0026,
      "grad_norm": 0.47810450763796425,
      "learning_rate": 0.0007800000000000001,
      "loss": 6.4546,
      "step": 260
    },
    {
      "epoch": 0.00261,
      "grad_norm": 0.5580595012782733,
      "learning_rate": 0.0007830000000000001,
      "loss": 6.4672,
      "step": 261
    },
    {
      "epoch": 0.00262,
      "grad_norm": 0.5815251459505872,
      "learning_rate": 0.000786,
      "loss": 6.4461,
      "step": 262
    },
    {
      "epoch": 0.00263,
      "grad_norm": 0.5190442800996695,
      "learning_rate": 0.0007890000000000001,
      "loss": 6.451,
      "step": 263
    },
    {
      "epoch": 0.00264,
      "grad_norm": 0.5014715985622594,
      "learning_rate": 0.0007920000000000001,
      "loss": 6.434,
      "step": 264
    },
    {
      "epoch": 0.00265,
      "grad_norm": 0.5767159920930507,
      "learning_rate": 0.000795,
      "loss": 6.4267,
      "step": 265
    },
    {
      "epoch": 0.00266,
      "grad_norm": 0.7264139293538269,
      "learning_rate": 0.0007980000000000001,
      "loss": 6.428,
      "step": 266
    },
    {
      "epoch": 0.00267,
      "grad_norm": 0.912670346256251,
      "learning_rate": 0.0008010000000000001,
      "loss": 6.4255,
      "step": 267
    },
    {
      "epoch": 0.00268,
      "grad_norm": 0.9513958897134662,
      "learning_rate": 0.000804,
      "loss": 6.4269,
      "step": 268
    },
    {
      "epoch": 0.00269,
      "grad_norm": 0.684045795635244,
      "learning_rate": 0.0008070000000000001,
      "loss": 6.4015,
      "step": 269
    },
    {
      "epoch": 0.0027,
      "grad_norm": 0.5702935812973139,
      "learning_rate": 0.0008100000000000001,
      "loss": 6.4003,
      "step": 270
    },
    {
      "epoch": 0.00271,
      "grad_norm": 0.6129714435298934,
      "learning_rate": 0.000813,
      "loss": 6.4061,
      "step": 271
    },
    {
      "epoch": 0.00272,
      "grad_norm": 0.7245489936319242,
      "learning_rate": 0.0008160000000000001,
      "loss": 6.393,
      "step": 272
    },
    {
      "epoch": 0.00273,
      "grad_norm": 0.7005289276676516,
      "learning_rate": 0.0008190000000000001,
      "loss": 6.383,
      "step": 273
    },
    {
      "epoch": 0.00274,
      "grad_norm": 0.8087304828857205,
      "learning_rate": 0.000822,
      "loss": 6.3662,
      "step": 274
    },
    {
      "epoch": 0.00275,
      "grad_norm": 1.1750908820172745,
      "learning_rate": 0.0008250000000000001,
      "loss": 6.3881,
      "step": 275
    },
    {
      "epoch": 0.00276,
      "grad_norm": 1.2316116625020541,
      "learning_rate": 0.0008280000000000001,
      "loss": 6.3829,
      "step": 276
    },
    {
      "epoch": 0.00277,
      "grad_norm": 0.8466007057008188,
      "learning_rate": 0.0008310000000000001,
      "loss": 6.3637,
      "step": 277
    },
    {
      "epoch": 0.00278,
      "grad_norm": 1.0699566045014268,
      "learning_rate": 0.0008340000000000001,
      "loss": 6.3585,
      "step": 278
    },
    {
      "epoch": 0.00279,
      "grad_norm": 0.9176192714012301,
      "learning_rate": 0.0008370000000000001,
      "loss": 6.3555,
      "step": 279
    },
    {
      "epoch": 0.0028,
      "grad_norm": 0.8370570497956159,
      "learning_rate": 0.0008400000000000001,
      "loss": 6.337,
      "step": 280
    },
    {
      "epoch": 0.00281,
      "grad_norm": 0.9837106590112089,
      "learning_rate": 0.0008430000000000001,
      "loss": 6.3349,
      "step": 281
    },
    {
      "epoch": 0.00282,
      "grad_norm": 0.9638325924361532,
      "learning_rate": 0.000846,
      "loss": 6.3337,
      "step": 282
    },
    {
      "epoch": 0.00283,
      "grad_norm": 0.8201342967629913,
      "learning_rate": 0.0008489999999999999,
      "loss": 6.3234,
      "step": 283
    },
    {
      "epoch": 0.00284,
      "grad_norm": 0.7494504986377379,
      "learning_rate": 0.0008519999999999999,
      "loss": 6.3111,
      "step": 284
    },
    {
      "epoch": 0.00285,
      "grad_norm": 0.6500575839227768,
      "learning_rate": 0.000855,
      "loss": 6.314,
      "step": 285
    },
    {
      "epoch": 0.00286,
      "grad_norm": 0.6158978014821535,
      "learning_rate": 0.0008579999999999999,
      "loss": 6.2896,
      "step": 286
    },
    {
      "epoch": 0.00287,
      "grad_norm": 0.5596137127282185,
      "learning_rate": 0.000861,
      "loss": 6.2911,
      "step": 287
    },
    {
      "epoch": 0.00288,
      "grad_norm": 0.5983803771698389,
      "learning_rate": 0.000864,
      "loss": 6.2729,
      "step": 288
    },
    {
      "epoch": 0.00289,
      "grad_norm": 0.6730668726749236,
      "learning_rate": 0.0008669999999999999,
      "loss": 6.2867,
      "step": 289
    },
    {
      "epoch": 0.0029,
      "grad_norm": 0.7493875593433929,
      "learning_rate": 0.00087,
      "loss": 6.2693,
      "step": 290
    },
    {
      "epoch": 0.00291,
      "grad_norm": 0.7983375848355683,
      "learning_rate": 0.000873,
      "loss": 6.2651,
      "step": 291
    },
    {
      "epoch": 0.00292,
      "grad_norm": 0.8346121383412798,
      "learning_rate": 0.0008759999999999999,
      "loss": 6.2655,
      "step": 292
    },
    {
      "epoch": 0.00293,
      "grad_norm": 0.915961517973993,
      "learning_rate": 0.000879,
      "loss": 6.2508,
      "step": 293
    },
    {
      "epoch": 0.00294,
      "grad_norm": 0.966838750452259,
      "learning_rate": 0.000882,
      "loss": 6.2734,
      "step": 294
    },
    {
      "epoch": 0.00295,
      "grad_norm": 0.479548574482138,
      "learning_rate": 0.0008849999999999999,
      "loss": 6.2216,
      "step": 295
    },
    {
      "epoch": 0.00296,
      "grad_norm": 0.583968812139162,
      "learning_rate": 0.000888,
      "loss": 6.2407,
      "step": 296
    },
    {
      "epoch": 0.00297,
      "grad_norm": 0.5310639812280135,
      "learning_rate": 0.000891,
      "loss": 6.2108,
      "step": 297
    },
    {
      "epoch": 0.00298,
      "grad_norm": 0.5175342241650946,
      "learning_rate": 0.0008939999999999999,
      "loss": 6.2236,
      "step": 298
    },
    {
      "epoch": 0.00299,
      "grad_norm": 0.4652973315008358,
      "learning_rate": 0.000897,
      "loss": 6.1974,
      "step": 299
    },
    {
      "epoch": 0.003,
      "grad_norm": 0.45180686655250246,
      "learning_rate": 0.0009,
      "loss": 6.1981,
      "step": 300
    },
    {
      "epoch": 0.00301,
      "grad_norm": 0.4267045703969487,
      "learning_rate": 0.0009029999999999999,
      "loss": 6.1896,
      "step": 301
    },
    {
      "epoch": 0.00302,
      "grad_norm": 0.42788415431197124,
      "learning_rate": 0.000906,
      "loss": 6.1747,
      "step": 302
    },
    {
      "epoch": 0.00303,
      "grad_norm": 0.44705407060167995,
      "learning_rate": 0.000909,
      "loss": 6.1846,
      "step": 303
    },
    {
      "epoch": 0.00304,
      "grad_norm": 0.6488030623423143,
      "learning_rate": 0.000912,
      "loss": 6.1818,
      "step": 304
    },
    {
      "epoch": 0.00305,
      "grad_norm": 1.1848923233227069,
      "learning_rate": 0.000915,
      "loss": 6.1971,
      "step": 305
    },
    {
      "epoch": 0.00306,
      "grad_norm": 1.146308347479262,
      "learning_rate": 0.000918,
      "loss": 6.1896,
      "step": 306
    },
    {
      "epoch": 0.00307,
      "grad_norm": 0.5610571056277671,
      "learning_rate": 0.000921,
      "loss": 6.1649,
      "step": 307
    },
    {
      "epoch": 0.00308,
      "grad_norm": 0.6387882167592376,
      "learning_rate": 0.000924,
      "loss": 6.1535,
      "step": 308
    },
    {
      "epoch": 0.00309,
      "grad_norm": 0.6644607985790572,
      "learning_rate": 0.000927,
      "loss": 6.1687,
      "step": 309
    },
    {
      "epoch": 0.0031,
      "grad_norm": 0.9254403317850548,
      "learning_rate": 0.00093,
      "loss": 6.1578,
      "step": 310
    },
    {
      "epoch": 0.00311,
      "grad_norm": 1.1729449300284347,
      "learning_rate": 0.000933,
      "loss": 6.1641,
      "step": 311
    },
    {
      "epoch": 0.00312,
      "grad_norm": 0.709455259431002,
      "learning_rate": 0.000936,
      "loss": 6.1296,
      "step": 312
    },
    {
      "epoch": 0.00313,
      "grad_norm": 0.9111247454263828,
      "learning_rate": 0.0009390000000000001,
      "loss": 6.1519,
      "step": 313
    },
    {
      "epoch": 0.00314,
      "grad_norm": 0.9354538212933341,
      "learning_rate": 0.000942,
      "loss": 6.1613,
      "step": 314
    },
    {
      "epoch": 0.00315,
      "grad_norm": 0.8799875640055459,
      "learning_rate": 0.000945,
      "loss": 6.1333,
      "step": 315
    },
    {
      "epoch": 0.00316,
      "grad_norm": 0.9808323579373607,
      "learning_rate": 0.0009480000000000001,
      "loss": 6.1147,
      "step": 316
    },
    {
      "epoch": 0.00317,
      "grad_norm": 0.9583160858065677,
      "learning_rate": 0.000951,
      "loss": 6.1333,
      "step": 317
    },
    {
      "epoch": 0.00318,
      "grad_norm": 1.0107022128845053,
      "learning_rate": 0.000954,
      "loss": 6.1332,
      "step": 318
    },
    {
      "epoch": 0.00319,
      "grad_norm": 0.9453172619742498,
      "learning_rate": 0.0009570000000000001,
      "loss": 6.1183,
      "step": 319
    },
    {
      "epoch": 0.0032,
      "grad_norm": 0.881992443544615,
      "learning_rate": 0.00096,
      "loss": 6.1175,
      "step": 320
    },
    {
      "epoch": 0.00321,
      "grad_norm": 0.8621522106832189,
      "learning_rate": 0.000963,
      "loss": 6.1031,
      "step": 321
    },
    {
      "epoch": 0.00322,
      "grad_norm": 0.8993520755674765,
      "learning_rate": 0.0009660000000000001,
      "loss": 6.0937,
      "step": 322
    },
    {
      "epoch": 0.00323,
      "grad_norm": 0.8168073718243659,
      "learning_rate": 0.000969,
      "loss": 6.0911,
      "step": 323
    },
    {
      "epoch": 0.00324,
      "grad_norm": 0.5742393221545353,
      "learning_rate": 0.0009720000000000001,
      "loss": 6.0859,
      "step": 324
    },
    {
      "epoch": 0.00325,
      "grad_norm": 0.6225863989817222,
      "learning_rate": 0.0009750000000000001,
      "loss": 6.0536,
      "step": 325
    },
    {
      "epoch": 0.00326,
      "grad_norm": 0.6133472134077819,
      "learning_rate": 0.0009780000000000001,
      "loss": 6.0689,
      "step": 326
    },
    {
      "epoch": 0.00327,
      "grad_norm": 0.5150229110459733,
      "learning_rate": 0.000981,
      "loss": 6.0685,
      "step": 327
    },
    {
      "epoch": 0.00328,
      "grad_norm": 0.4607860070893556,
      "learning_rate": 0.000984,
      "loss": 6.042,
      "step": 328
    },
    {
      "epoch": 0.00329,
      "grad_norm": 0.5192581817133665,
      "learning_rate": 0.000987,
      "loss": 6.0442,
      "step": 329
    },
    {
      "epoch": 0.0033,
      "grad_norm": 0.5292078450907967,
      "learning_rate": 0.00099,
      "loss": 6.0364,
      "step": 330
    },
    {
      "epoch": 0.00331,
      "grad_norm": 0.6947168538867383,
      "learning_rate": 0.0009930000000000002,
      "loss": 6.0435,
      "step": 331
    },
    {
      "epoch": 0.00332,
      "grad_norm": 0.8487757774754058,
      "learning_rate": 0.0009960000000000001,
      "loss": 6.0304,
      "step": 332
    },
    {
      "epoch": 0.00333,
      "grad_norm": 0.899631264631538,
      "learning_rate": 0.000999,
      "loss": 6.0174,
      "step": 333
    },
    {
      "epoch": 0.00334,
      "grad_norm": 0.7073321966133805,
      "learning_rate": 0.001002,
      "loss": 6.0239,
      "step": 334
    },
    {
      "epoch": 0.00335,
      "grad_norm": 0.7526163680323161,
      "learning_rate": 0.001005,
      "loss": 6.0225,
      "step": 335
    },
    {
      "epoch": 0.00336,
      "grad_norm": 1.0400532380171426,
      "learning_rate": 0.001008,
      "loss": 6.0251,
      "step": 336
    },
    {
      "epoch": 0.00337,
      "grad_norm": 0.8580613520432612,
      "learning_rate": 0.0010110000000000002,
      "loss": 5.9967,
      "step": 337
    },
    {
      "epoch": 0.00338,
      "grad_norm": 0.8200745373702635,
      "learning_rate": 0.0010140000000000001,
      "loss": 6.014,
      "step": 338
    },
    {
      "epoch": 0.00339,
      "grad_norm": 0.7727894868210932,
      "learning_rate": 0.0010170000000000001,
      "loss": 6.0161,
      "step": 339
    },
    {
      "epoch": 0.0034,
      "grad_norm": 0.8707811190707261,
      "learning_rate": 0.00102,
      "loss": 5.9943,
      "step": 340
    },
    {
      "epoch": 0.00341,
      "grad_norm": 0.7603523129117276,
      "learning_rate": 0.001023,
      "loss": 5.9713,
      "step": 341
    },
    {
      "epoch": 0.00342,
      "grad_norm": 0.726490122797017,
      "learning_rate": 0.001026,
      "loss": 5.9758,
      "step": 342
    },
    {
      "epoch": 0.00343,
      "grad_norm": 0.7698838300440034,
      "learning_rate": 0.0010290000000000002,
      "loss": 5.9813,
      "step": 343
    },
    {
      "epoch": 0.00344,
      "grad_norm": 0.810359133047605,
      "learning_rate": 0.001032,
      "loss": 5.9733,
      "step": 344
    },
    {
      "epoch": 0.00345,
      "grad_norm": 0.7483100579200008,
      "learning_rate": 0.001035,
      "loss": 5.9619,
      "step": 345
    },
    {
      "epoch": 0.00346,
      "grad_norm": 0.7290056537727018,
      "learning_rate": 0.0010379999999999999,
      "loss": 5.9677,
      "step": 346
    },
    {
      "epoch": 0.00347,
      "grad_norm": 0.879931767534797,
      "learning_rate": 0.001041,
      "loss": 5.9453,
      "step": 347
    },
    {
      "epoch": 0.00348,
      "grad_norm": 0.821531229371674,
      "learning_rate": 0.001044,
      "loss": 5.9515,
      "step": 348
    },
    {
      "epoch": 0.00349,
      "grad_norm": 0.7749600052843896,
      "learning_rate": 0.001047,
      "loss": 5.9348,
      "step": 349
    },
    {
      "epoch": 0.0035,
      "grad_norm": 0.8742079309636731,
      "learning_rate": 0.00105,
      "loss": 5.9574,
      "step": 350
    },
    {
      "epoch": 0.00351,
      "grad_norm": 0.75264610249205,
      "learning_rate": 0.001053,
      "loss": 5.9539,
      "step": 351
    },
    {
      "epoch": 0.00352,
      "grad_norm": 0.6495925446513019,
      "learning_rate": 0.0010559999999999999,
      "loss": 5.9277,
      "step": 352
    },
    {
      "epoch": 0.00353,
      "grad_norm": 0.7308566609637007,
      "learning_rate": 0.001059,
      "loss": 5.9295,
      "step": 353
    },
    {
      "epoch": 0.00354,
      "grad_norm": 0.7419374622409085,
      "learning_rate": 0.001062,
      "loss": 5.9081,
      "step": 354
    },
    {
      "epoch": 0.00355,
      "grad_norm": 0.6305963094195233,
      "learning_rate": 0.001065,
      "loss": 5.9163,
      "step": 355
    },
    {
      "epoch": 0.00356,
      "grad_norm": 0.7593726199074547,
      "learning_rate": 0.001068,
      "loss": 5.9116,
      "step": 356
    },
    {
      "epoch": 0.00357,
      "grad_norm": 0.7130446666608321,
      "learning_rate": 0.001071,
      "loss": 5.9022,
      "step": 357
    },
    {
      "epoch": 0.00358,
      "grad_norm": 0.705496115321125,
      "learning_rate": 0.001074,
      "loss": 5.8948,
      "step": 358
    },
    {
      "epoch": 0.00359,
      "grad_norm": 0.7437786860075282,
      "learning_rate": 0.001077,
      "loss": 5.8901,
      "step": 359
    },
    {
      "epoch": 0.0036,
      "grad_norm": 0.6350063029419483,
      "learning_rate": 0.00108,
      "loss": 5.8869,
      "step": 360
    },
    {
      "epoch": 0.00361,
      "grad_norm": 0.7400426345488472,
      "learning_rate": 0.001083,
      "loss": 5.886,
      "step": 361
    },
    {
      "epoch": 0.00362,
      "grad_norm": 0.6062975404814006,
      "learning_rate": 0.001086,
      "loss": 5.8687,
      "step": 362
    },
    {
      "epoch": 0.00363,
      "grad_norm": 0.7647489229089097,
      "learning_rate": 0.001089,
      "loss": 5.8616,
      "step": 363
    },
    {
      "epoch": 0.00364,
      "grad_norm": 1.0426193764053155,
      "learning_rate": 0.001092,
      "loss": 5.8896,
      "step": 364
    },
    {
      "epoch": 0.00365,
      "grad_norm": 0.9699640617637266,
      "learning_rate": 0.001095,
      "loss": 5.8858,
      "step": 365
    },
    {
      "epoch": 0.00366,
      "grad_norm": 0.574276128757519,
      "learning_rate": 0.001098,
      "loss": 5.8508,
      "step": 366
    },
    {
      "epoch": 0.00367,
      "grad_norm": 0.6185761162607817,
      "learning_rate": 0.001101,
      "loss": 5.8561,
      "step": 367
    },
    {
      "epoch": 0.00368,
      "grad_norm": 0.5694281157783647,
      "learning_rate": 0.001104,
      "loss": 5.8542,
      "step": 368
    },
    {
      "epoch": 0.00369,
      "grad_norm": 0.6437285541711164,
      "learning_rate": 0.001107,
      "loss": 5.8594,
      "step": 369
    },
    {
      "epoch": 0.0037,
      "grad_norm": 0.8677179765560395,
      "learning_rate": 0.00111,
      "loss": 5.8442,
      "step": 370
    },
    {
      "epoch": 0.00371,
      "grad_norm": 0.9363826836877185,
      "learning_rate": 0.001113,
      "loss": 5.823,
      "step": 371
    },
    {
      "epoch": 0.00372,
      "grad_norm": 0.6769054561744376,
      "learning_rate": 0.001116,
      "loss": 5.835,
      "step": 372
    },
    {
      "epoch": 0.00373,
      "grad_norm": 0.711623190683124,
      "learning_rate": 0.001119,
      "loss": 5.827,
      "step": 373
    },
    {
      "epoch": 0.00374,
      "grad_norm": 0.8787133685385395,
      "learning_rate": 0.001122,
      "loss": 5.8376,
      "step": 374
    },
    {
      "epoch": 0.00375,
      "grad_norm": 1.1697788362092756,
      "learning_rate": 0.0011250000000000001,
      "loss": 5.8511,
      "step": 375
    },
    {
      "epoch": 0.00376,
      "grad_norm": 0.9709193918687598,
      "learning_rate": 0.001128,
      "loss": 5.8427,
      "step": 376
    },
    {
      "epoch": 0.00377,
      "grad_norm": 1.0376916283065132,
      "learning_rate": 0.001131,
      "loss": 5.8397,
      "step": 377
    },
    {
      "epoch": 0.00378,
      "grad_norm": 1.2375411877265758,
      "learning_rate": 0.001134,
      "loss": 5.8481,
      "step": 378
    },
    {
      "epoch": 0.00379,
      "grad_norm": 0.9944842378779577,
      "learning_rate": 0.001137,
      "loss": 5.822,
      "step": 379
    },
    {
      "epoch": 0.0038,
      "grad_norm": 1.1307760763710821,
      "learning_rate": 0.00114,
      "loss": 5.859,
      "step": 380
    },
    {
      "epoch": 0.00381,
      "grad_norm": 0.8573478329639129,
      "learning_rate": 0.0011430000000000001,
      "loss": 5.8136,
      "step": 381
    },
    {
      "epoch": 0.00382,
      "grad_norm": 0.7268105691491051,
      "learning_rate": 0.001146,
      "loss": 5.7943,
      "step": 382
    },
    {
      "epoch": 0.00383,
      "grad_norm": 0.7419446712141832,
      "learning_rate": 0.001149,
      "loss": 5.8074,
      "step": 383
    },
    {
      "epoch": 0.00384,
      "grad_norm": 0.7923675534238995,
      "learning_rate": 0.001152,
      "loss": 5.81,
      "step": 384
    },
    {
      "epoch": 0.00385,
      "grad_norm": 0.9624110447648943,
      "learning_rate": 0.001155,
      "loss": 5.805,
      "step": 385
    },
    {
      "epoch": 0.00386,
      "grad_norm": 0.9319530344899567,
      "learning_rate": 0.001158,
      "loss": 5.8081,
      "step": 386
    },
    {
      "epoch": 0.00387,
      "grad_norm": 0.7643274958196878,
      "learning_rate": 0.0011610000000000001,
      "loss": 5.7788,
      "step": 387
    },
    {
      "epoch": 0.00388,
      "grad_norm": 0.7506191440573698,
      "learning_rate": 0.001164,
      "loss": 5.7762,
      "step": 388
    },
    {
      "epoch": 0.00389,
      "grad_norm": 0.7532595251955709,
      "learning_rate": 0.001167,
      "loss": 5.7827,
      "step": 389
    },
    {
      "epoch": 0.0039,
      "grad_norm": 0.73320332461548,
      "learning_rate": 0.00117,
      "loss": 5.7649,
      "step": 390
    },
    {
      "epoch": 0.00391,
      "grad_norm": 0.7031896008677531,
      "learning_rate": 0.001173,
      "loss": 5.7704,
      "step": 391
    },
    {
      "epoch": 0.00392,
      "grad_norm": 0.6785810028909003,
      "learning_rate": 0.001176,
      "loss": 5.7796,
      "step": 392
    },
    {
      "epoch": 0.00393,
      "grad_norm": 0.4867762853103691,
      "learning_rate": 0.0011790000000000001,
      "loss": 5.7469,
      "step": 393
    },
    {
      "epoch": 0.00394,
      "grad_norm": 0.4635737108120594,
      "learning_rate": 0.001182,
      "loss": 5.7346,
      "step": 394
    },
    {
      "epoch": 0.00395,
      "grad_norm": 0.46294401028763466,
      "learning_rate": 0.001185,
      "loss": 5.7175,
      "step": 395
    },
    {
      "epoch": 0.00396,
      "grad_norm": 0.4787180663927712,
      "learning_rate": 0.001188,
      "loss": 5.7326,
      "step": 396
    },
    {
      "epoch": 0.00397,
      "grad_norm": 0.473675499009933,
      "learning_rate": 0.001191,
      "loss": 5.7249,
      "step": 397
    },
    {
      "epoch": 0.00398,
      "grad_norm": 0.4875545781350187,
      "learning_rate": 0.0011940000000000002,
      "loss": 5.7309,
      "step": 398
    },
    {
      "epoch": 0.00399,
      "grad_norm": 0.562929492137865,
      "learning_rate": 0.0011970000000000001,
      "loss": 5.7326,
      "step": 399
    },
    {
      "epoch": 0.004,
      "grad_norm": 0.5814123728626063,
      "learning_rate": 0.0012000000000000001,
      "loss": 5.7207,
      "step": 400
    },
    {
      "epoch": 0.00401,
      "grad_norm": 0.5133861042849526,
      "learning_rate": 0.001203,
      "loss": 5.6995,
      "step": 401
    },
    {
      "epoch": 0.00402,
      "grad_norm": 0.42439274620769013,
      "learning_rate": 0.001206,
      "loss": 5.7051,
      "step": 402
    },
    {
      "epoch": 0.00403,
      "grad_norm": 0.46979039810569395,
      "learning_rate": 0.001209,
      "loss": 5.7011,
      "step": 403
    },
    {
      "epoch": 0.00404,
      "grad_norm": 0.6400564820167204,
      "learning_rate": 0.0012120000000000002,
      "loss": 5.6973,
      "step": 404
    },
    {
      "epoch": 0.00405,
      "grad_norm": 0.9145906738089299,
      "learning_rate": 0.0012150000000000002,
      "loss": 5.7031,
      "step": 405
    },
    {
      "epoch": 0.00406,
      "grad_norm": 1.3255500600189978,
      "learning_rate": 0.0012180000000000001,
      "loss": 5.7063,
      "step": 406
    },
    {
      "epoch": 0.00407,
      "grad_norm": 0.8028038937216726,
      "learning_rate": 0.0012209999999999999,
      "loss": 5.688,
      "step": 407
    },
    {
      "epoch": 0.00408,
      "grad_norm": 0.9847538347741682,
      "learning_rate": 0.001224,
      "loss": 5.7174,
      "step": 408
    },
    {
      "epoch": 0.00409,
      "grad_norm": 0.837601769367776,
      "learning_rate": 0.001227,
      "loss": 5.6896,
      "step": 409
    },
    {
      "epoch": 0.0041,
      "grad_norm": 1.2038243323400906,
      "learning_rate": 0.00123,
      "loss": 5.7085,
      "step": 410
    },
    {
      "epoch": 0.00411,
      "grad_norm": 0.7395928558220229,
      "learning_rate": 0.001233,
      "loss": 5.6997,
      "step": 411
    },
    {
      "epoch": 0.00412,
      "grad_norm": 0.8216091961802617,
      "learning_rate": 0.001236,
      "loss": 5.6935,
      "step": 412
    },
    {
      "epoch": 0.00413,
      "grad_norm": 1.140214049358121,
      "learning_rate": 0.0012389999999999999,
      "loss": 5.7021,
      "step": 413
    },
    {
      "epoch": 0.00414,
      "grad_norm": 1.039942035452628,
      "learning_rate": 0.001242,
      "loss": 5.6845,
      "step": 414
    },
    {
      "epoch": 0.00415,
      "grad_norm": 1.134607365917555,
      "learning_rate": 0.001245,
      "loss": 5.6908,
      "step": 415
    },
    {
      "epoch": 0.00416,
      "grad_norm": 0.9596978489821891,
      "learning_rate": 0.001248,
      "loss": 5.6703,
      "step": 416
    },
    {
      "epoch": 0.00417,
      "grad_norm": 1.043455051006676,
      "learning_rate": 0.001251,
      "loss": 5.7098,
      "step": 417
    },
    {
      "epoch": 0.00418,
      "grad_norm": 1.165018490109447,
      "learning_rate": 0.001254,
      "loss": 5.6861,
      "step": 418
    },
    {
      "epoch": 0.00419,
      "grad_norm": 1.007469512856913,
      "learning_rate": 0.0012569999999999999,
      "loss": 5.6646,
      "step": 419
    },
    {
      "epoch": 0.0042,
      "grad_norm": 1.410591782248098,
      "learning_rate": 0.00126,
      "loss": 5.7025,
      "step": 420
    },
    {
      "epoch": 0.00421,
      "grad_norm": 0.7733656450445685,
      "learning_rate": 0.001263,
      "loss": 5.6715,
      "step": 421
    },
    {
      "epoch": 0.00422,
      "grad_norm": 0.6699538596951103,
      "learning_rate": 0.001266,
      "loss": 5.6588,
      "step": 422
    },
    {
      "epoch": 0.00423,
      "grad_norm": 0.5873287754113306,
      "learning_rate": 0.001269,
      "loss": 5.6538,
      "step": 423
    },
    {
      "epoch": 0.00424,
      "grad_norm": 0.6444567501036798,
      "learning_rate": 0.001272,
      "loss": 5.6532,
      "step": 424
    },
    {
      "epoch": 0.00425,
      "grad_norm": 0.6334269686168538,
      "learning_rate": 0.001275,
      "loss": 5.6565,
      "step": 425
    },
    {
      "epoch": 0.00426,
      "grad_norm": 0.6731028337322849,
      "learning_rate": 0.001278,
      "loss": 5.655,
      "step": 426
    },
    {
      "epoch": 0.00427,
      "grad_norm": 0.5902581772724022,
      "learning_rate": 0.001281,
      "loss": 5.6354,
      "step": 427
    },
    {
      "epoch": 0.00428,
      "grad_norm": 0.5926402895935227,
      "learning_rate": 0.001284,
      "loss": 5.6139,
      "step": 428
    },
    {
      "epoch": 0.00429,
      "grad_norm": 0.6674533850736829,
      "learning_rate": 0.001287,
      "loss": 5.6273,
      "step": 429
    },
    {
      "epoch": 0.0043,
      "grad_norm": 0.7049075078295334,
      "learning_rate": 0.00129,
      "loss": 5.6258,
      "step": 430
    },
    {
      "epoch": 0.00431,
      "grad_norm": 0.7061497333512059,
      "learning_rate": 0.001293,
      "loss": 5.6127,
      "step": 431
    },
    {
      "epoch": 0.00432,
      "grad_norm": 0.7508446834433861,
      "learning_rate": 0.001296,
      "loss": 5.6216,
      "step": 432
    },
    {
      "epoch": 0.00433,
      "grad_norm": 0.9060630970118144,
      "learning_rate": 0.001299,
      "loss": 5.6177,
      "step": 433
    },
    {
      "epoch": 0.00434,
      "grad_norm": 0.9999640540361012,
      "learning_rate": 0.001302,
      "loss": 5.6265,
      "step": 434
    },
    {
      "epoch": 0.00435,
      "grad_norm": 0.7892162060492922,
      "learning_rate": 0.001305,
      "loss": 5.6142,
      "step": 435
    },
    {
      "epoch": 0.00436,
      "grad_norm": 0.7307564408784262,
      "learning_rate": 0.001308,
      "loss": 5.5916,
      "step": 436
    },
    {
      "epoch": 0.00437,
      "grad_norm": 0.6809142164228668,
      "learning_rate": 0.001311,
      "loss": 5.6072,
      "step": 437
    },
    {
      "epoch": 0.00438,
      "grad_norm": 0.7111840477444925,
      "learning_rate": 0.001314,
      "loss": 5.5903,
      "step": 438
    },
    {
      "epoch": 0.00439,
      "grad_norm": 0.6900645963261404,
      "learning_rate": 0.001317,
      "loss": 5.6171,
      "step": 439
    },
    {
      "epoch": 0.0044,
      "grad_norm": 0.819282796874481,
      "learning_rate": 0.00132,
      "loss": 5.5903,
      "step": 440
    },
    {
      "epoch": 0.00441,
      "grad_norm": 0.8210691697996518,
      "learning_rate": 0.001323,
      "loss": 5.5903,
      "step": 441
    },
    {
      "epoch": 0.00442,
      "grad_norm": 0.7312534209405126,
      "learning_rate": 0.0013260000000000001,
      "loss": 5.579,
      "step": 442
    },
    {
      "epoch": 0.00443,
      "grad_norm": 0.8543448836580776,
      "learning_rate": 0.001329,
      "loss": 5.5857,
      "step": 443
    },
    {
      "epoch": 0.00444,
      "grad_norm": 0.9642366068930326,
      "learning_rate": 0.001332,
      "loss": 5.595,
      "step": 444
    },
    {
      "epoch": 0.00445,
      "grad_norm": 1.0151719432819601,
      "learning_rate": 0.001335,
      "loss": 5.5844,
      "step": 445
    },
    {
      "epoch": 0.00446,
      "grad_norm": 0.9098827299445092,
      "learning_rate": 0.001338,
      "loss": 5.5855,
      "step": 446
    },
    {
      "epoch": 0.00447,
      "grad_norm": 0.7366716149436047,
      "learning_rate": 0.001341,
      "loss": 5.5785,
      "step": 447
    },
    {
      "epoch": 0.00448,
      "grad_norm": 0.688063707422099,
      "learning_rate": 0.0013440000000000001,
      "loss": 5.5612,
      "step": 448
    },
    {
      "epoch": 0.00449,
      "grad_norm": 0.6289607789766871,
      "learning_rate": 0.001347,
      "loss": 5.5606,
      "step": 449
    },
    {
      "epoch": 0.0045,
      "grad_norm": 0.8296220427029676,
      "learning_rate": 0.00135,
      "loss": 5.5654,
      "step": 450
    },
    {
      "epoch": 0.00451,
      "grad_norm": 0.8639016329270134,
      "learning_rate": 0.001353,
      "loss": 5.5574,
      "step": 451
    },
    {
      "epoch": 0.00452,
      "grad_norm": 0.8710379060383533,
      "learning_rate": 0.001356,
      "loss": 5.5436,
      "step": 452
    },
    {
      "epoch": 0.00453,
      "grad_norm": 0.9193516591602126,
      "learning_rate": 0.001359,
      "loss": 5.5351,
      "step": 453
    },
    {
      "epoch": 0.00454,
      "grad_norm": 1.1479753211147432,
      "learning_rate": 0.0013620000000000001,
      "loss": 5.5609,
      "step": 454
    },
    {
      "epoch": 0.00455,
      "grad_norm": 0.777429711985851,
      "learning_rate": 0.0013650000000000001,
      "loss": 5.5488,
      "step": 455
    },
    {
      "epoch": 0.00456,
      "grad_norm": 0.6628903616445764,
      "learning_rate": 0.001368,
      "loss": 5.5436,
      "step": 456
    },
    {
      "epoch": 0.00457,
      "grad_norm": 0.6885714365872945,
      "learning_rate": 0.001371,
      "loss": 5.5338,
      "step": 457
    },
    {
      "epoch": 0.00458,
      "grad_norm": 0.8476587819535301,
      "learning_rate": 0.001374,
      "loss": 5.5439,
      "step": 458
    },
    {
      "epoch": 0.00459,
      "grad_norm": 1.0268132771248308,
      "learning_rate": 0.0013770000000000002,
      "loss": 5.5398,
      "step": 459
    },
    {
      "epoch": 0.0046,
      "grad_norm": 0.829796705713153,
      "learning_rate": 0.0013800000000000002,
      "loss": 5.5243,
      "step": 460
    },
    {
      "epoch": 0.00461,
      "grad_norm": 0.682304970956673,
      "learning_rate": 0.0013830000000000001,
      "loss": 5.5134,
      "step": 461
    },
    {
      "epoch": 0.00462,
      "grad_norm": 0.7632352528809807,
      "learning_rate": 0.001386,
      "loss": 5.5282,
      "step": 462
    },
    {
      "epoch": 0.00463,
      "grad_norm": 0.7888238491454299,
      "learning_rate": 0.001389,
      "loss": 5.5068,
      "step": 463
    },
    {
      "epoch": 0.00464,
      "grad_norm": 0.6055698609216487,
      "learning_rate": 0.001392,
      "loss": 5.5197,
      "step": 464
    },
    {
      "epoch": 0.00465,
      "grad_norm": 0.534605737151693,
      "learning_rate": 0.0013950000000000002,
      "loss": 5.4889,
      "step": 465
    },
    {
      "epoch": 0.00466,
      "grad_norm": 0.5742494169981804,
      "learning_rate": 0.0013980000000000002,
      "loss": 5.5068,
      "step": 466
    },
    {
      "epoch": 0.00467,
      "grad_norm": 0.5366049876051217,
      "learning_rate": 0.0014010000000000001,
      "loss": 5.495,
      "step": 467
    },
    {
      "epoch": 0.00468,
      "grad_norm": 0.5022698964006017,
      "learning_rate": 0.001404,
      "loss": 5.4802,
      "step": 468
    },
    {
      "epoch": 0.00469,
      "grad_norm": 0.4740914476325971,
      "learning_rate": 0.001407,
      "loss": 5.4823,
      "step": 469
    },
    {
      "epoch": 0.0047,
      "grad_norm": 0.5019536528871698,
      "learning_rate": 0.00141,
      "loss": 5.4905,
      "step": 470
    },
    {
      "epoch": 0.00471,
      "grad_norm": 0.4682933268188089,
      "learning_rate": 0.001413,
      "loss": 5.4686,
      "step": 471
    },
    {
      "epoch": 0.00472,
      "grad_norm": 0.5247906848769013,
      "learning_rate": 0.001416,
      "loss": 5.4781,
      "step": 472
    },
    {
      "epoch": 0.00473,
      "grad_norm": 0.639980530606925,
      "learning_rate": 0.001419,
      "loss": 5.4907,
      "step": 473
    },
    {
      "epoch": 0.00474,
      "grad_norm": 0.7807932825015014,
      "learning_rate": 0.0014219999999999999,
      "loss": 5.4763,
      "step": 474
    },
    {
      "epoch": 0.00475,
      "grad_norm": 0.7383779646696264,
      "learning_rate": 0.001425,
      "loss": 5.469,
      "step": 475
    },
    {
      "epoch": 0.00476,
      "grad_norm": 0.5375784338255847,
      "learning_rate": 0.001428,
      "loss": 5.4946,
      "step": 476
    },
    {
      "epoch": 0.00477,
      "grad_norm": 0.7920239446472193,
      "learning_rate": 0.001431,
      "loss": 5.4563,
      "step": 477
    },
    {
      "epoch": 0.00478,
      "grad_norm": 0.9834272495974716,
      "learning_rate": 0.001434,
      "loss": 5.4917,
      "step": 478
    },
    {
      "epoch": 0.00479,
      "grad_norm": 1.1904363785289231,
      "learning_rate": 0.001437,
      "loss": 5.4771,
      "step": 479
    },
    {
      "epoch": 0.0048,
      "grad_norm": 0.8836750578846579,
      "learning_rate": 0.0014399999999999999,
      "loss": 5.462,
      "step": 480
    },
    {
      "epoch": 0.00481,
      "grad_norm": 1.0971029164726256,
      "learning_rate": 0.001443,
      "loss": 5.4854,
      "step": 481
    },
    {
      "epoch": 0.00482,
      "grad_norm": 1.0424527071772058,
      "learning_rate": 0.001446,
      "loss": 5.4551,
      "step": 482
    },
    {
      "epoch": 0.00483,
      "grad_norm": 1.1383385814972007,
      "learning_rate": 0.001449,
      "loss": 5.4737,
      "step": 483
    },
    {
      "epoch": 0.00484,
      "grad_norm": 1.1589895400739378,
      "learning_rate": 0.001452,
      "loss": 5.4781,
      "step": 484
    },
    {
      "epoch": 0.00485,
      "grad_norm": 1.223866148569803,
      "learning_rate": 0.001455,
      "loss": 5.4947,
      "step": 485
    },
    {
      "epoch": 0.00486,
      "grad_norm": 0.6598880348489398,
      "learning_rate": 0.001458,
      "loss": 5.4403,
      "step": 486
    },
    {
      "epoch": 0.00487,
      "grad_norm": 0.8338085747463339,
      "learning_rate": 0.001461,
      "loss": 5.4654,
      "step": 487
    },
    {
      "epoch": 0.00488,
      "grad_norm": 0.8132081616614524,
      "learning_rate": 0.001464,
      "loss": 5.4355,
      "step": 488
    },
    {
      "epoch": 0.00489,
      "grad_norm": 0.8613199238884757,
      "learning_rate": 0.001467,
      "loss": 5.4338,
      "step": 489
    },
    {
      "epoch": 0.0049,
      "grad_norm": 0.7539827955426024,
      "learning_rate": 0.00147,
      "loss": 5.4409,
      "step": 490
    },
    {
      "epoch": 0.00491,
      "grad_norm": 0.6915747459996715,
      "learning_rate": 0.001473,
      "loss": 5.4368,
      "step": 491
    },
    {
      "epoch": 0.00492,
      "grad_norm": 0.6709201812755653,
      "learning_rate": 0.001476,
      "loss": 5.431,
      "step": 492
    },
    {
      "epoch": 0.00493,
      "grad_norm": 0.6684348811489516,
      "learning_rate": 0.001479,
      "loss": 5.4267,
      "step": 493
    },
    {
      "epoch": 0.00494,
      "grad_norm": 0.5545769515776193,
      "learning_rate": 0.001482,
      "loss": 5.428,
      "step": 494
    },
    {
      "epoch": 0.00495,
      "grad_norm": 0.5667135849985778,
      "learning_rate": 0.001485,
      "loss": 5.4318,
      "step": 495
    },
    {
      "epoch": 0.00496,
      "grad_norm": 0.553251223973209,
      "learning_rate": 0.001488,
      "loss": 5.4133,
      "step": 496
    },
    {
      "epoch": 0.00497,
      "grad_norm": 0.5321464111975676,
      "learning_rate": 0.001491,
      "loss": 5.3957,
      "step": 497
    },
    {
      "epoch": 0.00498,
      "grad_norm": 0.49096949355178177,
      "learning_rate": 0.001494,
      "loss": 5.419,
      "step": 498
    },
    {
      "epoch": 0.00499,
      "grad_norm": 0.5792511038424676,
      "learning_rate": 0.001497,
      "loss": 5.3935,
      "step": 499
    },
    {
      "epoch": 0.005,
      "grad_norm": 0.7094946648983601,
      "learning_rate": 0.0015,
      "loss": 5.4021,
      "step": 500
    },
    {
      "epoch": 0.00501,
      "grad_norm": 0.7830780899072106,
      "learning_rate": 0.001503,
      "loss": 5.4012,
      "step": 501
    },
    {
      "epoch": 0.00502,
      "grad_norm": 0.8705740259484833,
      "learning_rate": 0.001506,
      "loss": 5.4132,
      "step": 502
    },
    {
      "epoch": 0.00503,
      "grad_norm": 0.8118458630499436,
      "learning_rate": 0.0015090000000000001,
      "loss": 5.4131,
      "step": 503
    },
    {
      "epoch": 0.00504,
      "grad_norm": 0.8780863177851627,
      "learning_rate": 0.001512,
      "loss": 5.4024,
      "step": 504
    },
    {
      "epoch": 0.00505,
      "grad_norm": 0.8916117611460668,
      "learning_rate": 0.001515,
      "loss": 5.3938,
      "step": 505
    },
    {
      "epoch": 0.00506,
      "grad_norm": 0.9362197750510605,
      "learning_rate": 0.001518,
      "loss": 5.3997,
      "step": 506
    },
    {
      "epoch": 0.00507,
      "grad_norm": 1.0130920325114388,
      "learning_rate": 0.001521,
      "loss": 5.4015,
      "step": 507
    },
    {
      "epoch": 0.00508,
      "grad_norm": 0.8372448631493079,
      "learning_rate": 0.001524,
      "loss": 5.3973,
      "step": 508
    },
    {
      "epoch": 0.00509,
      "grad_norm": 0.8595290882383629,
      "learning_rate": 0.0015270000000000001,
      "loss": 5.3824,
      "step": 509
    },
    {
      "epoch": 0.0051,
      "grad_norm": 0.8908771623663538,
      "learning_rate": 0.0015300000000000001,
      "loss": 5.4017,
      "step": 510
    },
    {
      "epoch": 0.00511,
      "grad_norm": 1.018640071103151,
      "learning_rate": 0.001533,
      "loss": 5.3841,
      "step": 511
    },
    {
      "epoch": 0.00512,
      "grad_norm": 1.1039344108877285,
      "learning_rate": 0.001536,
      "loss": 5.3839,
      "step": 512
    },
    {
      "epoch": 0.00513,
      "grad_norm": 1.018705128757106,
      "learning_rate": 0.001539,
      "loss": 5.3992,
      "step": 513
    },
    {
      "epoch": 0.00514,
      "grad_norm": 1.0856823606740633,
      "learning_rate": 0.001542,
      "loss": 5.3891,
      "step": 514
    },
    {
      "epoch": 0.00515,
      "grad_norm": 1.0775919048452265,
      "learning_rate": 0.0015450000000000001,
      "loss": 5.3996,
      "step": 515
    },
    {
      "epoch": 0.00516,
      "grad_norm": 1.0104511780701346,
      "learning_rate": 0.0015480000000000001,
      "loss": 5.3964,
      "step": 516
    },
    {
      "epoch": 0.00517,
      "grad_norm": 1.0413855474558291,
      "learning_rate": 0.001551,
      "loss": 5.3886,
      "step": 517
    },
    {
      "epoch": 0.00518,
      "grad_norm": 0.7612973684089466,
      "learning_rate": 0.001554,
      "loss": 5.3656,
      "step": 518
    },
    {
      "epoch": 0.00519,
      "grad_norm": 0.6774626111559078,
      "learning_rate": 0.001557,
      "loss": 5.3631,
      "step": 519
    },
    {
      "epoch": 0.0052,
      "grad_norm": 0.5696849549020969,
      "learning_rate": 0.0015600000000000002,
      "loss": 5.3692,
      "step": 520
    },
    {
      "epoch": 0.00521,
      "grad_norm": 0.5729212961920945,
      "learning_rate": 0.0015630000000000002,
      "loss": 5.3421,
      "step": 521
    },
    {
      "epoch": 0.00522,
      "grad_norm": 0.6524634891271429,
      "learning_rate": 0.0015660000000000001,
      "loss": 5.3482,
      "step": 522
    },
    {
      "epoch": 0.00523,
      "grad_norm": 0.6726078234829947,
      "learning_rate": 0.001569,
      "loss": 5.3604,
      "step": 523
    },
    {
      "epoch": 0.00524,
      "grad_norm": 0.6519432518685719,
      "learning_rate": 0.001572,
      "loss": 5.3646,
      "step": 524
    },
    {
      "epoch": 0.00525,
      "grad_norm": 0.6991224311762642,
      "learning_rate": 0.001575,
      "loss": 5.3575,
      "step": 525
    },
    {
      "epoch": 0.00526,
      "grad_norm": 0.6202961503223249,
      "learning_rate": 0.0015780000000000002,
      "loss": 5.3306,
      "step": 526
    },
    {
      "epoch": 0.00527,
      "grad_norm": 0.5608544792658494,
      "learning_rate": 0.0015810000000000002,
      "loss": 5.3379,
      "step": 527
    },
    {
      "epoch": 0.00528,
      "grad_norm": 0.6177007267681738,
      "learning_rate": 0.0015840000000000001,
      "loss": 5.3206,
      "step": 528
    },
    {
      "epoch": 0.00529,
      "grad_norm": 0.48495492558902425,
      "learning_rate": 0.001587,
      "loss": 5.3246,
      "step": 529
    },
    {
      "epoch": 0.0053,
      "grad_norm": 0.4709323576989762,
      "learning_rate": 0.00159,
      "loss": 5.3304,
      "step": 530
    },
    {
      "epoch": 0.00531,
      "grad_norm": 0.529532719880688,
      "learning_rate": 0.001593,
      "loss": 5.3166,
      "step": 531
    },
    {
      "epoch": 0.00532,
      "grad_norm": 0.5098383277074169,
      "learning_rate": 0.0015960000000000002,
      "loss": 5.3216,
      "step": 532
    },
    {
      "epoch": 0.00533,
      "grad_norm": 0.4454381993762929,
      "learning_rate": 0.0015990000000000002,
      "loss": 5.3134,
      "step": 533
    },
    {
      "epoch": 0.00534,
      "grad_norm": 0.4252458077548216,
      "learning_rate": 0.0016020000000000001,
      "loss": 5.3027,
      "step": 534
    },
    {
      "epoch": 0.00535,
      "grad_norm": 0.5072171102652949,
      "learning_rate": 0.001605,
      "loss": 5.3077,
      "step": 535
    },
    {
      "epoch": 0.00536,
      "grad_norm": 0.6459822593413646,
      "learning_rate": 0.001608,
      "loss": 5.3107,
      "step": 536
    },
    {
      "epoch": 0.00537,
      "grad_norm": 0.7666830941778624,
      "learning_rate": 0.0016110000000000002,
      "loss": 5.31,
      "step": 537
    },
    {
      "epoch": 0.00538,
      "grad_norm": 0.8221507236048067,
      "learning_rate": 0.0016140000000000002,
      "loss": 5.2923,
      "step": 538
    },
    {
      "epoch": 0.00539,
      "grad_norm": 0.831779757381357,
      "learning_rate": 0.0016170000000000002,
      "loss": 5.2913,
      "step": 539
    },
    {
      "epoch": 0.0054,
      "grad_norm": 0.7856275345794541,
      "learning_rate": 0.0016200000000000001,
      "loss": 5.3016,
      "step": 540
    },
    {
      "epoch": 0.00541,
      "grad_norm": 0.6804075992032543,
      "learning_rate": 0.001623,
      "loss": 5.2858,
      "step": 541
    },
    {
      "epoch": 0.00542,
      "grad_norm": 0.7690794494453981,
      "learning_rate": 0.001626,
      "loss": 5.3108,
      "step": 542
    },
    {
      "epoch": 0.00543,
      "grad_norm": 0.8644486526927273,
      "learning_rate": 0.0016290000000000002,
      "loss": 5.2796,
      "step": 543
    },
    {
      "epoch": 0.00544,
      "grad_norm": 0.8496144379530629,
      "learning_rate": 0.0016320000000000002,
      "loss": 5.3021,
      "step": 544
    },
    {
      "epoch": 0.00545,
      "grad_norm": 0.9475980596452462,
      "learning_rate": 0.0016350000000000002,
      "loss": 5.3177,
      "step": 545
    },
    {
      "epoch": 0.00546,
      "grad_norm": 0.9845718031318067,
      "learning_rate": 0.0016380000000000001,
      "loss": 5.295,
      "step": 546
    },
    {
      "epoch": 0.00547,
      "grad_norm": 0.9409915747667029,
      "learning_rate": 0.001641,
      "loss": 5.2939,
      "step": 547
    },
    {
      "epoch": 0.00548,
      "grad_norm": 0.9843592942492043,
      "learning_rate": 0.001644,
      "loss": 5.2968,
      "step": 548
    },
    {
      "epoch": 0.00549,
      "grad_norm": 0.9493094260301282,
      "learning_rate": 0.0016470000000000002,
      "loss": 5.2934,
      "step": 549
    },
    {
      "epoch": 0.0055,
      "grad_norm": 0.8611361277859524,
      "learning_rate": 0.0016500000000000002,
      "loss": 5.3029,
      "step": 550
    },
    {
      "epoch": 0.00551,
      "grad_norm": 0.9715881441414039,
      "learning_rate": 0.0016530000000000002,
      "loss": 5.3065,
      "step": 551
    },
    {
      "epoch": 0.00552,
      "grad_norm": 1.1094992375785262,
      "learning_rate": 0.0016560000000000001,
      "loss": 5.3056,
      "step": 552
    },
    {
      "epoch": 0.00553,
      "grad_norm": 1.0534001152383816,
      "learning_rate": 0.001659,
      "loss": 5.2999,
      "step": 553
    },
    {
      "epoch": 0.00554,
      "grad_norm": 0.9710868679896414,
      "learning_rate": 0.0016620000000000003,
      "loss": 5.3023,
      "step": 554
    },
    {
      "epoch": 0.00555,
      "grad_norm": 0.7839221021589211,
      "learning_rate": 0.0016650000000000002,
      "loss": 5.2983,
      "step": 555
    },
    {
      "epoch": 0.00556,
      "grad_norm": 0.7751353928928436,
      "learning_rate": 0.0016680000000000002,
      "loss": 5.2838,
      "step": 556
    },
    {
      "epoch": 0.00557,
      "grad_norm": 0.976465943327027,
      "learning_rate": 0.0016710000000000002,
      "loss": 5.2743,
      "step": 557
    },
    {
      "epoch": 0.00558,
      "grad_norm": 1.1542789282902877,
      "learning_rate": 0.0016740000000000001,
      "loss": 5.3159,
      "step": 558
    },
    {
      "epoch": 0.00559,
      "grad_norm": 0.925460422693844,
      "learning_rate": 0.001677,
      "loss": 5.2857,
      "step": 559
    },
    {
      "epoch": 0.0056,
      "grad_norm": 0.9401693950640773,
      "learning_rate": 0.0016800000000000003,
      "loss": 5.2634,
      "step": 560
    },
    {
      "epoch": 0.00561,
      "grad_norm": 0.9568676376365609,
      "learning_rate": 0.0016830000000000003,
      "loss": 5.2875,
      "step": 561
    },
    {
      "epoch": 0.00562,
      "grad_norm": 1.052222958057522,
      "learning_rate": 0.0016860000000000002,
      "loss": 5.2885,
      "step": 562
    },
    {
      "epoch": 0.00563,
      "grad_norm": 0.8981786385090996,
      "learning_rate": 0.001689,
      "loss": 5.2804,
      "step": 563
    },
    {
      "epoch": 0.00564,
      "grad_norm": 0.8729925726037424,
      "learning_rate": 0.001692,
      "loss": 5.2632,
      "step": 564
    },
    {
      "epoch": 0.00565,
      "grad_norm": 0.7853718417215511,
      "learning_rate": 0.001695,
      "loss": 5.2593,
      "step": 565
    },
    {
      "epoch": 0.00566,
      "grad_norm": 0.7408826324772542,
      "learning_rate": 0.0016979999999999999,
      "loss": 5.253,
      "step": 566
    },
    {
      "epoch": 0.00567,
      "grad_norm": 0.6229776590552076,
      "learning_rate": 0.0017009999999999998,
      "loss": 5.2429,
      "step": 567
    },
    {
      "epoch": 0.00568,
      "grad_norm": 0.5598722343375323,
      "learning_rate": 0.0017039999999999998,
      "loss": 5.2516,
      "step": 568
    },
    {
      "epoch": 0.00569,
      "grad_norm": 0.5222238686823768,
      "learning_rate": 0.001707,
      "loss": 5.2269,
      "step": 569
    },
    {
      "epoch": 0.0057,
      "grad_norm": 0.4848165136559503,
      "learning_rate": 0.00171,
      "loss": 5.2449,
      "step": 570
    },
    {
      "epoch": 0.00571,
      "grad_norm": 0.4212619410786628,
      "learning_rate": 0.001713,
      "loss": 5.2393,
      "step": 571
    },
    {
      "epoch": 0.00572,
      "grad_norm": 0.46300721556593205,
      "learning_rate": 0.0017159999999999999,
      "loss": 5.2148,
      "step": 572
    },
    {
      "epoch": 0.00573,
      "grad_norm": 0.46690826663873664,
      "learning_rate": 0.0017189999999999998,
      "loss": 5.2177,
      "step": 573
    },
    {
      "epoch": 0.00574,
      "grad_norm": 0.5544353822714976,
      "learning_rate": 0.001722,
      "loss": 5.2291,
      "step": 574
    },
    {
      "epoch": 0.00575,
      "grad_norm": 0.5414515614228337,
      "learning_rate": 0.001725,
      "loss": 5.2052,
      "step": 575
    },
    {
      "epoch": 0.00576,
      "grad_norm": 0.460862784072886,
      "learning_rate": 0.001728,
      "loss": 5.1957,
      "step": 576
    },
    {
      "epoch": 0.00577,
      "grad_norm": 0.37283009231118824,
      "learning_rate": 0.001731,
      "loss": 5.2169,
      "step": 577
    },
    {
      "epoch": 0.00578,
      "grad_norm": 0.394929191519935,
      "learning_rate": 0.0017339999999999999,
      "loss": 5.1956,
      "step": 578
    },
    {
      "epoch": 0.00579,
      "grad_norm": 0.46312560639878303,
      "learning_rate": 0.0017369999999999998,
      "loss": 5.1828,
      "step": 579
    },
    {
      "epoch": 0.0058,
      "grad_norm": 0.5018384741903004,
      "learning_rate": 0.00174,
      "loss": 5.1996,
      "step": 580
    },
    {
      "epoch": 0.00581,
      "grad_norm": 0.607886610189308,
      "learning_rate": 0.001743,
      "loss": 5.2004,
      "step": 581
    },
    {
      "epoch": 0.00582,
      "grad_norm": 0.6369652189686417,
      "learning_rate": 0.001746,
      "loss": 5.1762,
      "step": 582
    },
    {
      "epoch": 0.00583,
      "grad_norm": 0.6307825776815035,
      "learning_rate": 0.001749,
      "loss": 5.1999,
      "step": 583
    },
    {
      "epoch": 0.00584,
      "grad_norm": 0.6660509674445032,
      "learning_rate": 0.0017519999999999999,
      "loss": 5.1973,
      "step": 584
    },
    {
      "epoch": 0.00585,
      "grad_norm": 0.6419220280838183,
      "learning_rate": 0.0017549999999999998,
      "loss": 5.1782,
      "step": 585
    },
    {
      "epoch": 0.00586,
      "grad_norm": 0.6225781818783162,
      "learning_rate": 0.001758,
      "loss": 5.1816,
      "step": 586
    },
    {
      "epoch": 0.00587,
      "grad_norm": 0.7071422118089229,
      "learning_rate": 0.001761,
      "loss": 5.1689,
      "step": 587
    },
    {
      "epoch": 0.00588,
      "grad_norm": 0.7860915960717592,
      "learning_rate": 0.001764,
      "loss": 5.1776,
      "step": 588
    },
    {
      "epoch": 0.00589,
      "grad_norm": 0.7980122505569107,
      "learning_rate": 0.001767,
      "loss": 5.1811,
      "step": 589
    },
    {
      "epoch": 0.0059,
      "grad_norm": 0.7215274327961783,
      "learning_rate": 0.0017699999999999999,
      "loss": 5.1658,
      "step": 590
    },
    {
      "epoch": 0.00591,
      "grad_norm": 0.8634769330088186,
      "learning_rate": 0.001773,
      "loss": 5.1655,
      "step": 591
    },
    {
      "epoch": 0.00592,
      "grad_norm": 0.997893173060324,
      "learning_rate": 0.001776,
      "loss": 5.2022,
      "step": 592
    },
    {
      "epoch": 0.00593,
      "grad_norm": 1.03294909692111,
      "learning_rate": 0.001779,
      "loss": 5.2054,
      "step": 593
    },
    {
      "epoch": 0.00594,
      "grad_norm": 1.1929198243438337,
      "learning_rate": 0.001782,
      "loss": 5.2059,
      "step": 594
    },
    {
      "epoch": 0.00595,
      "grad_norm": 0.9565031631375653,
      "learning_rate": 0.001785,
      "loss": 5.1996,
      "step": 595
    },
    {
      "epoch": 0.00596,
      "grad_norm": 0.7489131564683893,
      "learning_rate": 0.0017879999999999999,
      "loss": 5.1856,
      "step": 596
    },
    {
      "epoch": 0.00597,
      "grad_norm": 0.8084904095480396,
      "learning_rate": 0.001791,
      "loss": 5.1654,
      "step": 597
    },
    {
      "epoch": 0.00598,
      "grad_norm": 0.8735012375123758,
      "learning_rate": 0.001794,
      "loss": 5.1801,
      "step": 598
    },
    {
      "epoch": 0.00599,
      "grad_norm": 0.8866107272305487,
      "learning_rate": 0.001797,
      "loss": 5.1794,
      "step": 599
    },
    {
      "epoch": 0.006,
      "grad_norm": 0.9944298023441023,
      "learning_rate": 0.0018,
      "loss": 5.186,
      "step": 600
    },
    {
      "epoch": 0.00601,
      "grad_norm": 1.2284675534425753,
      "learning_rate": 0.001803,
      "loss": 5.1964,
      "step": 601
    },
    {
      "epoch": 0.00602,
      "grad_norm": 0.9174538372133724,
      "learning_rate": 0.0018059999999999999,
      "loss": 5.167,
      "step": 602
    },
    {
      "epoch": 0.00603,
      "grad_norm": 1.0377055005145264,
      "learning_rate": 0.001809,
      "loss": 5.199,
      "step": 603
    },
    {
      "epoch": 0.00604,
      "grad_norm": 0.914710796715033,
      "learning_rate": 0.001812,
      "loss": 5.1662,
      "step": 604
    },
    {
      "epoch": 0.00605,
      "grad_norm": 1.2614192403498132,
      "learning_rate": 0.001815,
      "loss": 5.1973,
      "step": 605
    },
    {
      "epoch": 0.00606,
      "grad_norm": 0.7058550445524863,
      "learning_rate": 0.001818,
      "loss": 5.1744,
      "step": 606
    },
    {
      "epoch": 0.00607,
      "grad_norm": 0.7505416501870767,
      "learning_rate": 0.001821,
      "loss": 5.1771,
      "step": 607
    },
    {
      "epoch": 0.00608,
      "grad_norm": 0.8233533261017171,
      "learning_rate": 0.001824,
      "loss": 5.1679,
      "step": 608
    },
    {
      "epoch": 0.00609,
      "grad_norm": 0.7138670005342819,
      "learning_rate": 0.001827,
      "loss": 5.1654,
      "step": 609
    },
    {
      "epoch": 0.0061,
      "grad_norm": 0.6840037013923436,
      "learning_rate": 0.00183,
      "loss": 5.1301,
      "step": 610
    },
    {
      "epoch": 0.00611,
      "grad_norm": 0.9092201750767857,
      "learning_rate": 0.001833,
      "loss": 5.1441,
      "step": 611
    },
    {
      "epoch": 0.00612,
      "grad_norm": 0.9586638015997742,
      "learning_rate": 0.001836,
      "loss": 5.1502,
      "step": 612
    },
    {
      "epoch": 0.00613,
      "grad_norm": 0.9726859873682268,
      "learning_rate": 0.001839,
      "loss": 5.1479,
      "step": 613
    },
    {
      "epoch": 0.00614,
      "grad_norm": 0.8517287134785719,
      "learning_rate": 0.001842,
      "loss": 5.1632,
      "step": 614
    },
    {
      "epoch": 0.00615,
      "grad_norm": 0.808391187458492,
      "learning_rate": 0.001845,
      "loss": 5.1468,
      "step": 615
    },
    {
      "epoch": 0.00616,
      "grad_norm": 0.9667719890903622,
      "learning_rate": 0.001848,
      "loss": 5.139,
      "step": 616
    },
    {
      "epoch": 0.00617,
      "grad_norm": 0.9287751010032772,
      "learning_rate": 0.001851,
      "loss": 5.1507,
      "step": 617
    },
    {
      "epoch": 0.00618,
      "grad_norm": 0.8398949490968032,
      "learning_rate": 0.001854,
      "loss": 5.1338,
      "step": 618
    },
    {
      "epoch": 0.00619,
      "grad_norm": 0.8191529309136791,
      "learning_rate": 0.001857,
      "loss": 5.1377,
      "step": 619
    },
    {
      "epoch": 0.0062,
      "grad_norm": 0.6738755871000573,
      "learning_rate": 0.00186,
      "loss": 5.1151,
      "step": 620
    },
    {
      "epoch": 0.00621,
      "grad_norm": 0.542431752937381,
      "learning_rate": 0.001863,
      "loss": 5.1102,
      "step": 621
    },
    {
      "epoch": 0.00622,
      "grad_norm": 0.5826015665301125,
      "learning_rate": 0.001866,
      "loss": 5.1076,
      "step": 622
    },
    {
      "epoch": 0.00623,
      "grad_norm": 0.6277970980683506,
      "learning_rate": 0.001869,
      "loss": 5.0901,
      "step": 623
    },
    {
      "epoch": 0.00624,
      "grad_norm": 0.6204003612309649,
      "learning_rate": 0.001872,
      "loss": 5.0981,
      "step": 624
    },
    {
      "epoch": 0.00625,
      "grad_norm": 0.5202043778302353,
      "learning_rate": 0.001875,
      "loss": 5.1041,
      "step": 625
    },
    {
      "epoch": 0.00626,
      "grad_norm": 0.4583601362158537,
      "learning_rate": 0.0018780000000000001,
      "loss": 5.0782,
      "step": 626
    },
    {
      "epoch": 0.00627,
      "grad_norm": 0.49456742347801874,
      "learning_rate": 0.001881,
      "loss": 5.0657,
      "step": 627
    },
    {
      "epoch": 0.00628,
      "grad_norm": 0.4979469593628374,
      "learning_rate": 0.001884,
      "loss": 5.0939,
      "step": 628
    },
    {
      "epoch": 0.00629,
      "grad_norm": 0.4931618065441273,
      "learning_rate": 0.001887,
      "loss": 5.0612,
      "step": 629
    },
    {
      "epoch": 0.0063,
      "grad_norm": 0.5205717896967359,
      "learning_rate": 0.00189,
      "loss": 5.0588,
      "step": 630
    },
    {
      "epoch": 0.00631,
      "grad_norm": 0.46185074539676735,
      "learning_rate": 0.0018930000000000002,
      "loss": 5.0466,
      "step": 631
    },
    {
      "epoch": 0.00632,
      "grad_norm": 0.5076264188019036,
      "learning_rate": 0.0018960000000000001,
      "loss": 5.0644,
      "step": 632
    },
    {
      "epoch": 0.00633,
      "grad_norm": 0.5650163435420743,
      "learning_rate": 0.001899,
      "loss": 5.0598,
      "step": 633
    },
    {
      "epoch": 0.00634,
      "grad_norm": 0.6177308428090045,
      "learning_rate": 0.001902,
      "loss": 5.0443,
      "step": 634
    },
    {
      "epoch": 0.00635,
      "grad_norm": 0.5966699478227224,
      "learning_rate": 0.001905,
      "loss": 5.0351,
      "step": 635
    },
    {
      "epoch": 0.00636,
      "grad_norm": 0.46738522618380884,
      "learning_rate": 0.001908,
      "loss": 5.0351,
      "step": 636
    },
    {
      "epoch": 0.00637,
      "grad_norm": 0.5725125755370889,
      "learning_rate": 0.0019110000000000002,
      "loss": 5.0522,
      "step": 637
    },
    {
      "epoch": 0.00638,
      "grad_norm": 0.6188024234981583,
      "learning_rate": 0.0019140000000000001,
      "loss": 5.0398,
      "step": 638
    },
    {
      "epoch": 0.00639,
      "grad_norm": 0.612857971520305,
      "learning_rate": 0.001917,
      "loss": 5.0146,
      "step": 639
    },
    {
      "epoch": 0.0064,
      "grad_norm": 0.7145019607417652,
      "learning_rate": 0.00192,
      "loss": 5.021,
      "step": 640
    },
    {
      "epoch": 0.00641,
      "grad_norm": 0.8625442633088365,
      "learning_rate": 0.001923,
      "loss": 5.0332,
      "step": 641
    },
    {
      "epoch": 0.00642,
      "grad_norm": 0.9132924514062416,
      "learning_rate": 0.001926,
      "loss": 5.0263,
      "step": 642
    },
    {
      "epoch": 0.00643,
      "grad_norm": 0.9679240940884513,
      "learning_rate": 0.0019290000000000002,
      "loss": 5.0516,
      "step": 643
    },
    {
      "epoch": 0.00644,
      "grad_norm": 0.9705199088056594,
      "learning_rate": 0.0019320000000000001,
      "loss": 5.0234,
      "step": 644
    },
    {
      "epoch": 0.00645,
      "grad_norm": 0.8188630256116934,
      "learning_rate": 0.001935,
      "loss": 5.0249,
      "step": 645
    },
    {
      "epoch": 0.00646,
      "grad_norm": 0.8158823988142996,
      "learning_rate": 0.001938,
      "loss": 5.0348,
      "step": 646
    },
    {
      "epoch": 0.00647,
      "grad_norm": 0.7787935632242667,
      "learning_rate": 0.001941,
      "loss": 5.037,
      "step": 647
    },
    {
      "epoch": 0.00648,
      "grad_norm": 0.9237403874154956,
      "learning_rate": 0.0019440000000000002,
      "loss": 5.0316,
      "step": 648
    },
    {
      "epoch": 0.00649,
      "grad_norm": 1.036550642596614,
      "learning_rate": 0.0019470000000000002,
      "loss": 5.0289,
      "step": 649
    },
    {
      "epoch": 0.0065,
      "grad_norm": 0.8648530228627821,
      "learning_rate": 0.0019500000000000001,
      "loss": 5.0479,
      "step": 650
    },
    {
      "epoch": 0.00651,
      "grad_norm": 0.984999530178637,
      "learning_rate": 0.001953,
      "loss": 5.0283,
      "step": 651
    },
    {
      "epoch": 0.00652,
      "grad_norm": 1.1933341828071018,
      "learning_rate": 0.0019560000000000003,
      "loss": 5.0354,
      "step": 652
    },
    {
      "epoch": 0.00653,
      "grad_norm": 0.8822963296035995,
      "learning_rate": 0.0019590000000000002,
      "loss": 5.0195,
      "step": 653
    },
    {
      "epoch": 0.00654,
      "grad_norm": 0.9601302022272827,
      "learning_rate": 0.001962,
      "loss": 5.0019,
      "step": 654
    },
    {
      "epoch": 0.00655,
      "grad_norm": 1.1161400796235366,
      "learning_rate": 0.001965,
      "loss": 5.0334,
      "step": 655
    },
    {
      "epoch": 0.00656,
      "grad_norm": 1.0829794237799832,
      "learning_rate": 0.001968,
      "loss": 5.0307,
      "step": 656
    },
    {
      "epoch": 0.00657,
      "grad_norm": 0.9921530841514947,
      "learning_rate": 0.001971,
      "loss": 5.0086,
      "step": 657
    },
    {
      "epoch": 0.00658,
      "grad_norm": 1.028510611823797,
      "learning_rate": 0.001974,
      "loss": 5.0305,
      "step": 658
    },
    {
      "epoch": 0.00659,
      "grad_norm": 1.0763635896939565,
      "learning_rate": 0.001977,
      "loss": 5.0177,
      "step": 659
    },
    {
      "epoch": 0.0066,
      "grad_norm": 1.3314133581668428,
      "learning_rate": 0.00198,
      "loss": 5.0358,
      "step": 660
    },
    {
      "epoch": 0.00661,
      "grad_norm": 0.8366683487964699,
      "learning_rate": 0.001983,
      "loss": 4.987,
      "step": 661
    },
    {
      "epoch": 0.00662,
      "grad_norm": 0.9690057003124267,
      "learning_rate": 0.0019860000000000004,
      "loss": 4.9897,
      "step": 662
    },
    {
      "epoch": 0.00663,
      "grad_norm": 1.1210889842101375,
      "learning_rate": 0.0019890000000000003,
      "loss": 5.0167,
      "step": 663
    },
    {
      "epoch": 0.00664,
      "grad_norm": 0.9227592782421535,
      "learning_rate": 0.0019920000000000003,
      "loss": 4.9926,
      "step": 664
    },
    {
      "epoch": 0.00665,
      "grad_norm": 0.7430643643535455,
      "learning_rate": 0.0019950000000000002,
      "loss": 4.9603,
      "step": 665
    },
    {
      "epoch": 0.00666,
      "grad_norm": 0.730237759629391,
      "learning_rate": 0.001998,
      "loss": 4.9749,
      "step": 666
    },
    {
      "epoch": 0.00667,
      "grad_norm": 0.7298395123979716,
      "learning_rate": 0.002001,
      "loss": 4.9682,
      "step": 667
    },
    {
      "epoch": 0.00668,
      "grad_norm": 0.7359965443598192,
      "learning_rate": 0.002004,
      "loss": 4.9858,
      "step": 668
    },
    {
      "epoch": 0.00669,
      "grad_norm": 0.803685757229409,
      "learning_rate": 0.002007,
      "loss": 4.9957,
      "step": 669
    },
    {
      "epoch": 0.0067,
      "grad_norm": 0.7455537997836268,
      "learning_rate": 0.00201,
      "loss": 4.9782,
      "step": 670
    },
    {
      "epoch": 0.00671,
      "grad_norm": 0.7155119749171314,
      "learning_rate": 0.002013,
      "loss": 4.9551,
      "step": 671
    },
    {
      "epoch": 0.00672,
      "grad_norm": 0.7618179803756451,
      "learning_rate": 0.002016,
      "loss": 4.9472,
      "step": 672
    },
    {
      "epoch": 0.00673,
      "grad_norm": 0.927797245884825,
      "learning_rate": 0.002019,
      "loss": 4.953,
      "step": 673
    },
    {
      "epoch": 0.00674,
      "grad_norm": 1.0633432044595463,
      "learning_rate": 0.0020220000000000004,
      "loss": 4.9634,
      "step": 674
    },
    {
      "epoch": 0.00675,
      "grad_norm": 0.8942052250721098,
      "learning_rate": 0.0020250000000000003,
      "loss": 4.9712,
      "step": 675
    },
    {
      "epoch": 0.00676,
      "grad_norm": 0.8664052109661106,
      "learning_rate": 0.0020280000000000003,
      "loss": 4.9561,
      "step": 676
    },
    {
      "epoch": 0.00677,
      "grad_norm": 0.7768891936596246,
      "learning_rate": 0.0020310000000000003,
      "loss": 4.9501,
      "step": 677
    },
    {
      "epoch": 0.00678,
      "grad_norm": 0.8508591245716224,
      "learning_rate": 0.0020340000000000002,
      "loss": 4.9609,
      "step": 678
    },
    {
      "epoch": 0.00679,
      "grad_norm": 0.899178484097066,
      "learning_rate": 0.002037,
      "loss": 4.9511,
      "step": 679
    },
    {
      "epoch": 0.0068,
      "grad_norm": 0.9041496786679454,
      "learning_rate": 0.00204,
      "loss": 4.9463,
      "step": 680
    },
    {
      "epoch": 0.00681,
      "grad_norm": 1.1323904416700994,
      "learning_rate": 0.002043,
      "loss": 4.9558,
      "step": 681
    },
    {
      "epoch": 0.00682,
      "grad_norm": 0.9111266905834888,
      "learning_rate": 0.002046,
      "loss": 4.9418,
      "step": 682
    },
    {
      "epoch": 0.00683,
      "grad_norm": 0.8925821061333421,
      "learning_rate": 0.002049,
      "loss": 4.9271,
      "step": 683
    },
    {
      "epoch": 0.00684,
      "grad_norm": 0.9085464749855919,
      "learning_rate": 0.002052,
      "loss": 4.9489,
      "step": 684
    },
    {
      "epoch": 0.00685,
      "grad_norm": 0.7656795310652994,
      "learning_rate": 0.0020550000000000004,
      "loss": 4.9405,
      "step": 685
    },
    {
      "epoch": 0.00686,
      "grad_norm": 0.7811896035655447,
      "learning_rate": 0.0020580000000000004,
      "loss": 4.9138,
      "step": 686
    },
    {
      "epoch": 0.00687,
      "grad_norm": 0.6835895457049103,
      "learning_rate": 0.0020610000000000003,
      "loss": 4.9092,
      "step": 687
    },
    {
      "epoch": 0.00688,
      "grad_norm": 0.7478929092218646,
      "learning_rate": 0.002064,
      "loss": 4.9101,
      "step": 688
    },
    {
      "epoch": 0.00689,
      "grad_norm": 0.722678645543914,
      "learning_rate": 0.002067,
      "loss": 4.9105,
      "step": 689
    },
    {
      "epoch": 0.0069,
      "grad_norm": 0.777097923162003,
      "learning_rate": 0.00207,
      "loss": 4.9057,
      "step": 690
    },
    {
      "epoch": 0.00691,
      "grad_norm": 0.8568701965666653,
      "learning_rate": 0.0020729999999999998,
      "loss": 4.8774,
      "step": 691
    },
    {
      "epoch": 0.00692,
      "grad_norm": 0.7917857300738825,
      "learning_rate": 0.0020759999999999997,
      "loss": 4.8965,
      "step": 692
    },
    {
      "epoch": 0.00693,
      "grad_norm": 0.6390484895042627,
      "learning_rate": 0.0020789999999999997,
      "loss": 4.8963,
      "step": 693
    },
    {
      "epoch": 0.00694,
      "grad_norm": 0.5563965742668177,
      "learning_rate": 0.002082,
      "loss": 4.8677,
      "step": 694
    },
    {
      "epoch": 0.00695,
      "grad_norm": 0.6126256581377956,
      "learning_rate": 0.002085,
      "loss": 4.868,
      "step": 695
    },
    {
      "epoch": 0.00696,
      "grad_norm": 0.603510667767381,
      "learning_rate": 0.002088,
      "loss": 4.8758,
      "step": 696
    },
    {
      "epoch": 0.00697,
      "grad_norm": 0.6639527590552766,
      "learning_rate": 0.002091,
      "loss": 4.8784,
      "step": 697
    },
    {
      "epoch": 0.00698,
      "grad_norm": 0.8067091784202179,
      "learning_rate": 0.002094,
      "loss": 4.8757,
      "step": 698
    },
    {
      "epoch": 0.00699,
      "grad_norm": 0.9128354608980798,
      "learning_rate": 0.002097,
      "loss": 4.8621,
      "step": 699
    },
    {
      "epoch": 0.007,
      "grad_norm": 0.9298486189112464,
      "learning_rate": 0.0021,
      "loss": 4.8859,
      "step": 700
    },
    {
      "epoch": 0.00701,
      "grad_norm": 0.9079094485202986,
      "learning_rate": 0.002103,
      "loss": 4.8619,
      "step": 701
    },
    {
      "epoch": 0.00702,
      "grad_norm": 1.1115710589227372,
      "learning_rate": 0.002106,
      "loss": 4.8769,
      "step": 702
    },
    {
      "epoch": 0.00703,
      "grad_norm": 0.8563727136979473,
      "learning_rate": 0.0021089999999999998,
      "loss": 4.846,
      "step": 703
    },
    {
      "epoch": 0.00704,
      "grad_norm": 0.8507471900724869,
      "learning_rate": 0.0021119999999999997,
      "loss": 4.8622,
      "step": 704
    },
    {
      "epoch": 0.00705,
      "grad_norm": 0.769782464639154,
      "learning_rate": 0.002115,
      "loss": 4.8683,
      "step": 705
    },
    {
      "epoch": 0.00706,
      "grad_norm": 0.823170135002925,
      "learning_rate": 0.002118,
      "loss": 4.8626,
      "step": 706
    },
    {
      "epoch": 0.00707,
      "grad_norm": 0.8501476997865872,
      "learning_rate": 0.002121,
      "loss": 4.8385,
      "step": 707
    },
    {
      "epoch": 0.00708,
      "grad_norm": 0.9757796217120096,
      "learning_rate": 0.002124,
      "loss": 4.8255,
      "step": 708
    },
    {
      "epoch": 0.00709,
      "grad_norm": 1.136516664517519,
      "learning_rate": 0.002127,
      "loss": 4.875,
      "step": 709
    },
    {
      "epoch": 0.0071,
      "grad_norm": 0.8442578613736921,
      "learning_rate": 0.00213,
      "loss": 4.8495,
      "step": 710
    },
    {
      "epoch": 0.00711,
      "grad_norm": 0.8659317374368097,
      "learning_rate": 0.002133,
      "loss": 4.8259,
      "step": 711
    },
    {
      "epoch": 0.00712,
      "grad_norm": 0.8943878610557214,
      "learning_rate": 0.002136,
      "loss": 4.8499,
      "step": 712
    },
    {
      "epoch": 0.00713,
      "grad_norm": 1.005520267323647,
      "learning_rate": 0.002139,
      "loss": 4.8703,
      "step": 713
    },
    {
      "epoch": 0.00714,
      "grad_norm": 0.8575705768347217,
      "learning_rate": 0.002142,
      "loss": 4.8532,
      "step": 714
    },
    {
      "epoch": 0.00715,
      "grad_norm": 0.6910728311533939,
      "learning_rate": 0.0021449999999999998,
      "loss": 4.8149,
      "step": 715
    },
    {
      "epoch": 0.00716,
      "grad_norm": 0.523242752687879,
      "learning_rate": 0.002148,
      "loss": 4.8032,
      "step": 716
    },
    {
      "epoch": 0.00717,
      "grad_norm": 0.6187034661565012,
      "learning_rate": 0.002151,
      "loss": 4.8222,
      "step": 717
    },
    {
      "epoch": 0.00718,
      "grad_norm": 0.6631189634234681,
      "learning_rate": 0.002154,
      "loss": 4.8103,
      "step": 718
    },
    {
      "epoch": 0.00719,
      "grad_norm": 0.8615055808443407,
      "learning_rate": 0.002157,
      "loss": 4.8254,
      "step": 719
    },
    {
      "epoch": 0.0072,
      "grad_norm": 0.9623520860140555,
      "learning_rate": 0.00216,
      "loss": 4.8223,
      "step": 720
    },
    {
      "epoch": 0.00721,
      "grad_norm": 0.8352385840849053,
      "learning_rate": 0.002163,
      "loss": 4.8225,
      "step": 721
    },
    {
      "epoch": 0.00722,
      "grad_norm": 0.7006874812702103,
      "learning_rate": 0.002166,
      "loss": 4.7938,
      "step": 722
    },
    {
      "epoch": 0.00723,
      "grad_norm": 0.6468265093949516,
      "learning_rate": 0.002169,
      "loss": 4.8026,
      "step": 723
    },
    {
      "epoch": 0.00724,
      "grad_norm": 0.6351725533732875,
      "learning_rate": 0.002172,
      "loss": 4.7959,
      "step": 724
    },
    {
      "epoch": 0.00725,
      "grad_norm": 0.6951521484434561,
      "learning_rate": 0.002175,
      "loss": 4.7939,
      "step": 725
    },
    {
      "epoch": 0.00726,
      "grad_norm": 0.6685844017951591,
      "learning_rate": 0.002178,
      "loss": 4.8021,
      "step": 726
    },
    {
      "epoch": 0.00727,
      "grad_norm": 0.6591838242250027,
      "learning_rate": 0.0021809999999999998,
      "loss": 4.7801,
      "step": 727
    },
    {
      "epoch": 0.00728,
      "grad_norm": 0.6854740798872353,
      "learning_rate": 0.002184,
      "loss": 4.7935,
      "step": 728
    },
    {
      "epoch": 0.00729,
      "grad_norm": 0.8458670368113279,
      "learning_rate": 0.002187,
      "loss": 4.7998,
      "step": 729
    },
    {
      "epoch": 0.0073,
      "grad_norm": 0.8820596950270692,
      "learning_rate": 0.00219,
      "loss": 4.7957,
      "step": 730
    },
    {
      "epoch": 0.00731,
      "grad_norm": 0.7920873615429597,
      "learning_rate": 0.002193,
      "loss": 4.787,
      "step": 731
    },
    {
      "epoch": 0.00732,
      "grad_norm": 0.76241161465089,
      "learning_rate": 0.002196,
      "loss": 4.7953,
      "step": 732
    },
    {
      "epoch": 0.00733,
      "grad_norm": 0.7536070296480728,
      "learning_rate": 0.002199,
      "loss": 4.7713,
      "step": 733
    },
    {
      "epoch": 0.00734,
      "grad_norm": 0.7828888260304195,
      "learning_rate": 0.002202,
      "loss": 4.8005,
      "step": 734
    },
    {
      "epoch": 0.00735,
      "grad_norm": 0.789301121324275,
      "learning_rate": 0.002205,
      "loss": 4.8061,
      "step": 735
    },
    {
      "epoch": 0.00736,
      "grad_norm": 0.8665427368268374,
      "learning_rate": 0.002208,
      "loss": 4.7947,
      "step": 736
    },
    {
      "epoch": 0.00737,
      "grad_norm": 0.9213665258793059,
      "learning_rate": 0.002211,
      "loss": 4.7668,
      "step": 737
    },
    {
      "epoch": 0.00738,
      "grad_norm": 1.1051560061666965,
      "learning_rate": 0.002214,
      "loss": 4.7801,
      "step": 738
    },
    {
      "epoch": 0.00739,
      "grad_norm": 1.2853173109817828,
      "learning_rate": 0.0022170000000000002,
      "loss": 4.7871,
      "step": 739
    },
    {
      "epoch": 0.0074,
      "grad_norm": 0.8846868930049064,
      "learning_rate": 0.00222,
      "loss": 4.7826,
      "step": 740
    },
    {
      "epoch": 0.00741,
      "grad_norm": 0.9670459302610688,
      "learning_rate": 0.002223,
      "loss": 4.7564,
      "step": 741
    },
    {
      "epoch": 0.00742,
      "grad_norm": 1.0093936436375988,
      "learning_rate": 0.002226,
      "loss": 4.8054,
      "step": 742
    },
    {
      "epoch": 0.00743,
      "grad_norm": 0.9547940794526594,
      "learning_rate": 0.002229,
      "loss": 4.7628,
      "step": 743
    },
    {
      "epoch": 0.00744,
      "grad_norm": 1.0433514490805686,
      "learning_rate": 0.002232,
      "loss": 4.781,
      "step": 744
    },
    {
      "epoch": 0.00745,
      "grad_norm": 0.9325385949963747,
      "learning_rate": 0.002235,
      "loss": 4.7894,
      "step": 745
    },
    {
      "epoch": 0.00746,
      "grad_norm": 0.9019275826038629,
      "learning_rate": 0.002238,
      "loss": 4.7783,
      "step": 746
    },
    {
      "epoch": 0.00747,
      "grad_norm": 0.7966099158360889,
      "learning_rate": 0.002241,
      "loss": 4.7608,
      "step": 747
    },
    {
      "epoch": 0.00748,
      "grad_norm": 0.8565839398988285,
      "learning_rate": 0.002244,
      "loss": 4.7749,
      "step": 748
    },
    {
      "epoch": 0.00749,
      "grad_norm": 0.816231494499623,
      "learning_rate": 0.002247,
      "loss": 4.7509,
      "step": 749
    },
    {
      "epoch": 0.0075,
      "grad_norm": 0.7743660614307156,
      "learning_rate": 0.0022500000000000003,
      "loss": 4.7909,
      "step": 750
    },
    {
      "epoch": 0.00751,
      "grad_norm": 0.8113380512241561,
      "learning_rate": 0.0022530000000000002,
      "loss": 4.7775,
      "step": 751
    },
    {
      "epoch": 0.00752,
      "grad_norm": 0.9985073455505076,
      "learning_rate": 0.002256,
      "loss": 4.7451,
      "step": 752
    },
    {
      "epoch": 0.00753,
      "grad_norm": 0.9478375008624312,
      "learning_rate": 0.002259,
      "loss": 4.7859,
      "step": 753
    },
    {
      "epoch": 0.00754,
      "grad_norm": 0.7802553547569654,
      "learning_rate": 0.002262,
      "loss": 4.7862,
      "step": 754
    },
    {
      "epoch": 0.00755,
      "grad_norm": 0.9025323762262523,
      "learning_rate": 0.002265,
      "loss": 4.7581,
      "step": 755
    },
    {
      "epoch": 0.00756,
      "grad_norm": 0.8357144009512405,
      "learning_rate": 0.002268,
      "loss": 4.7438,
      "step": 756
    },
    {
      "epoch": 0.00757,
      "grad_norm": 0.8081799661033972,
      "learning_rate": 0.002271,
      "loss": 4.7545,
      "step": 757
    },
    {
      "epoch": 0.00758,
      "grad_norm": 0.7188920586125809,
      "learning_rate": 0.002274,
      "loss": 4.7374,
      "step": 758
    },
    {
      "epoch": 0.00759,
      "grad_norm": 0.6594203091253575,
      "learning_rate": 0.002277,
      "loss": 4.7482,
      "step": 759
    },
    {
      "epoch": 0.0076,
      "grad_norm": 0.5807043361033956,
      "learning_rate": 0.00228,
      "loss": 4.7315,
      "step": 760
    },
    {
      "epoch": 0.00761,
      "grad_norm": 0.5117441659228741,
      "learning_rate": 0.002283,
      "loss": 4.7,
      "step": 761
    },
    {
      "epoch": 0.00762,
      "grad_norm": 0.5351172229626283,
      "learning_rate": 0.0022860000000000003,
      "loss": 4.6819,
      "step": 762
    },
    {
      "epoch": 0.00763,
      "grad_norm": 0.47299369798782465,
      "learning_rate": 0.0022890000000000002,
      "loss": 4.7227,
      "step": 763
    },
    {
      "epoch": 0.00764,
      "grad_norm": 0.4213562585185439,
      "learning_rate": 0.002292,
      "loss": 4.7275,
      "step": 764
    },
    {
      "epoch": 0.00765,
      "grad_norm": 0.44406465565712894,
      "learning_rate": 0.002295,
      "loss": 4.7138,
      "step": 765
    },
    {
      "epoch": 0.00766,
      "grad_norm": 0.5032974849101272,
      "learning_rate": 0.002298,
      "loss": 4.7114,
      "step": 766
    },
    {
      "epoch": 0.00767,
      "grad_norm": 0.7140868069096875,
      "learning_rate": 0.002301,
      "loss": 4.7151,
      "step": 767
    },
    {
      "epoch": 0.00768,
      "grad_norm": 0.8614017286346677,
      "learning_rate": 0.002304,
      "loss": 4.7339,
      "step": 768
    },
    {
      "epoch": 0.00769,
      "grad_norm": 0.811936140634688,
      "learning_rate": 0.002307,
      "loss": 4.7346,
      "step": 769
    },
    {
      "epoch": 0.0077,
      "grad_norm": 0.6775529648178301,
      "learning_rate": 0.00231,
      "loss": 4.6843,
      "step": 770
    },
    {
      "epoch": 0.00771,
      "grad_norm": 0.7223031465513833,
      "learning_rate": 0.002313,
      "loss": 4.7255,
      "step": 771
    },
    {
      "epoch": 0.00772,
      "grad_norm": 0.6676019393281739,
      "learning_rate": 0.002316,
      "loss": 4.7053,
      "step": 772
    },
    {
      "epoch": 0.00773,
      "grad_norm": 0.6378542516153257,
      "learning_rate": 0.0023190000000000003,
      "loss": 4.7069,
      "step": 773
    },
    {
      "epoch": 0.00774,
      "grad_norm": 0.6207815175915733,
      "learning_rate": 0.0023220000000000003,
      "loss": 4.7187,
      "step": 774
    },
    {
      "epoch": 0.00775,
      "grad_norm": 0.5964347529354114,
      "learning_rate": 0.0023250000000000002,
      "loss": 4.7097,
      "step": 775
    },
    {
      "epoch": 0.00776,
      "grad_norm": 0.630013209430953,
      "learning_rate": 0.002328,
      "loss": 4.7096,
      "step": 776
    },
    {
      "epoch": 0.00777,
      "grad_norm": 0.6207512829319234,
      "learning_rate": 0.002331,
      "loss": 4.6848,
      "step": 777
    },
    {
      "epoch": 0.00778,
      "grad_norm": 0.6524761943334527,
      "learning_rate": 0.002334,
      "loss": 4.6998,
      "step": 778
    },
    {
      "epoch": 0.00779,
      "grad_norm": 0.8187914523175398,
      "learning_rate": 0.002337,
      "loss": 4.6652,
      "step": 779
    },
    {
      "epoch": 0.0078,
      "grad_norm": 0.9648917298565269,
      "learning_rate": 0.00234,
      "loss": 4.7012,
      "step": 780
    },
    {
      "epoch": 0.00781,
      "grad_norm": 1.0384259728763472,
      "learning_rate": 0.002343,
      "loss": 4.7097,
      "step": 781
    },
    {
      "epoch": 0.00782,
      "grad_norm": 0.9635439545076986,
      "learning_rate": 0.002346,
      "loss": 4.7024,
      "step": 782
    },
    {
      "epoch": 0.00783,
      "grad_norm": 0.7812806082921958,
      "learning_rate": 0.002349,
      "loss": 4.6794,
      "step": 783
    },
    {
      "epoch": 0.00784,
      "grad_norm": 0.781961859641362,
      "learning_rate": 0.002352,
      "loss": 4.6914,
      "step": 784
    },
    {
      "epoch": 0.00785,
      "grad_norm": 0.7694622494278262,
      "learning_rate": 0.0023550000000000003,
      "loss": 4.6875,
      "step": 785
    },
    {
      "epoch": 0.00786,
      "grad_norm": 0.8298262678177186,
      "learning_rate": 0.0023580000000000003,
      "loss": 4.6951,
      "step": 786
    },
    {
      "epoch": 0.00787,
      "grad_norm": 0.880296894881306,
      "learning_rate": 0.0023610000000000003,
      "loss": 4.6973,
      "step": 787
    },
    {
      "epoch": 0.00788,
      "grad_norm": 0.9208137721706963,
      "learning_rate": 0.002364,
      "loss": 4.6994,
      "step": 788
    },
    {
      "epoch": 0.00789,
      "grad_norm": 0.8821666937927062,
      "learning_rate": 0.002367,
      "loss": 4.7255,
      "step": 789
    },
    {
      "epoch": 0.0079,
      "grad_norm": 0.9083723262351499,
      "learning_rate": 0.00237,
      "loss": 4.7016,
      "step": 790
    },
    {
      "epoch": 0.00791,
      "grad_norm": 1.102156246123534,
      "learning_rate": 0.002373,
      "loss": 4.6636,
      "step": 791
    },
    {
      "epoch": 0.00792,
      "grad_norm": 1.0575279803993471,
      "learning_rate": 0.002376,
      "loss": 4.7024,
      "step": 792
    },
    {
      "epoch": 0.00793,
      "grad_norm": 0.8169656544039797,
      "learning_rate": 0.002379,
      "loss": 4.6881,
      "step": 793
    },
    {
      "epoch": 0.00794,
      "grad_norm": 0.678074438498389,
      "learning_rate": 0.002382,
      "loss": 4.6904,
      "step": 794
    },
    {
      "epoch": 0.00795,
      "grad_norm": 0.6991656768310356,
      "learning_rate": 0.002385,
      "loss": 4.6673,
      "step": 795
    },
    {
      "epoch": 0.00796,
      "grad_norm": 0.7517528447717965,
      "learning_rate": 0.0023880000000000004,
      "loss": 4.6604,
      "step": 796
    },
    {
      "epoch": 0.00797,
      "grad_norm": 0.6539739996063073,
      "learning_rate": 0.0023910000000000003,
      "loss": 4.701,
      "step": 797
    },
    {
      "epoch": 0.00798,
      "grad_norm": 0.7347989701629836,
      "learning_rate": 0.0023940000000000003,
      "loss": 4.6689,
      "step": 798
    },
    {
      "epoch": 0.00799,
      "grad_norm": 0.9872840389626579,
      "learning_rate": 0.0023970000000000003,
      "loss": 4.7125,
      "step": 799
    },
    {
      "epoch": 0.008,
      "grad_norm": 1.5507273997188689,
      "learning_rate": 0.0024000000000000002,
      "loss": 4.7008,
      "step": 800
    },
    {
      "epoch": 0.00801,
      "grad_norm": 0.8492322576000088,
      "learning_rate": 0.002403,
      "loss": 4.7238,
      "step": 801
    },
    {
      "epoch": 0.00802,
      "grad_norm": 0.814504548640702,
      "learning_rate": 0.002406,
      "loss": 4.7004,
      "step": 802
    },
    {
      "epoch": 0.00803,
      "grad_norm": 1.1123606888778432,
      "learning_rate": 0.002409,
      "loss": 4.6958,
      "step": 803
    },
    {
      "epoch": 0.00804,
      "grad_norm": 0.9176660269114695,
      "learning_rate": 0.002412,
      "loss": 4.6917,
      "step": 804
    },
    {
      "epoch": 0.00805,
      "grad_norm": 0.8611830884359055,
      "learning_rate": 0.002415,
      "loss": 4.683,
      "step": 805
    },
    {
      "epoch": 0.00806,
      "grad_norm": 0.8560370675329328,
      "learning_rate": 0.002418,
      "loss": 4.6967,
      "step": 806
    },
    {
      "epoch": 0.00807,
      "grad_norm": 0.9747797626725606,
      "learning_rate": 0.0024210000000000004,
      "loss": 4.6971,
      "step": 807
    },
    {
      "epoch": 0.00808,
      "grad_norm": 0.8688248558202334,
      "learning_rate": 0.0024240000000000004,
      "loss": 4.6774,
      "step": 808
    },
    {
      "epoch": 0.00809,
      "grad_norm": 0.7810743476258258,
      "learning_rate": 0.0024270000000000003,
      "loss": 4.6681,
      "step": 809
    },
    {
      "epoch": 0.0081,
      "grad_norm": 0.8213343057667987,
      "learning_rate": 0.0024300000000000003,
      "loss": 4.7024,
      "step": 810
    },
    {
      "epoch": 0.00811,
      "grad_norm": 0.7477361334276762,
      "learning_rate": 0.0024330000000000003,
      "loss": 4.6714,
      "step": 811
    },
    {
      "epoch": 0.00812,
      "grad_norm": 0.7141317849387557,
      "learning_rate": 0.0024360000000000002,
      "loss": 4.6487,
      "step": 812
    },
    {
      "epoch": 0.00813,
      "grad_norm": 0.6767630610959015,
      "learning_rate": 0.0024389999999999998,
      "loss": 4.6789,
      "step": 813
    },
    {
      "epoch": 0.00814,
      "grad_norm": 0.6590399241347584,
      "learning_rate": 0.0024419999999999997,
      "loss": 4.6805,
      "step": 814
    },
    {
      "epoch": 0.00815,
      "grad_norm": 0.6023723300104491,
      "learning_rate": 0.0024449999999999997,
      "loss": 4.6845,
      "step": 815
    },
    {
      "epoch": 0.00816,
      "grad_norm": 0.6246267032157732,
      "learning_rate": 0.002448,
      "loss": 4.6577,
      "step": 816
    },
    {
      "epoch": 0.00817,
      "grad_norm": 0.7633099639321208,
      "learning_rate": 0.002451,
      "loss": 4.6687,
      "step": 817
    },
    {
      "epoch": 0.00818,
      "grad_norm": 0.7806805982678847,
      "learning_rate": 0.002454,
      "loss": 4.6553,
      "step": 818
    },
    {
      "epoch": 0.00819,
      "grad_norm": 0.5501560610295794,
      "learning_rate": 0.002457,
      "loss": 4.6552,
      "step": 819
    },
    {
      "epoch": 0.0082,
      "grad_norm": 0.5777814189038608,
      "learning_rate": 0.00246,
      "loss": 4.6416,
      "step": 820
    },
    {
      "epoch": 0.00821,
      "grad_norm": 0.5160511618881874,
      "learning_rate": 0.002463,
      "loss": 4.6164,
      "step": 821
    },
    {
      "epoch": 0.00822,
      "grad_norm": 0.43680612738745367,
      "learning_rate": 0.002466,
      "loss": 4.6137,
      "step": 822
    },
    {
      "epoch": 0.00823,
      "grad_norm": 0.4259879914150642,
      "learning_rate": 0.002469,
      "loss": 4.6172,
      "step": 823
    },
    {
      "epoch": 0.00824,
      "grad_norm": 0.4527560352782629,
      "learning_rate": 0.002472,
      "loss": 4.6356,
      "step": 824
    },
    {
      "epoch": 0.00825,
      "grad_norm": 0.43208606152268814,
      "learning_rate": 0.0024749999999999998,
      "loss": 4.6397,
      "step": 825
    },
    {
      "epoch": 0.00826,
      "grad_norm": 0.5843185047977487,
      "learning_rate": 0.0024779999999999997,
      "loss": 4.6038,
      "step": 826
    },
    {
      "epoch": 0.00827,
      "grad_norm": 0.8997674361314323,
      "learning_rate": 0.002481,
      "loss": 4.6637,
      "step": 827
    },
    {
      "epoch": 0.00828,
      "grad_norm": 1.2604684983866747,
      "learning_rate": 0.002484,
      "loss": 4.671,
      "step": 828
    },
    {
      "epoch": 0.00829,
      "grad_norm": 0.8292065747963988,
      "learning_rate": 0.002487,
      "loss": 4.6469,
      "step": 829
    },
    {
      "epoch": 0.0083,
      "grad_norm": 0.8821037896462126,
      "learning_rate": 0.00249,
      "loss": 4.6692,
      "step": 830
    },
    {
      "epoch": 0.00831,
      "grad_norm": 0.7576971723716095,
      "learning_rate": 0.002493,
      "loss": 4.6376,
      "step": 831
    },
    {
      "epoch": 0.00832,
      "grad_norm": 0.759884157810248,
      "learning_rate": 0.002496,
      "loss": 4.6375,
      "step": 832
    },
    {
      "epoch": 0.00833,
      "grad_norm": 0.7334765635370318,
      "learning_rate": 0.002499,
      "loss": 4.632,
      "step": 833
    },
    {
      "epoch": 0.00834,
      "grad_norm": 0.743907849255925,
      "learning_rate": 0.002502,
      "loss": 4.6343,
      "step": 834
    },
    {
      "epoch": 0.00835,
      "grad_norm": 0.8768740048015519,
      "learning_rate": 0.002505,
      "loss": 4.633,
      "step": 835
    },
    {
      "epoch": 0.00836,
      "grad_norm": 0.8050749482625906,
      "learning_rate": 0.002508,
      "loss": 4.6653,
      "step": 836
    },
    {
      "epoch": 0.00837,
      "grad_norm": 0.6963558438544545,
      "learning_rate": 0.0025109999999999998,
      "loss": 4.6012,
      "step": 837
    },
    {
      "epoch": 0.00838,
      "grad_norm": 0.8008054438848442,
      "learning_rate": 0.0025139999999999997,
      "loss": 4.6308,
      "step": 838
    },
    {
      "epoch": 0.00839,
      "grad_norm": 1.048577251560161,
      "learning_rate": 0.002517,
      "loss": 4.6417,
      "step": 839
    },
    {
      "epoch": 0.0084,
      "grad_norm": 1.068829465042819,
      "learning_rate": 0.00252,
      "loss": 4.6393,
      "step": 840
    },
    {
      "epoch": 0.00841,
      "grad_norm": 0.9585677425278923,
      "learning_rate": 0.002523,
      "loss": 4.6304,
      "step": 841
    },
    {
      "epoch": 0.00842,
      "grad_norm": 0.9086027488597042,
      "learning_rate": 0.002526,
      "loss": 4.6299,
      "step": 842
    },
    {
      "epoch": 0.00843,
      "grad_norm": 0.9467491452172306,
      "learning_rate": 0.002529,
      "loss": 4.6351,
      "step": 843
    },
    {
      "epoch": 0.00844,
      "grad_norm": 0.9086629177949005,
      "learning_rate": 0.002532,
      "loss": 4.6535,
      "step": 844
    },
    {
      "epoch": 0.00845,
      "grad_norm": 1.0087851359380933,
      "learning_rate": 0.002535,
      "loss": 4.6308,
      "step": 845
    },
    {
      "epoch": 0.00846,
      "grad_norm": 1.0982346271909766,
      "learning_rate": 0.002538,
      "loss": 4.6654,
      "step": 846
    },
    {
      "epoch": 0.00847,
      "grad_norm": 0.8915036821171446,
      "learning_rate": 0.002541,
      "loss": 4.6379,
      "step": 847
    },
    {
      "epoch": 0.00848,
      "grad_norm": 1.0647705326129857,
      "learning_rate": 0.002544,
      "loss": 4.6485,
      "step": 848
    },
    {
      "epoch": 0.00849,
      "grad_norm": 1.068501032060827,
      "learning_rate": 0.002547,
      "loss": 4.6458,
      "step": 849
    },
    {
      "epoch": 0.0085,
      "grad_norm": 0.9913137717408333,
      "learning_rate": 0.00255,
      "loss": 4.6526,
      "step": 850
    },
    {
      "epoch": 0.00851,
      "grad_norm": 0.8594454569866243,
      "learning_rate": 0.002553,
      "loss": 4.6627,
      "step": 851
    },
    {
      "epoch": 0.00852,
      "grad_norm": 0.896044658032046,
      "learning_rate": 0.002556,
      "loss": 4.6576,
      "step": 852
    },
    {
      "epoch": 0.00853,
      "grad_norm": 0.8600636350866336,
      "learning_rate": 0.002559,
      "loss": 4.633,
      "step": 853
    },
    {
      "epoch": 0.00854,
      "grad_norm": 0.8249013935069577,
      "learning_rate": 0.002562,
      "loss": 4.6232,
      "step": 854
    },
    {
      "epoch": 0.00855,
      "grad_norm": 0.7799882774394947,
      "learning_rate": 0.002565,
      "loss": 4.6068,
      "step": 855
    },
    {
      "epoch": 0.00856,
      "grad_norm": 0.761486927017014,
      "learning_rate": 0.002568,
      "loss": 4.6334,
      "step": 856
    },
    {
      "epoch": 0.00857,
      "grad_norm": 0.7158635780925376,
      "learning_rate": 0.002571,
      "loss": 4.6223,
      "step": 857
    },
    {
      "epoch": 0.00858,
      "grad_norm": 0.774083453902133,
      "learning_rate": 0.002574,
      "loss": 4.6142,
      "step": 858
    },
    {
      "epoch": 0.00859,
      "grad_norm": 0.850850893304573,
      "learning_rate": 0.002577,
      "loss": 4.6099,
      "step": 859
    },
    {
      "epoch": 0.0086,
      "grad_norm": 0.7669731990407863,
      "learning_rate": 0.00258,
      "loss": 4.6341,
      "step": 860
    },
    {
      "epoch": 0.00861,
      "grad_norm": 0.7162038190133796,
      "learning_rate": 0.0025830000000000002,
      "loss": 4.6456,
      "step": 861
    },
    {
      "epoch": 0.00862,
      "grad_norm": 0.6479803425037955,
      "learning_rate": 0.002586,
      "loss": 4.5912,
      "step": 862
    },
    {
      "epoch": 0.00863,
      "grad_norm": 0.6368722094988631,
      "learning_rate": 0.002589,
      "loss": 4.6167,
      "step": 863
    },
    {
      "epoch": 0.00864,
      "grad_norm": 0.6118405956106333,
      "learning_rate": 0.002592,
      "loss": 4.5664,
      "step": 864
    },
    {
      "epoch": 0.00865,
      "grad_norm": 0.5650431634807904,
      "learning_rate": 0.002595,
      "loss": 4.5813,
      "step": 865
    },
    {
      "epoch": 0.00866,
      "grad_norm": 0.6733850251887965,
      "learning_rate": 0.002598,
      "loss": 4.6085,
      "step": 866
    },
    {
      "epoch": 0.00867,
      "grad_norm": 0.7146438239332881,
      "learning_rate": 0.002601,
      "loss": 4.5993,
      "step": 867
    },
    {
      "epoch": 0.00868,
      "grad_norm": 0.5365995387053527,
      "learning_rate": 0.002604,
      "loss": 4.5719,
      "step": 868
    },
    {
      "epoch": 0.00869,
      "grad_norm": 0.5667577739936428,
      "learning_rate": 0.002607,
      "loss": 4.5823,
      "step": 869
    },
    {
      "epoch": 0.0087,
      "grad_norm": 0.6451283729695402,
      "learning_rate": 0.00261,
      "loss": 4.5731,
      "step": 870
    },
    {
      "epoch": 0.00871,
      "grad_norm": 0.7555644992037868,
      "learning_rate": 0.002613,
      "loss": 4.5997,
      "step": 871
    },
    {
      "epoch": 0.00872,
      "grad_norm": 0.8189768002730787,
      "learning_rate": 0.002616,
      "loss": 4.5791,
      "step": 872
    },
    {
      "epoch": 0.00873,
      "grad_norm": 0.6554558150815033,
      "learning_rate": 0.0026190000000000002,
      "loss": 4.5881,
      "step": 873
    },
    {
      "epoch": 0.00874,
      "grad_norm": 0.44798120348082565,
      "learning_rate": 0.002622,
      "loss": 4.5966,
      "step": 874
    },
    {
      "epoch": 0.00875,
      "grad_norm": 0.5565097001333607,
      "learning_rate": 0.002625,
      "loss": 4.5495,
      "step": 875
    },
    {
      "epoch": 0.00876,
      "grad_norm": 0.47756263300713336,
      "learning_rate": 0.002628,
      "loss": 4.5755,
      "step": 876
    },
    {
      "epoch": 0.00877,
      "grad_norm": 0.5306407935778719,
      "learning_rate": 0.002631,
      "loss": 4.5933,
      "step": 877
    },
    {
      "epoch": 0.00878,
      "grad_norm": 0.4614494798142028,
      "learning_rate": 0.002634,
      "loss": 4.5228,
      "step": 878
    },
    {
      "epoch": 0.00879,
      "grad_norm": 0.4660093179336554,
      "learning_rate": 0.002637,
      "loss": 4.5537,
      "step": 879
    },
    {
      "epoch": 0.0088,
      "grad_norm": 0.5031002143838149,
      "learning_rate": 0.00264,
      "loss": 4.5622,
      "step": 880
    },
    {
      "epoch": 0.00881,
      "grad_norm": 0.5059040564958315,
      "learning_rate": 0.002643,
      "loss": 4.5642,
      "step": 881
    },
    {
      "epoch": 0.00882,
      "grad_norm": 0.5172200646638598,
      "learning_rate": 0.002646,
      "loss": 4.5628,
      "step": 882
    },
    {
      "epoch": 0.00883,
      "grad_norm": 0.5784463708218814,
      "learning_rate": 0.002649,
      "loss": 4.5589,
      "step": 883
    },
    {
      "epoch": 0.00884,
      "grad_norm": 0.6180073373235953,
      "learning_rate": 0.0026520000000000003,
      "loss": 4.5464,
      "step": 884
    },
    {
      "epoch": 0.00885,
      "grad_norm": 0.8787173694483682,
      "learning_rate": 0.0026550000000000002,
      "loss": 4.5525,
      "step": 885
    },
    {
      "epoch": 0.00886,
      "grad_norm": 1.0673888729842724,
      "learning_rate": 0.002658,
      "loss": 4.6158,
      "step": 886
    },
    {
      "epoch": 0.00887,
      "grad_norm": 0.6898543177879018,
      "learning_rate": 0.002661,
      "loss": 4.5725,
      "step": 887
    },
    {
      "epoch": 0.00888,
      "grad_norm": 0.853477571603772,
      "learning_rate": 0.002664,
      "loss": 4.5844,
      "step": 888
    },
    {
      "epoch": 0.00889,
      "grad_norm": 0.8678931299790948,
      "learning_rate": 0.002667,
      "loss": 4.5451,
      "step": 889
    },
    {
      "epoch": 0.0089,
      "grad_norm": 0.7924796160958208,
      "learning_rate": 0.00267,
      "loss": 4.5923,
      "step": 890
    },
    {
      "epoch": 0.00891,
      "grad_norm": 0.9978786126328875,
      "learning_rate": 0.002673,
      "loss": 4.5837,
      "step": 891
    },
    {
      "epoch": 0.00892,
      "grad_norm": 0.8940798431065213,
      "learning_rate": 0.002676,
      "loss": 4.5952,
      "step": 892
    },
    {
      "epoch": 0.00893,
      "grad_norm": 0.8958101523241404,
      "learning_rate": 0.002679,
      "loss": 4.5818,
      "step": 893
    },
    {
      "epoch": 0.00894,
      "grad_norm": 0.7899502816364142,
      "learning_rate": 0.002682,
      "loss": 4.5617,
      "step": 894
    },
    {
      "epoch": 0.00895,
      "grad_norm": 0.8690679111994202,
      "learning_rate": 0.0026850000000000003,
      "loss": 4.5764,
      "step": 895
    },
    {
      "epoch": 0.00896,
      "grad_norm": 0.7586764471402897,
      "learning_rate": 0.0026880000000000003,
      "loss": 4.5653,
      "step": 896
    },
    {
      "epoch": 0.00897,
      "grad_norm": 0.8398125438690992,
      "learning_rate": 0.0026910000000000002,
      "loss": 4.5844,
      "step": 897
    },
    {
      "epoch": 0.00898,
      "grad_norm": 0.916381884952665,
      "learning_rate": 0.002694,
      "loss": 4.5838,
      "step": 898
    },
    {
      "epoch": 0.00899,
      "grad_norm": 1.003578523934926,
      "learning_rate": 0.002697,
      "loss": 4.5931,
      "step": 899
    },
    {
      "epoch": 0.009,
      "grad_norm": 1.0819102819564956,
      "learning_rate": 0.0027,
      "loss": 4.6243,
      "step": 900
    },
    {
      "epoch": 0.00901,
      "grad_norm": 1.0380833806593022,
      "learning_rate": 0.002703,
      "loss": 4.585,
      "step": 901
    },
    {
      "epoch": 0.00902,
      "grad_norm": 0.9970218234631744,
      "learning_rate": 0.002706,
      "loss": 4.5964,
      "step": 902
    },
    {
      "epoch": 0.00903,
      "grad_norm": 0.9843987698563316,
      "learning_rate": 0.002709,
      "loss": 4.59,
      "step": 903
    },
    {
      "epoch": 0.00904,
      "grad_norm": 0.9436236396032095,
      "learning_rate": 0.002712,
      "loss": 4.5739,
      "step": 904
    },
    {
      "epoch": 0.00905,
      "grad_norm": 0.9617986484804574,
      "learning_rate": 0.002715,
      "loss": 4.5864,
      "step": 905
    },
    {
      "epoch": 0.00906,
      "grad_norm": 0.9793075174387083,
      "learning_rate": 0.002718,
      "loss": 4.6075,
      "step": 906
    },
    {
      "epoch": 0.00907,
      "grad_norm": 0.829821867155264,
      "learning_rate": 0.0027210000000000003,
      "loss": 4.6113,
      "step": 907
    },
    {
      "epoch": 0.00908,
      "grad_norm": 0.7029669246243915,
      "learning_rate": 0.0027240000000000003,
      "loss": 4.5776,
      "step": 908
    },
    {
      "epoch": 0.00909,
      "grad_norm": 0.6424625298559724,
      "learning_rate": 0.0027270000000000003,
      "loss": 4.5922,
      "step": 909
    },
    {
      "epoch": 0.0091,
      "grad_norm": 0.541466576098649,
      "learning_rate": 0.0027300000000000002,
      "loss": 4.591,
      "step": 910
    },
    {
      "epoch": 0.00911,
      "grad_norm": 0.5322572887883155,
      "learning_rate": 0.002733,
      "loss": 4.5472,
      "step": 911
    },
    {
      "epoch": 0.00912,
      "grad_norm": 0.5035937451256259,
      "learning_rate": 0.002736,
      "loss": 4.5482,
      "step": 912
    },
    {
      "epoch": 0.00913,
      "grad_norm": 0.5644505438751861,
      "learning_rate": 0.002739,
      "loss": 4.5413,
      "step": 913
    },
    {
      "epoch": 0.00914,
      "grad_norm": 0.5623403578732925,
      "learning_rate": 0.002742,
      "loss": 4.569,
      "step": 914
    },
    {
      "epoch": 0.00915,
      "grad_norm": 0.623567024043847,
      "learning_rate": 0.002745,
      "loss": 4.5303,
      "step": 915
    },
    {
      "epoch": 0.00916,
      "grad_norm": 0.7282453918583485,
      "learning_rate": 0.002748,
      "loss": 4.5134,
      "step": 916
    },
    {
      "epoch": 0.00917,
      "grad_norm": 0.849259998793401,
      "learning_rate": 0.002751,
      "loss": 4.53,
      "step": 917
    },
    {
      "epoch": 0.00918,
      "grad_norm": 0.8847496389011684,
      "learning_rate": 0.0027540000000000004,
      "loss": 4.5649,
      "step": 918
    },
    {
      "epoch": 0.00919,
      "grad_norm": 0.5812965220998871,
      "learning_rate": 0.0027570000000000003,
      "loss": 4.5257,
      "step": 919
    },
    {
      "epoch": 0.0092,
      "grad_norm": 0.6986684330141565,
      "learning_rate": 0.0027600000000000003,
      "loss": 4.5448,
      "step": 920
    },
    {
      "epoch": 0.00921,
      "grad_norm": 0.7458959997852286,
      "learning_rate": 0.0027630000000000003,
      "loss": 4.5421,
      "step": 921
    },
    {
      "epoch": 0.00922,
      "grad_norm": 0.7148661298609507,
      "learning_rate": 0.0027660000000000002,
      "loss": 4.5394,
      "step": 922
    },
    {
      "epoch": 0.00923,
      "grad_norm": 0.9687178948744826,
      "learning_rate": 0.002769,
      "loss": 4.53,
      "step": 923
    },
    {
      "epoch": 0.00924,
      "grad_norm": 0.8657140698466169,
      "learning_rate": 0.002772,
      "loss": 4.5532,
      "step": 924
    },
    {
      "epoch": 0.00925,
      "grad_norm": 0.7168831350145214,
      "learning_rate": 0.002775,
      "loss": 4.5283,
      "step": 925
    },
    {
      "epoch": 0.00926,
      "grad_norm": 0.6574829303703393,
      "learning_rate": 0.002778,
      "loss": 4.5395,
      "step": 926
    },
    {
      "epoch": 0.00927,
      "grad_norm": 0.6438234464350411,
      "learning_rate": 0.002781,
      "loss": 4.5034,
      "step": 927
    },
    {
      "epoch": 0.00928,
      "grad_norm": 0.5862425539112647,
      "learning_rate": 0.002784,
      "loss": 4.5433,
      "step": 928
    },
    {
      "epoch": 0.00929,
      "grad_norm": 0.5140610933427406,
      "learning_rate": 0.0027870000000000004,
      "loss": 4.5207,
      "step": 929
    },
    {
      "epoch": 0.0093,
      "grad_norm": 0.5616746929674309,
      "learning_rate": 0.0027900000000000004,
      "loss": 4.5043,
      "step": 930
    },
    {
      "epoch": 0.00931,
      "grad_norm": 0.6437618467570244,
      "learning_rate": 0.0027930000000000003,
      "loss": 4.5177,
      "step": 931
    },
    {
      "epoch": 0.00932,
      "grad_norm": 0.6931141138815896,
      "learning_rate": 0.0027960000000000003,
      "loss": 4.5276,
      "step": 932
    },
    {
      "epoch": 0.00933,
      "grad_norm": 0.5978591705981104,
      "learning_rate": 0.0027990000000000003,
      "loss": 4.4907,
      "step": 933
    },
    {
      "epoch": 0.00934,
      "grad_norm": 0.603121732270584,
      "learning_rate": 0.0028020000000000002,
      "loss": 4.5154,
      "step": 934
    },
    {
      "epoch": 0.00935,
      "grad_norm": 0.6873365438378514,
      "learning_rate": 0.002805,
      "loss": 4.528,
      "step": 935
    },
    {
      "epoch": 0.00936,
      "grad_norm": 0.8285273293509559,
      "learning_rate": 0.002808,
      "loss": 4.5353,
      "step": 936
    },
    {
      "epoch": 0.00937,
      "grad_norm": 0.8237643167810483,
      "learning_rate": 0.002811,
      "loss": 4.5562,
      "step": 937
    },
    {
      "epoch": 0.00938,
      "grad_norm": 0.7428902051912631,
      "learning_rate": 0.002814,
      "loss": 4.5253,
      "step": 938
    },
    {
      "epoch": 0.00939,
      "grad_norm": 0.7604729540920855,
      "learning_rate": 0.002817,
      "loss": 4.5245,
      "step": 939
    },
    {
      "epoch": 0.0094,
      "grad_norm": 0.7827067718534406,
      "learning_rate": 0.00282,
      "loss": 4.5235,
      "step": 940
    },
    {
      "epoch": 0.00941,
      "grad_norm": 0.8076797169385439,
      "learning_rate": 0.002823,
      "loss": 4.51,
      "step": 941
    },
    {
      "epoch": 0.00942,
      "grad_norm": 0.9871062678858393,
      "learning_rate": 0.002826,
      "loss": 4.5397,
      "step": 942
    },
    {
      "epoch": 0.00943,
      "grad_norm": 1.070901691951404,
      "learning_rate": 0.002829,
      "loss": 4.5537,
      "step": 943
    },
    {
      "epoch": 0.00944,
      "grad_norm": 1.1009445115031264,
      "learning_rate": 0.002832,
      "loss": 4.5564,
      "step": 944
    },
    {
      "epoch": 0.00945,
      "grad_norm": 0.9908698154571977,
      "learning_rate": 0.002835,
      "loss": 4.5644,
      "step": 945
    },
    {
      "epoch": 0.00946,
      "grad_norm": 0.8740733112948412,
      "learning_rate": 0.002838,
      "loss": 4.5681,
      "step": 946
    },
    {
      "epoch": 0.00947,
      "grad_norm": 0.8705398172356058,
      "learning_rate": 0.0028409999999999998,
      "loss": 4.5461,
      "step": 947
    },
    {
      "epoch": 0.00948,
      "grad_norm": 1.030159035482114,
      "learning_rate": 0.0028439999999999997,
      "loss": 4.5447,
      "step": 948
    },
    {
      "epoch": 0.00949,
      "grad_norm": 1.1928707625743042,
      "learning_rate": 0.002847,
      "loss": 4.5979,
      "step": 949
    },
    {
      "epoch": 0.0095,
      "grad_norm": 0.8189553326064546,
      "learning_rate": 0.00285,
      "loss": 4.5703,
      "step": 950
    },
    {
      "epoch": 0.00951,
      "grad_norm": 0.7516073328932897,
      "learning_rate": 0.002853,
      "loss": 4.5403,
      "step": 951
    },
    {
      "epoch": 0.00952,
      "grad_norm": 0.770673808631835,
      "learning_rate": 0.002856,
      "loss": 4.5655,
      "step": 952
    },
    {
      "epoch": 0.00953,
      "grad_norm": 0.7009141910952996,
      "learning_rate": 0.002859,
      "loss": 4.5043,
      "step": 953
    },
    {
      "epoch": 0.00954,
      "grad_norm": 0.5282029249680377,
      "learning_rate": 0.002862,
      "loss": 4.5519,
      "step": 954
    },
    {
      "epoch": 0.00955,
      "grad_norm": 0.5219597666883256,
      "learning_rate": 0.002865,
      "loss": 4.538,
      "step": 955
    },
    {
      "epoch": 0.00956,
      "grad_norm": 0.5409798826026391,
      "learning_rate": 0.002868,
      "loss": 4.5165,
      "step": 956
    },
    {
      "epoch": 0.00957,
      "grad_norm": 0.6331938100277174,
      "learning_rate": 0.002871,
      "loss": 4.5418,
      "step": 957
    },
    {
      "epoch": 0.00958,
      "grad_norm": 0.7239336518776873,
      "learning_rate": 0.002874,
      "loss": 4.5243,
      "step": 958
    },
    {
      "epoch": 0.00959,
      "grad_norm": 0.7150326004222285,
      "learning_rate": 0.002877,
      "loss": 4.522,
      "step": 959
    },
    {
      "epoch": 0.0096,
      "grad_norm": 0.6351033882583478,
      "learning_rate": 0.0028799999999999997,
      "loss": 4.5137,
      "step": 960
    },
    {
      "epoch": 0.00961,
      "grad_norm": 0.6041315929206551,
      "learning_rate": 0.002883,
      "loss": 4.4839,
      "step": 961
    },
    {
      "epoch": 0.00962,
      "grad_norm": 0.5086415396789048,
      "learning_rate": 0.002886,
      "loss": 4.5083,
      "step": 962
    },
    {
      "epoch": 0.00963,
      "grad_norm": 0.4921917843701592,
      "learning_rate": 0.002889,
      "loss": 4.5074,
      "step": 963
    },
    {
      "epoch": 0.00964,
      "grad_norm": 0.4478664141352426,
      "learning_rate": 0.002892,
      "loss": 4.495,
      "step": 964
    },
    {
      "epoch": 0.00965,
      "grad_norm": 0.48082018280974176,
      "learning_rate": 0.002895,
      "loss": 4.4696,
      "step": 965
    },
    {
      "epoch": 0.00966,
      "grad_norm": 0.4468338524710237,
      "learning_rate": 0.002898,
      "loss": 4.4813,
      "step": 966
    },
    {
      "epoch": 0.00967,
      "grad_norm": 0.48445225824086435,
      "learning_rate": 0.002901,
      "loss": 4.5202,
      "step": 967
    },
    {
      "epoch": 0.00968,
      "grad_norm": 0.48753092096488154,
      "learning_rate": 0.002904,
      "loss": 4.4828,
      "step": 968
    },
    {
      "epoch": 0.00969,
      "grad_norm": 0.6098233552287393,
      "learning_rate": 0.002907,
      "loss": 4.5184,
      "step": 969
    },
    {
      "epoch": 0.0097,
      "grad_norm": 0.6841177464426628,
      "learning_rate": 0.00291,
      "loss": 4.4838,
      "step": 970
    },
    {
      "epoch": 0.00971,
      "grad_norm": 0.6296009793604628,
      "learning_rate": 0.002913,
      "loss": 4.4828,
      "step": 971
    },
    {
      "epoch": 0.00972,
      "grad_norm": 0.5562757859803139,
      "learning_rate": 0.002916,
      "loss": 4.489,
      "step": 972
    },
    {
      "epoch": 0.00973,
      "grad_norm": 0.6219386413564563,
      "learning_rate": 0.002919,
      "loss": 4.4862,
      "step": 973
    },
    {
      "epoch": 0.00974,
      "grad_norm": 0.6213225377585438,
      "learning_rate": 0.002922,
      "loss": 4.4703,
      "step": 974
    },
    {
      "epoch": 0.00975,
      "grad_norm": 0.6189120844763403,
      "learning_rate": 0.002925,
      "loss": 4.495,
      "step": 975
    },
    {
      "epoch": 0.00976,
      "grad_norm": 0.6980011774079288,
      "learning_rate": 0.002928,
      "loss": 4.4782,
      "step": 976
    },
    {
      "epoch": 0.00977,
      "grad_norm": 0.8069639632421017,
      "learning_rate": 0.002931,
      "loss": 4.5242,
      "step": 977
    },
    {
      "epoch": 0.00978,
      "grad_norm": 0.8591505642618554,
      "learning_rate": 0.002934,
      "loss": 4.5028,
      "step": 978
    },
    {
      "epoch": 0.00979,
      "grad_norm": 1.0057608067163726,
      "learning_rate": 0.002937,
      "loss": 4.5085,
      "step": 979
    },
    {
      "epoch": 0.0098,
      "grad_norm": 0.9763019684979988,
      "learning_rate": 0.00294,
      "loss": 4.5104,
      "step": 980
    },
    {
      "epoch": 0.00981,
      "grad_norm": 1.1460525604086425,
      "learning_rate": 0.002943,
      "loss": 4.5283,
      "step": 981
    },
    {
      "epoch": 0.00982,
      "grad_norm": 0.8686566673097217,
      "learning_rate": 0.002946,
      "loss": 4.5182,
      "step": 982
    },
    {
      "epoch": 0.00983,
      "grad_norm": 0.7552343773130578,
      "learning_rate": 0.0029490000000000002,
      "loss": 4.509,
      "step": 983
    },
    {
      "epoch": 0.00984,
      "grad_norm": 0.7358884017265429,
      "learning_rate": 0.002952,
      "loss": 4.4971,
      "step": 984
    },
    {
      "epoch": 0.00985,
      "grad_norm": 0.6607503765583332,
      "learning_rate": 0.002955,
      "loss": 4.5113,
      "step": 985
    },
    {
      "epoch": 0.00986,
      "grad_norm": 0.7870472473630633,
      "learning_rate": 0.002958,
      "loss": 4.4806,
      "step": 986
    },
    {
      "epoch": 0.00987,
      "grad_norm": 0.8309954161006763,
      "learning_rate": 0.002961,
      "loss": 4.5007,
      "step": 987
    },
    {
      "epoch": 0.00988,
      "grad_norm": 1.0363998670740173,
      "learning_rate": 0.002964,
      "loss": 4.5401,
      "step": 988
    },
    {
      "epoch": 0.00989,
      "grad_norm": 0.9365239840713727,
      "learning_rate": 0.002967,
      "loss": 4.498,
      "step": 989
    },
    {
      "epoch": 0.0099,
      "grad_norm": 0.6864119295113198,
      "learning_rate": 0.00297,
      "loss": 4.5125,
      "step": 990
    },
    {
      "epoch": 0.00991,
      "grad_norm": 0.8171121562090214,
      "learning_rate": 0.002973,
      "loss": 4.5158,
      "step": 991
    },
    {
      "epoch": 0.00992,
      "grad_norm": 0.745069203907096,
      "learning_rate": 0.002976,
      "loss": 4.5062,
      "step": 992
    },
    {
      "epoch": 0.00993,
      "grad_norm": 0.7044611843609352,
      "learning_rate": 0.002979,
      "loss": 4.5043,
      "step": 993
    },
    {
      "epoch": 0.00994,
      "grad_norm": 0.7308973125697386,
      "learning_rate": 0.002982,
      "loss": 4.5052,
      "step": 994
    },
    {
      "epoch": 0.00995,
      "grad_norm": 0.8310300849484714,
      "learning_rate": 0.0029850000000000002,
      "loss": 4.5102,
      "step": 995
    },
    {
      "epoch": 0.00996,
      "grad_norm": 0.8154352480510918,
      "learning_rate": 0.002988,
      "loss": 4.5127,
      "step": 996
    },
    {
      "epoch": 0.00997,
      "grad_norm": 0.6863991992573223,
      "learning_rate": 0.002991,
      "loss": 4.4952,
      "step": 997
    },
    {
      "epoch": 0.00998,
      "grad_norm": 0.7462946911384132,
      "learning_rate": 0.002994,
      "loss": 4.5024,
      "step": 998
    },
    {
      "epoch": 0.00999,
      "grad_norm": 0.7146477573054392,
      "learning_rate": 0.002997,
      "loss": 4.483,
      "step": 999
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7363866046556987,
      "learning_rate": 0.003,
      "loss": 4.5015,
      "step": 1000
    },
    {
      "epoch": 0.01001,
      "grad_norm": 0.7767634479973017,
      "learning_rate": 0.003,
      "loss": 4.5068,
      "step": 1001
    },
    {
      "epoch": 0.01002,
      "grad_norm": 0.7384346352174465,
      "learning_rate": 0.003,
      "loss": 4.4747,
      "step": 1002
    },
    {
      "epoch": 0.01003,
      "grad_norm": 0.9090237711013536,
      "learning_rate": 0.003,
      "loss": 4.5011,
      "step": 1003
    },
    {
      "epoch": 0.01004,
      "grad_norm": 0.9963866692217829,
      "learning_rate": 0.003,
      "loss": 4.5176,
      "step": 1004
    },
    {
      "epoch": 0.01005,
      "grad_norm": 0.7385625753811653,
      "learning_rate": 0.003,
      "loss": 4.4726,
      "step": 1005
    },
    {
      "epoch": 0.01006,
      "grad_norm": 0.8633843540772702,
      "learning_rate": 0.003,
      "loss": 4.4763,
      "step": 1006
    },
    {
      "epoch": 0.01007,
      "grad_norm": 0.7060101206147821,
      "learning_rate": 0.003,
      "loss": 4.5223,
      "step": 1007
    },
    {
      "epoch": 0.01008,
      "grad_norm": 0.6108586434140804,
      "learning_rate": 0.003,
      "loss": 4.5217,
      "step": 1008
    },
    {
      "epoch": 0.01009,
      "grad_norm": 0.828764309531409,
      "learning_rate": 0.003,
      "loss": 4.4986,
      "step": 1009
    },
    {
      "epoch": 0.0101,
      "grad_norm": 0.8644895268754212,
      "learning_rate": 0.003,
      "loss": 4.5093,
      "step": 1010
    },
    {
      "epoch": 0.01011,
      "grad_norm": 0.848614232580047,
      "learning_rate": 0.003,
      "loss": 4.5177,
      "step": 1011
    },
    {
      "epoch": 0.01012,
      "grad_norm": 0.8343157703645258,
      "learning_rate": 0.003,
      "loss": 4.5029,
      "step": 1012
    },
    {
      "epoch": 0.01013,
      "grad_norm": 0.768035628231052,
      "learning_rate": 0.003,
      "loss": 4.5208,
      "step": 1013
    },
    {
      "epoch": 0.01014,
      "grad_norm": 0.7039487217317226,
      "learning_rate": 0.003,
      "loss": 4.4647,
      "step": 1014
    },
    {
      "epoch": 0.01015,
      "grad_norm": 0.6611578667499322,
      "learning_rate": 0.003,
      "loss": 4.5009,
      "step": 1015
    },
    {
      "epoch": 0.01016,
      "grad_norm": 0.7043885975091321,
      "learning_rate": 0.003,
      "loss": 4.5125,
      "step": 1016
    },
    {
      "epoch": 0.01017,
      "grad_norm": 0.756430652075804,
      "learning_rate": 0.003,
      "loss": 4.4573,
      "step": 1017
    },
    {
      "epoch": 0.01018,
      "grad_norm": 0.8264253518319646,
      "learning_rate": 0.003,
      "loss": 4.5131,
      "step": 1018
    },
    {
      "epoch": 0.01019,
      "grad_norm": 0.6230428354913109,
      "learning_rate": 0.003,
      "loss": 4.4842,
      "step": 1019
    },
    {
      "epoch": 0.0102,
      "grad_norm": 0.5985675812220549,
      "learning_rate": 0.003,
      "loss": 4.4799,
      "step": 1020
    },
    {
      "epoch": 0.01021,
      "grad_norm": 0.6443728807360013,
      "learning_rate": 0.003,
      "loss": 4.4622,
      "step": 1021
    },
    {
      "epoch": 0.01022,
      "grad_norm": 0.5977304558601406,
      "learning_rate": 0.003,
      "loss": 4.471,
      "step": 1022
    },
    {
      "epoch": 0.01023,
      "grad_norm": 0.6033117691674914,
      "learning_rate": 0.003,
      "loss": 4.4639,
      "step": 1023
    },
    {
      "epoch": 0.01024,
      "grad_norm": 0.5721338516828588,
      "learning_rate": 0.003,
      "loss": 4.4293,
      "step": 1024
    },
    {
      "epoch": 0.01025,
      "grad_norm": 0.519887079849401,
      "learning_rate": 0.003,
      "loss": 4.4534,
      "step": 1025
    },
    {
      "epoch": 0.01026,
      "grad_norm": 0.5755595398770954,
      "learning_rate": 0.003,
      "loss": 4.4517,
      "step": 1026
    },
    {
      "epoch": 0.01027,
      "grad_norm": 0.5583328190762978,
      "learning_rate": 0.003,
      "loss": 4.4696,
      "step": 1027
    },
    {
      "epoch": 0.01028,
      "grad_norm": 0.5058651878513157,
      "learning_rate": 0.003,
      "loss": 4.4465,
      "step": 1028
    },
    {
      "epoch": 0.01029,
      "grad_norm": 0.42380394436046726,
      "learning_rate": 0.003,
      "loss": 4.4233,
      "step": 1029
    },
    {
      "epoch": 0.0103,
      "grad_norm": 0.39795258529145094,
      "learning_rate": 0.003,
      "loss": 4.4496,
      "step": 1030
    },
    {
      "epoch": 0.01031,
      "grad_norm": 0.4022804170357935,
      "learning_rate": 0.003,
      "loss": 4.4638,
      "step": 1031
    },
    {
      "epoch": 0.01032,
      "grad_norm": 0.5582373276086869,
      "learning_rate": 0.003,
      "loss": 4.4376,
      "step": 1032
    },
    {
      "epoch": 0.01033,
      "grad_norm": 0.6139042225833007,
      "learning_rate": 0.003,
      "loss": 4.4788,
      "step": 1033
    },
    {
      "epoch": 0.01034,
      "grad_norm": 0.6557191072266806,
      "learning_rate": 0.003,
      "loss": 4.4457,
      "step": 1034
    },
    {
      "epoch": 0.01035,
      "grad_norm": 0.5676080638884442,
      "learning_rate": 0.003,
      "loss": 4.4414,
      "step": 1035
    },
    {
      "epoch": 0.01036,
      "grad_norm": 0.5050742194579916,
      "learning_rate": 0.003,
      "loss": 4.428,
      "step": 1036
    },
    {
      "epoch": 0.01037,
      "grad_norm": 0.5525989752332315,
      "learning_rate": 0.003,
      "loss": 4.4551,
      "step": 1037
    },
    {
      "epoch": 0.01038,
      "grad_norm": 0.5387172699734339,
      "learning_rate": 0.003,
      "loss": 4.4503,
      "step": 1038
    },
    {
      "epoch": 0.01039,
      "grad_norm": 0.6460000205561591,
      "learning_rate": 0.003,
      "loss": 4.4441,
      "step": 1039
    },
    {
      "epoch": 0.0104,
      "grad_norm": 0.6935756269119994,
      "learning_rate": 0.003,
      "loss": 4.4606,
      "step": 1040
    },
    {
      "epoch": 0.01041,
      "grad_norm": 0.5741917605191801,
      "learning_rate": 0.003,
      "loss": 4.4375,
      "step": 1041
    },
    {
      "epoch": 0.01042,
      "grad_norm": 0.6148258717657724,
      "learning_rate": 0.003,
      "loss": 4.4633,
      "step": 1042
    },
    {
      "epoch": 0.01043,
      "grad_norm": 0.6159104547012477,
      "learning_rate": 0.003,
      "loss": 4.4115,
      "step": 1043
    },
    {
      "epoch": 0.01044,
      "grad_norm": 0.6904933868900409,
      "learning_rate": 0.003,
      "loss": 4.4475,
      "step": 1044
    },
    {
      "epoch": 0.01045,
      "grad_norm": 0.8049701467278718,
      "learning_rate": 0.003,
      "loss": 4.4778,
      "step": 1045
    },
    {
      "epoch": 0.01046,
      "grad_norm": 0.9237805199835166,
      "learning_rate": 0.003,
      "loss": 4.4531,
      "step": 1046
    },
    {
      "epoch": 0.01047,
      "grad_norm": 0.9025221925048675,
      "learning_rate": 0.003,
      "loss": 4.4626,
      "step": 1047
    },
    {
      "epoch": 0.01048,
      "grad_norm": 0.8439861117867156,
      "learning_rate": 0.003,
      "loss": 4.4594,
      "step": 1048
    },
    {
      "epoch": 0.01049,
      "grad_norm": 0.7790844674480221,
      "learning_rate": 0.003,
      "loss": 4.4347,
      "step": 1049
    },
    {
      "epoch": 0.0105,
      "grad_norm": 0.776695316132939,
      "learning_rate": 0.003,
      "loss": 4.4785,
      "step": 1050
    },
    {
      "epoch": 0.01051,
      "grad_norm": 0.7863011853992664,
      "learning_rate": 0.003,
      "loss": 4.4553,
      "step": 1051
    },
    {
      "epoch": 0.01052,
      "grad_norm": 0.8021665228413719,
      "learning_rate": 0.003,
      "loss": 4.4676,
      "step": 1052
    },
    {
      "epoch": 0.01053,
      "grad_norm": 0.8250289833375999,
      "learning_rate": 0.003,
      "loss": 4.4839,
      "step": 1053
    },
    {
      "epoch": 0.01054,
      "grad_norm": 0.8127326662842188,
      "learning_rate": 0.003,
      "loss": 4.4756,
      "step": 1054
    },
    {
      "epoch": 0.01055,
      "grad_norm": 0.6832114273454296,
      "learning_rate": 0.003,
      "loss": 4.4478,
      "step": 1055
    },
    {
      "epoch": 0.01056,
      "grad_norm": 0.5228897971280976,
      "learning_rate": 0.003,
      "loss": 4.4394,
      "step": 1056
    },
    {
      "epoch": 0.01057,
      "grad_norm": 0.4987455249643701,
      "learning_rate": 0.003,
      "loss": 4.4465,
      "step": 1057
    },
    {
      "epoch": 0.01058,
      "grad_norm": 0.5069816603194066,
      "learning_rate": 0.003,
      "loss": 4.4371,
      "step": 1058
    },
    {
      "epoch": 0.01059,
      "grad_norm": 0.5329576461470134,
      "learning_rate": 0.003,
      "loss": 4.4393,
      "step": 1059
    },
    {
      "epoch": 0.0106,
      "grad_norm": 0.5913240098111924,
      "learning_rate": 0.003,
      "loss": 4.4328,
      "step": 1060
    },
    {
      "epoch": 0.01061,
      "grad_norm": 0.6641649573073009,
      "learning_rate": 0.003,
      "loss": 4.4323,
      "step": 1061
    },
    {
      "epoch": 0.01062,
      "grad_norm": 0.6948839344713771,
      "learning_rate": 0.003,
      "loss": 4.4277,
      "step": 1062
    },
    {
      "epoch": 0.01063,
      "grad_norm": 0.7496196359877443,
      "learning_rate": 0.003,
      "loss": 4.4299,
      "step": 1063
    },
    {
      "epoch": 0.01064,
      "grad_norm": 0.7407156531442353,
      "learning_rate": 0.003,
      "loss": 4.4428,
      "step": 1064
    },
    {
      "epoch": 0.01065,
      "grad_norm": 0.8146214155770399,
      "learning_rate": 0.003,
      "loss": 4.4515,
      "step": 1065
    },
    {
      "epoch": 0.01066,
      "grad_norm": 1.0565696790056225,
      "learning_rate": 0.003,
      "loss": 4.4664,
      "step": 1066
    },
    {
      "epoch": 0.01067,
      "grad_norm": 0.9516719454349619,
      "learning_rate": 0.003,
      "loss": 4.4775,
      "step": 1067
    },
    {
      "epoch": 0.01068,
      "grad_norm": 0.8836216542706563,
      "learning_rate": 0.003,
      "loss": 4.4595,
      "step": 1068
    },
    {
      "epoch": 0.01069,
      "grad_norm": 0.8856659067512235,
      "learning_rate": 0.003,
      "loss": 4.4705,
      "step": 1069
    },
    {
      "epoch": 0.0107,
      "grad_norm": 0.7369477178565681,
      "learning_rate": 0.003,
      "loss": 4.5149,
      "step": 1070
    },
    {
      "epoch": 0.01071,
      "grad_norm": 0.7091155745251179,
      "learning_rate": 0.003,
      "loss": 4.4353,
      "step": 1071
    },
    {
      "epoch": 0.01072,
      "grad_norm": 0.7806676177349076,
      "learning_rate": 0.003,
      "loss": 4.4735,
      "step": 1072
    },
    {
      "epoch": 0.01073,
      "grad_norm": 0.779498494599653,
      "learning_rate": 0.003,
      "loss": 4.4707,
      "step": 1073
    },
    {
      "epoch": 0.01074,
      "grad_norm": 0.7011503302080855,
      "learning_rate": 0.003,
      "loss": 4.4305,
      "step": 1074
    },
    {
      "epoch": 0.01075,
      "grad_norm": 0.7831586118222456,
      "learning_rate": 0.003,
      "loss": 4.4706,
      "step": 1075
    },
    {
      "epoch": 0.01076,
      "grad_norm": 0.6817777196152708,
      "learning_rate": 0.003,
      "loss": 4.4647,
      "step": 1076
    },
    {
      "epoch": 0.01077,
      "grad_norm": 0.6515982616563128,
      "learning_rate": 0.003,
      "loss": 4.4569,
      "step": 1077
    },
    {
      "epoch": 0.01078,
      "grad_norm": 0.6249083984468133,
      "learning_rate": 0.003,
      "loss": 4.4127,
      "step": 1078
    },
    {
      "epoch": 0.01079,
      "grad_norm": 0.4801837441136389,
      "learning_rate": 0.003,
      "loss": 4.4399,
      "step": 1079
    },
    {
      "epoch": 0.0108,
      "grad_norm": 0.4834910792844242,
      "learning_rate": 0.003,
      "loss": 4.414,
      "step": 1080
    },
    {
      "epoch": 0.01081,
      "grad_norm": 0.5002980543849196,
      "learning_rate": 0.003,
      "loss": 4.4211,
      "step": 1081
    },
    {
      "epoch": 0.01082,
      "grad_norm": 0.46027227348307553,
      "learning_rate": 0.003,
      "loss": 4.4371,
      "step": 1082
    },
    {
      "epoch": 0.01083,
      "grad_norm": 0.5053419259251781,
      "learning_rate": 0.003,
      "loss": 4.4248,
      "step": 1083
    },
    {
      "epoch": 0.01084,
      "grad_norm": 0.5478371338567822,
      "learning_rate": 0.003,
      "loss": 4.432,
      "step": 1084
    },
    {
      "epoch": 0.01085,
      "grad_norm": 0.638950358189342,
      "learning_rate": 0.003,
      "loss": 4.4218,
      "step": 1085
    },
    {
      "epoch": 0.01086,
      "grad_norm": 0.7298487594957375,
      "learning_rate": 0.003,
      "loss": 4.4062,
      "step": 1086
    },
    {
      "epoch": 0.01087,
      "grad_norm": 0.7670847873505783,
      "learning_rate": 0.003,
      "loss": 4.4209,
      "step": 1087
    },
    {
      "epoch": 0.01088,
      "grad_norm": 0.5867260721567662,
      "learning_rate": 0.003,
      "loss": 4.4531,
      "step": 1088
    },
    {
      "epoch": 0.01089,
      "grad_norm": 0.4907796734695404,
      "learning_rate": 0.003,
      "loss": 4.4179,
      "step": 1089
    },
    {
      "epoch": 0.0109,
      "grad_norm": 0.4903707687409907,
      "learning_rate": 0.003,
      "loss": 4.4411,
      "step": 1090
    },
    {
      "epoch": 0.01091,
      "grad_norm": 0.511946289807033,
      "learning_rate": 0.003,
      "loss": 4.4087,
      "step": 1091
    },
    {
      "epoch": 0.01092,
      "grad_norm": 0.5375667414298495,
      "learning_rate": 0.003,
      "loss": 4.4066,
      "step": 1092
    },
    {
      "epoch": 0.01093,
      "grad_norm": 0.5526188836109072,
      "learning_rate": 0.003,
      "loss": 4.4252,
      "step": 1093
    },
    {
      "epoch": 0.01094,
      "grad_norm": 0.6569826260877659,
      "learning_rate": 0.003,
      "loss": 4.4168,
      "step": 1094
    },
    {
      "epoch": 0.01095,
      "grad_norm": 0.6683609284273635,
      "learning_rate": 0.003,
      "loss": 4.4455,
      "step": 1095
    },
    {
      "epoch": 0.01096,
      "grad_norm": 0.6112917342063837,
      "learning_rate": 0.003,
      "loss": 4.4132,
      "step": 1096
    },
    {
      "epoch": 0.01097,
      "grad_norm": 0.5936520390981126,
      "learning_rate": 0.003,
      "loss": 4.4026,
      "step": 1097
    },
    {
      "epoch": 0.01098,
      "grad_norm": 0.5505899855201338,
      "learning_rate": 0.003,
      "loss": 4.4193,
      "step": 1098
    },
    {
      "epoch": 0.01099,
      "grad_norm": 0.5445174984633135,
      "learning_rate": 0.003,
      "loss": 4.3908,
      "step": 1099
    },
    {
      "epoch": 0.011,
      "grad_norm": 0.5222662353377032,
      "learning_rate": 0.003,
      "loss": 4.3945,
      "step": 1100
    },
    {
      "epoch": 0.01101,
      "grad_norm": 0.46913668980272694,
      "learning_rate": 0.003,
      "loss": 4.405,
      "step": 1101
    },
    {
      "epoch": 0.01102,
      "grad_norm": 0.5475692182539631,
      "learning_rate": 0.003,
      "loss": 4.4129,
      "step": 1102
    },
    {
      "epoch": 0.01103,
      "grad_norm": 0.74691541890188,
      "learning_rate": 0.003,
      "loss": 4.4147,
      "step": 1103
    },
    {
      "epoch": 0.01104,
      "grad_norm": 1.169818056602323,
      "learning_rate": 0.003,
      "loss": 4.4275,
      "step": 1104
    },
    {
      "epoch": 0.01105,
      "grad_norm": 0.9343656682141298,
      "learning_rate": 0.003,
      "loss": 4.4588,
      "step": 1105
    },
    {
      "epoch": 0.01106,
      "grad_norm": 0.8373607049055661,
      "learning_rate": 0.003,
      "loss": 4.427,
      "step": 1106
    },
    {
      "epoch": 0.01107,
      "grad_norm": 1.0060128392113217,
      "learning_rate": 0.003,
      "loss": 4.4596,
      "step": 1107
    },
    {
      "epoch": 0.01108,
      "grad_norm": 1.0553010173790884,
      "learning_rate": 0.003,
      "loss": 4.4404,
      "step": 1108
    },
    {
      "epoch": 0.01109,
      "grad_norm": 1.2691952138628209,
      "learning_rate": 0.003,
      "loss": 4.4668,
      "step": 1109
    },
    {
      "epoch": 0.0111,
      "grad_norm": 1.3693624188172686,
      "learning_rate": 0.003,
      "loss": 4.519,
      "step": 1110
    },
    {
      "epoch": 0.01111,
      "grad_norm": 0.8462220605493783,
      "learning_rate": 0.003,
      "loss": 4.4548,
      "step": 1111
    },
    {
      "epoch": 0.01112,
      "grad_norm": 0.9753802461608932,
      "learning_rate": 0.003,
      "loss": 4.4881,
      "step": 1112
    },
    {
      "epoch": 0.01113,
      "grad_norm": 1.109859012060144,
      "learning_rate": 0.003,
      "loss": 4.4607,
      "step": 1113
    },
    {
      "epoch": 0.01114,
      "grad_norm": 1.0190945675303893,
      "learning_rate": 0.003,
      "loss": 4.4798,
      "step": 1114
    },
    {
      "epoch": 0.01115,
      "grad_norm": 1.329753511251153,
      "learning_rate": 0.003,
      "loss": 4.5276,
      "step": 1115
    },
    {
      "epoch": 0.01116,
      "grad_norm": 1.1336923243330905,
      "learning_rate": 0.003,
      "loss": 4.5138,
      "step": 1116
    },
    {
      "epoch": 0.01117,
      "grad_norm": 0.9232555932261653,
      "learning_rate": 0.003,
      "loss": 4.4861,
      "step": 1117
    },
    {
      "epoch": 0.01118,
      "grad_norm": 0.9897100726190513,
      "learning_rate": 0.003,
      "loss": 4.4981,
      "step": 1118
    },
    {
      "epoch": 0.01119,
      "grad_norm": 0.8425069501586361,
      "learning_rate": 0.003,
      "loss": 4.496,
      "step": 1119
    },
    {
      "epoch": 0.0112,
      "grad_norm": 0.9470337823805681,
      "learning_rate": 0.003,
      "loss": 4.4807,
      "step": 1120
    },
    {
      "epoch": 0.01121,
      "grad_norm": 0.8493765745632654,
      "learning_rate": 0.003,
      "loss": 4.4938,
      "step": 1121
    },
    {
      "epoch": 0.01122,
      "grad_norm": 0.7060199222018124,
      "learning_rate": 0.003,
      "loss": 4.4573,
      "step": 1122
    },
    {
      "epoch": 0.01123,
      "grad_norm": 0.6253251988669283,
      "learning_rate": 0.003,
      "loss": 4.4403,
      "step": 1123
    },
    {
      "epoch": 0.01124,
      "grad_norm": 0.579388445721472,
      "learning_rate": 0.003,
      "loss": 4.4818,
      "step": 1124
    },
    {
      "epoch": 0.01125,
      "grad_norm": 0.5338341898835932,
      "learning_rate": 0.003,
      "loss": 4.445,
      "step": 1125
    },
    {
      "epoch": 0.01126,
      "grad_norm": 0.47354440883207194,
      "learning_rate": 0.003,
      "loss": 4.4467,
      "step": 1126
    },
    {
      "epoch": 0.01127,
      "grad_norm": 0.3984491115653698,
      "learning_rate": 0.003,
      "loss": 4.4717,
      "step": 1127
    },
    {
      "epoch": 0.01128,
      "grad_norm": 0.3566421914933017,
      "learning_rate": 0.003,
      "loss": 4.4458,
      "step": 1128
    },
    {
      "epoch": 0.01129,
      "grad_norm": 0.34656729580538187,
      "learning_rate": 0.003,
      "loss": 4.4347,
      "step": 1129
    },
    {
      "epoch": 0.0113,
      "grad_norm": 0.35275594407110167,
      "learning_rate": 0.003,
      "loss": 4.4105,
      "step": 1130
    },
    {
      "epoch": 0.01131,
      "grad_norm": 0.3969022192306343,
      "learning_rate": 0.003,
      "loss": 4.4228,
      "step": 1131
    },
    {
      "epoch": 0.01132,
      "grad_norm": 0.5205676939993591,
      "learning_rate": 0.003,
      "loss": 4.4476,
      "step": 1132
    },
    {
      "epoch": 0.01133,
      "grad_norm": 0.7603712505670571,
      "learning_rate": 0.003,
      "loss": 4.4182,
      "step": 1133
    },
    {
      "epoch": 0.01134,
      "grad_norm": 1.1337080760085034,
      "learning_rate": 0.003,
      "loss": 4.4452,
      "step": 1134
    },
    {
      "epoch": 0.01135,
      "grad_norm": 0.7787650219297657,
      "learning_rate": 0.003,
      "loss": 4.4689,
      "step": 1135
    },
    {
      "epoch": 0.01136,
      "grad_norm": 0.5572304947875402,
      "learning_rate": 0.003,
      "loss": 4.431,
      "step": 1136
    },
    {
      "epoch": 0.01137,
      "grad_norm": 0.8150586648529397,
      "learning_rate": 0.003,
      "loss": 4.4648,
      "step": 1137
    },
    {
      "epoch": 0.01138,
      "grad_norm": 0.6150246614041321,
      "learning_rate": 0.003,
      "loss": 4.4126,
      "step": 1138
    },
    {
      "epoch": 0.01139,
      "grad_norm": 0.5367844011428362,
      "learning_rate": 0.003,
      "loss": 4.4175,
      "step": 1139
    },
    {
      "epoch": 0.0114,
      "grad_norm": 0.6012525993749752,
      "learning_rate": 0.003,
      "loss": 4.4009,
      "step": 1140
    },
    {
      "epoch": 0.01141,
      "grad_norm": 0.5424141892266514,
      "learning_rate": 0.003,
      "loss": 4.4201,
      "step": 1141
    },
    {
      "epoch": 0.01142,
      "grad_norm": 0.5412453890044091,
      "learning_rate": 0.003,
      "loss": 4.3966,
      "step": 1142
    },
    {
      "epoch": 0.01143,
      "grad_norm": 0.5770302538150057,
      "learning_rate": 0.003,
      "loss": 4.4119,
      "step": 1143
    },
    {
      "epoch": 0.01144,
      "grad_norm": 0.5130699212907381,
      "learning_rate": 0.003,
      "loss": 4.3751,
      "step": 1144
    },
    {
      "epoch": 0.01145,
      "grad_norm": 0.5251054242059607,
      "learning_rate": 0.003,
      "loss": 4.4104,
      "step": 1145
    },
    {
      "epoch": 0.01146,
      "grad_norm": 0.49049755788491844,
      "learning_rate": 0.003,
      "loss": 4.3944,
      "step": 1146
    },
    {
      "epoch": 0.01147,
      "grad_norm": 0.5045052675153547,
      "learning_rate": 0.003,
      "loss": 4.3854,
      "step": 1147
    },
    {
      "epoch": 0.01148,
      "grad_norm": 0.4827133382574848,
      "learning_rate": 0.003,
      "loss": 4.3821,
      "step": 1148
    },
    {
      "epoch": 0.01149,
      "grad_norm": 0.5391199926065147,
      "learning_rate": 0.003,
      "loss": 4.4015,
      "step": 1149
    },
    {
      "epoch": 0.0115,
      "grad_norm": 0.556670900436413,
      "learning_rate": 0.003,
      "loss": 4.3937,
      "step": 1150
    },
    {
      "epoch": 0.01151,
      "grad_norm": 0.4138744913914359,
      "learning_rate": 0.003,
      "loss": 4.3843,
      "step": 1151
    },
    {
      "epoch": 0.01152,
      "grad_norm": 0.34659544473454046,
      "learning_rate": 0.003,
      "loss": 4.3933,
      "step": 1152
    },
    {
      "epoch": 0.01153,
      "grad_norm": 0.3606979565928329,
      "learning_rate": 0.003,
      "loss": 4.4077,
      "step": 1153
    },
    {
      "epoch": 0.01154,
      "grad_norm": 0.3838071333965551,
      "learning_rate": 0.003,
      "loss": 4.3817,
      "step": 1154
    },
    {
      "epoch": 0.01155,
      "grad_norm": 0.43382343220964015,
      "learning_rate": 0.003,
      "loss": 4.3724,
      "step": 1155
    },
    {
      "epoch": 0.01156,
      "grad_norm": 0.568032323517354,
      "learning_rate": 0.003,
      "loss": 4.3801,
      "step": 1156
    },
    {
      "epoch": 0.01157,
      "grad_norm": 0.7429160925487139,
      "learning_rate": 0.003,
      "loss": 4.4178,
      "step": 1157
    },
    {
      "epoch": 0.01158,
      "grad_norm": 0.8073054366394413,
      "learning_rate": 0.003,
      "loss": 4.387,
      "step": 1158
    },
    {
      "epoch": 0.01159,
      "grad_norm": 0.6292689134714561,
      "learning_rate": 0.003,
      "loss": 4.38,
      "step": 1159
    },
    {
      "epoch": 0.0116,
      "grad_norm": 0.4911473702594272,
      "learning_rate": 0.003,
      "loss": 4.3886,
      "step": 1160
    },
    {
      "epoch": 0.01161,
      "grad_norm": 0.6900507602397448,
      "learning_rate": 0.003,
      "loss": 4.3715,
      "step": 1161
    },
    {
      "epoch": 0.01162,
      "grad_norm": 0.6121668130868384,
      "learning_rate": 0.003,
      "loss": 4.383,
      "step": 1162
    },
    {
      "epoch": 0.01163,
      "grad_norm": 0.5188516342723972,
      "learning_rate": 0.003,
      "loss": 4.398,
      "step": 1163
    },
    {
      "epoch": 0.01164,
      "grad_norm": 0.47059613888734086,
      "learning_rate": 0.003,
      "loss": 4.3849,
      "step": 1164
    },
    {
      "epoch": 0.01165,
      "grad_norm": 0.4930235270437344,
      "learning_rate": 0.003,
      "loss": 4.379,
      "step": 1165
    },
    {
      "epoch": 0.01166,
      "grad_norm": 0.5090992397072657,
      "learning_rate": 0.003,
      "loss": 4.3819,
      "step": 1166
    },
    {
      "epoch": 0.01167,
      "grad_norm": 0.5100693561752512,
      "learning_rate": 0.003,
      "loss": 4.3474,
      "step": 1167
    },
    {
      "epoch": 0.01168,
      "grad_norm": 0.542972407415325,
      "learning_rate": 0.003,
      "loss": 4.3564,
      "step": 1168
    },
    {
      "epoch": 0.01169,
      "grad_norm": 0.6608507785020126,
      "learning_rate": 0.003,
      "loss": 4.4017,
      "step": 1169
    },
    {
      "epoch": 0.0117,
      "grad_norm": 0.7569575045672728,
      "learning_rate": 0.003,
      "loss": 4.3919,
      "step": 1170
    },
    {
      "epoch": 0.01171,
      "grad_norm": 0.7480827801441813,
      "learning_rate": 0.003,
      "loss": 4.3755,
      "step": 1171
    },
    {
      "epoch": 0.01172,
      "grad_norm": 0.6792739909760177,
      "learning_rate": 0.003,
      "loss": 4.3636,
      "step": 1172
    },
    {
      "epoch": 0.01173,
      "grad_norm": 0.6630549828260829,
      "learning_rate": 0.003,
      "loss": 4.4004,
      "step": 1173
    },
    {
      "epoch": 0.01174,
      "grad_norm": 0.7082139785795519,
      "learning_rate": 0.003,
      "loss": 4.3812,
      "step": 1174
    },
    {
      "epoch": 0.01175,
      "grad_norm": 0.7551341949449439,
      "learning_rate": 0.003,
      "loss": 4.3695,
      "step": 1175
    },
    {
      "epoch": 0.01176,
      "grad_norm": 0.7273809152124429,
      "learning_rate": 0.003,
      "loss": 4.4015,
      "step": 1176
    },
    {
      "epoch": 0.01177,
      "grad_norm": 0.7688229463519815,
      "learning_rate": 0.003,
      "loss": 4.3744,
      "step": 1177
    },
    {
      "epoch": 0.01178,
      "grad_norm": 0.8108730548430517,
      "learning_rate": 0.003,
      "loss": 4.3934,
      "step": 1178
    },
    {
      "epoch": 0.01179,
      "grad_norm": 0.9813575828904657,
      "learning_rate": 0.003,
      "loss": 4.3791,
      "step": 1179
    },
    {
      "epoch": 0.0118,
      "grad_norm": 1.0649482117107174,
      "learning_rate": 0.003,
      "loss": 4.4668,
      "step": 1180
    },
    {
      "epoch": 0.01181,
      "grad_norm": 0.8898608281741577,
      "learning_rate": 0.003,
      "loss": 4.4423,
      "step": 1181
    },
    {
      "epoch": 0.01182,
      "grad_norm": 0.8357467858332619,
      "learning_rate": 0.003,
      "loss": 4.4019,
      "step": 1182
    },
    {
      "epoch": 0.01183,
      "grad_norm": 0.836760730371029,
      "learning_rate": 0.003,
      "loss": 4.3978,
      "step": 1183
    },
    {
      "epoch": 0.01184,
      "grad_norm": 0.877827474309717,
      "learning_rate": 0.003,
      "loss": 4.4398,
      "step": 1184
    },
    {
      "epoch": 0.01185,
      "grad_norm": 0.9245006346426254,
      "learning_rate": 0.003,
      "loss": 4.4185,
      "step": 1185
    },
    {
      "epoch": 0.01186,
      "grad_norm": 0.8254790906319688,
      "learning_rate": 0.003,
      "loss": 4.4365,
      "step": 1186
    },
    {
      "epoch": 0.01187,
      "grad_norm": 1.0721875866066062,
      "learning_rate": 0.003,
      "loss": 4.4147,
      "step": 1187
    },
    {
      "epoch": 0.01188,
      "grad_norm": 0.9234750910217812,
      "learning_rate": 0.003,
      "loss": 4.4331,
      "step": 1188
    },
    {
      "epoch": 0.01189,
      "grad_norm": 0.9441254842548545,
      "learning_rate": 0.003,
      "loss": 4.4335,
      "step": 1189
    },
    {
      "epoch": 0.0119,
      "grad_norm": 0.8429630478570035,
      "learning_rate": 0.003,
      "loss": 4.4107,
      "step": 1190
    },
    {
      "epoch": 0.01191,
      "grad_norm": 0.8544918450444529,
      "learning_rate": 0.003,
      "loss": 4.3931,
      "step": 1191
    },
    {
      "epoch": 0.01192,
      "grad_norm": 0.8750082673567889,
      "learning_rate": 0.003,
      "loss": 4.427,
      "step": 1192
    },
    {
      "epoch": 0.01193,
      "grad_norm": 0.7782283225810788,
      "learning_rate": 0.003,
      "loss": 4.4195,
      "step": 1193
    },
    {
      "epoch": 0.01194,
      "grad_norm": 0.8365585080211665,
      "learning_rate": 0.003,
      "loss": 4.4265,
      "step": 1194
    },
    {
      "epoch": 0.01195,
      "grad_norm": 0.8044450759919293,
      "learning_rate": 0.003,
      "loss": 4.4051,
      "step": 1195
    },
    {
      "epoch": 0.01196,
      "grad_norm": 0.663410887761066,
      "learning_rate": 0.003,
      "loss": 4.4006,
      "step": 1196
    },
    {
      "epoch": 0.01197,
      "grad_norm": 0.7191563284623688,
      "learning_rate": 0.003,
      "loss": 4.3713,
      "step": 1197
    },
    {
      "epoch": 0.01198,
      "grad_norm": 0.6109157259818949,
      "learning_rate": 0.003,
      "loss": 4.4347,
      "step": 1198
    },
    {
      "epoch": 0.01199,
      "grad_norm": 0.49919171672958657,
      "learning_rate": 0.003,
      "loss": 4.4063,
      "step": 1199
    },
    {
      "epoch": 0.012,
      "grad_norm": 0.4537605024865045,
      "learning_rate": 0.003,
      "loss": 4.3913,
      "step": 1200
    },
    {
      "epoch": 0.01201,
      "grad_norm": 0.4780231672540523,
      "learning_rate": 0.003,
      "loss": 4.4204,
      "step": 1201
    },
    {
      "epoch": 0.01202,
      "grad_norm": 0.5600100605398236,
      "learning_rate": 0.003,
      "loss": 4.3868,
      "step": 1202
    },
    {
      "epoch": 0.01203,
      "grad_norm": 0.7311892376344462,
      "learning_rate": 0.003,
      "loss": 4.4051,
      "step": 1203
    },
    {
      "epoch": 0.01204,
      "grad_norm": 0.9179100255668401,
      "learning_rate": 0.003,
      "loss": 4.4073,
      "step": 1204
    },
    {
      "epoch": 0.01205,
      "grad_norm": 0.8405883995035961,
      "learning_rate": 0.003,
      "loss": 4.4071,
      "step": 1205
    },
    {
      "epoch": 0.01206,
      "grad_norm": 0.5805185590607895,
      "learning_rate": 0.003,
      "loss": 4.4348,
      "step": 1206
    },
    {
      "epoch": 0.01207,
      "grad_norm": 0.5705660963600058,
      "learning_rate": 0.003,
      "loss": 4.359,
      "step": 1207
    },
    {
      "epoch": 0.01208,
      "grad_norm": 0.5103594427986314,
      "learning_rate": 0.003,
      "loss": 4.4136,
      "step": 1208
    },
    {
      "epoch": 0.01209,
      "grad_norm": 0.4394123490710629,
      "learning_rate": 0.003,
      "loss": 4.3897,
      "step": 1209
    },
    {
      "epoch": 0.0121,
      "grad_norm": 0.5463405097177936,
      "learning_rate": 0.003,
      "loss": 4.3949,
      "step": 1210
    },
    {
      "epoch": 0.01211,
      "grad_norm": 0.5044353172078102,
      "learning_rate": 0.003,
      "loss": 4.3781,
      "step": 1211
    },
    {
      "epoch": 0.01212,
      "grad_norm": 0.4270131501697402,
      "learning_rate": 0.003,
      "loss": 4.3982,
      "step": 1212
    },
    {
      "epoch": 0.01213,
      "grad_norm": 0.3945884644715176,
      "learning_rate": 0.003,
      "loss": 4.3729,
      "step": 1213
    },
    {
      "epoch": 0.01214,
      "grad_norm": 0.40327036636948815,
      "learning_rate": 0.003,
      "loss": 4.3565,
      "step": 1214
    },
    {
      "epoch": 0.01215,
      "grad_norm": 0.41033770529140634,
      "learning_rate": 0.003,
      "loss": 4.3626,
      "step": 1215
    },
    {
      "epoch": 0.01216,
      "grad_norm": 0.39745070811400113,
      "learning_rate": 0.003,
      "loss": 4.3556,
      "step": 1216
    },
    {
      "epoch": 0.01217,
      "grad_norm": 0.3765107225543665,
      "learning_rate": 0.003,
      "loss": 4.3601,
      "step": 1217
    },
    {
      "epoch": 0.01218,
      "grad_norm": 0.3728731687229966,
      "learning_rate": 0.003,
      "loss": 4.3546,
      "step": 1218
    },
    {
      "epoch": 0.01219,
      "grad_norm": 0.39445630505711005,
      "learning_rate": 0.003,
      "loss": 4.3541,
      "step": 1219
    },
    {
      "epoch": 0.0122,
      "grad_norm": 0.4147897339756862,
      "learning_rate": 0.003,
      "loss": 4.3467,
      "step": 1220
    },
    {
      "epoch": 0.01221,
      "grad_norm": 0.4491697002019059,
      "learning_rate": 0.003,
      "loss": 4.3628,
      "step": 1221
    },
    {
      "epoch": 0.01222,
      "grad_norm": 0.480111589813077,
      "learning_rate": 0.003,
      "loss": 4.3459,
      "step": 1222
    },
    {
      "epoch": 0.01223,
      "grad_norm": 0.4554234447859426,
      "learning_rate": 0.003,
      "loss": 4.3426,
      "step": 1223
    },
    {
      "epoch": 0.01224,
      "grad_norm": 0.4400762187377611,
      "learning_rate": 0.003,
      "loss": 4.3264,
      "step": 1224
    },
    {
      "epoch": 0.01225,
      "grad_norm": 0.6794202546583998,
      "learning_rate": 0.003,
      "loss": 4.3569,
      "step": 1225
    },
    {
      "epoch": 0.01226,
      "grad_norm": 1.0814698752713194,
      "learning_rate": 0.003,
      "loss": 4.3701,
      "step": 1226
    },
    {
      "epoch": 0.01227,
      "grad_norm": 1.155555006554351,
      "learning_rate": 0.003,
      "loss": 4.4276,
      "step": 1227
    },
    {
      "epoch": 0.01228,
      "grad_norm": 0.6549739358010055,
      "learning_rate": 0.003,
      "loss": 4.3793,
      "step": 1228
    },
    {
      "epoch": 0.01229,
      "grad_norm": 0.8770247693165437,
      "learning_rate": 0.003,
      "loss": 4.3857,
      "step": 1229
    },
    {
      "epoch": 0.0123,
      "grad_norm": 0.8853901281486561,
      "learning_rate": 0.003,
      "loss": 4.4222,
      "step": 1230
    },
    {
      "epoch": 0.01231,
      "grad_norm": 0.8670800529033448,
      "learning_rate": 0.003,
      "loss": 4.3911,
      "step": 1231
    },
    {
      "epoch": 0.01232,
      "grad_norm": 0.9431543452250409,
      "learning_rate": 0.003,
      "loss": 4.367,
      "step": 1232
    },
    {
      "epoch": 0.01233,
      "grad_norm": 0.8277618322356394,
      "learning_rate": 0.003,
      "loss": 4.3795,
      "step": 1233
    },
    {
      "epoch": 0.01234,
      "grad_norm": 0.8336748449561182,
      "learning_rate": 0.003,
      "loss": 4.3975,
      "step": 1234
    },
    {
      "epoch": 0.01235,
      "grad_norm": 0.7544795324708781,
      "learning_rate": 0.003,
      "loss": 4.3863,
      "step": 1235
    },
    {
      "epoch": 0.01236,
      "grad_norm": 0.5802889369771423,
      "learning_rate": 0.003,
      "loss": 4.3798,
      "step": 1236
    },
    {
      "epoch": 0.01237,
      "grad_norm": 0.5720134479022692,
      "learning_rate": 0.003,
      "loss": 4.39,
      "step": 1237
    },
    {
      "epoch": 0.01238,
      "grad_norm": 0.5465298337070073,
      "learning_rate": 0.003,
      "loss": 4.3824,
      "step": 1238
    },
    {
      "epoch": 0.01239,
      "grad_norm": 0.4850058714656761,
      "learning_rate": 0.003,
      "loss": 4.3643,
      "step": 1239
    },
    {
      "epoch": 0.0124,
      "grad_norm": 0.4837743776235617,
      "learning_rate": 0.003,
      "loss": 4.3458,
      "step": 1240
    },
    {
      "epoch": 0.01241,
      "grad_norm": 0.5464699719483495,
      "learning_rate": 0.003,
      "loss": 4.3589,
      "step": 1241
    },
    {
      "epoch": 0.01242,
      "grad_norm": 0.5595595597987126,
      "learning_rate": 0.003,
      "loss": 4.3633,
      "step": 1242
    },
    {
      "epoch": 0.01243,
      "grad_norm": 0.5979309976720427,
      "learning_rate": 0.003,
      "loss": 4.3445,
      "step": 1243
    },
    {
      "epoch": 0.01244,
      "grad_norm": 0.6121994614553141,
      "learning_rate": 0.003,
      "loss": 4.3608,
      "step": 1244
    },
    {
      "epoch": 0.01245,
      "grad_norm": 0.5870074144222708,
      "learning_rate": 0.003,
      "loss": 4.3495,
      "step": 1245
    },
    {
      "epoch": 0.01246,
      "grad_norm": 0.5518561222562742,
      "learning_rate": 0.003,
      "loss": 4.3238,
      "step": 1246
    },
    {
      "epoch": 0.01247,
      "grad_norm": 0.4927404519806348,
      "learning_rate": 0.003,
      "loss": 4.3772,
      "step": 1247
    },
    {
      "epoch": 0.01248,
      "grad_norm": 0.5614227665664012,
      "learning_rate": 0.003,
      "loss": 4.3637,
      "step": 1248
    },
    {
      "epoch": 0.01249,
      "grad_norm": 0.6237525658945793,
      "learning_rate": 0.003,
      "loss": 4.3523,
      "step": 1249
    },
    {
      "epoch": 0.0125,
      "grad_norm": 0.6285540244025752,
      "learning_rate": 0.003,
      "loss": 4.3285,
      "step": 1250
    },
    {
      "epoch": 0.01251,
      "grad_norm": 0.6642330670109609,
      "learning_rate": 0.003,
      "loss": 4.3632,
      "step": 1251
    },
    {
      "epoch": 0.01252,
      "grad_norm": 0.7486788521183766,
      "learning_rate": 0.003,
      "loss": 4.3494,
      "step": 1252
    },
    {
      "epoch": 0.01253,
      "grad_norm": 0.8786228409254562,
      "learning_rate": 0.003,
      "loss": 4.3342,
      "step": 1253
    },
    {
      "epoch": 0.01254,
      "grad_norm": 0.890563511155565,
      "learning_rate": 0.003,
      "loss": 4.3886,
      "step": 1254
    },
    {
      "epoch": 0.01255,
      "grad_norm": 0.7085396917976445,
      "learning_rate": 0.003,
      "loss": 4.3296,
      "step": 1255
    },
    {
      "epoch": 0.01256,
      "grad_norm": 0.6070625004395708,
      "learning_rate": 0.003,
      "loss": 4.3521,
      "step": 1256
    },
    {
      "epoch": 0.01257,
      "grad_norm": 0.7279044200924197,
      "learning_rate": 0.003,
      "loss": 4.3471,
      "step": 1257
    },
    {
      "epoch": 0.01258,
      "grad_norm": 0.726475334235256,
      "learning_rate": 0.003,
      "loss": 4.353,
      "step": 1258
    },
    {
      "epoch": 0.01259,
      "grad_norm": 0.657918565683394,
      "learning_rate": 0.003,
      "loss": 4.3505,
      "step": 1259
    },
    {
      "epoch": 0.0126,
      "grad_norm": 0.6055846983934545,
      "learning_rate": 0.003,
      "loss": 4.3785,
      "step": 1260
    },
    {
      "epoch": 0.01261,
      "grad_norm": 0.5773200966833153,
      "learning_rate": 0.003,
      "loss": 4.3685,
      "step": 1261
    },
    {
      "epoch": 0.01262,
      "grad_norm": 0.6458766772660607,
      "learning_rate": 0.003,
      "loss": 4.3644,
      "step": 1262
    },
    {
      "epoch": 0.01263,
      "grad_norm": 0.6994622766289077,
      "learning_rate": 0.003,
      "loss": 4.3614,
      "step": 1263
    },
    {
      "epoch": 0.01264,
      "grad_norm": 0.6737363060454188,
      "learning_rate": 0.003,
      "loss": 4.3487,
      "step": 1264
    },
    {
      "epoch": 0.01265,
      "grad_norm": 0.6931523604065615,
      "learning_rate": 0.003,
      "loss": 4.362,
      "step": 1265
    },
    {
      "epoch": 0.01266,
      "grad_norm": 0.7992655892294886,
      "learning_rate": 0.003,
      "loss": 4.3669,
      "step": 1266
    },
    {
      "epoch": 0.01267,
      "grad_norm": 0.8435255340483996,
      "learning_rate": 0.003,
      "loss": 4.3876,
      "step": 1267
    },
    {
      "epoch": 0.01268,
      "grad_norm": 0.82699021549605,
      "learning_rate": 0.003,
      "loss": 4.3772,
      "step": 1268
    },
    {
      "epoch": 0.01269,
      "grad_norm": 0.8697659895900343,
      "learning_rate": 0.003,
      "loss": 4.3448,
      "step": 1269
    },
    {
      "epoch": 0.0127,
      "grad_norm": 0.8134112717759022,
      "learning_rate": 0.003,
      "loss": 4.3816,
      "step": 1270
    },
    {
      "epoch": 0.01271,
      "grad_norm": 0.6786099831237219,
      "learning_rate": 0.003,
      "loss": 4.3563,
      "step": 1271
    },
    {
      "epoch": 0.01272,
      "grad_norm": 0.5433088474514179,
      "learning_rate": 0.003,
      "loss": 4.3456,
      "step": 1272
    },
    {
      "epoch": 0.01273,
      "grad_norm": 0.6040582030046006,
      "learning_rate": 0.003,
      "loss": 4.3736,
      "step": 1273
    },
    {
      "epoch": 0.01274,
      "grad_norm": 0.6407861769281096,
      "learning_rate": 0.003,
      "loss": 4.3663,
      "step": 1274
    },
    {
      "epoch": 0.01275,
      "grad_norm": 0.6627818267548817,
      "learning_rate": 0.003,
      "loss": 4.3377,
      "step": 1275
    },
    {
      "epoch": 0.01276,
      "grad_norm": 0.6923007139718157,
      "learning_rate": 0.003,
      "loss": 4.3441,
      "step": 1276
    },
    {
      "epoch": 0.01277,
      "grad_norm": 0.5987383094358855,
      "learning_rate": 0.003,
      "loss": 4.3497,
      "step": 1277
    },
    {
      "epoch": 0.01278,
      "grad_norm": 0.48773862011134517,
      "learning_rate": 0.003,
      "loss": 4.3516,
      "step": 1278
    },
    {
      "epoch": 0.01279,
      "grad_norm": 0.48017539333754444,
      "learning_rate": 0.003,
      "loss": 4.3344,
      "step": 1279
    },
    {
      "epoch": 0.0128,
      "grad_norm": 0.3998167780826478,
      "learning_rate": 0.003,
      "loss": 4.3274,
      "step": 1280
    },
    {
      "epoch": 0.01281,
      "grad_norm": 0.48111796457160566,
      "learning_rate": 0.003,
      "loss": 4.3157,
      "step": 1281
    },
    {
      "epoch": 0.01282,
      "grad_norm": 0.48993303263312776,
      "learning_rate": 0.003,
      "loss": 4.3556,
      "step": 1282
    },
    {
      "epoch": 0.01283,
      "grad_norm": 0.6253877533100372,
      "learning_rate": 0.003,
      "loss": 4.3297,
      "step": 1283
    },
    {
      "epoch": 0.01284,
      "grad_norm": 0.5705490801287882,
      "learning_rate": 0.003,
      "loss": 4.3318,
      "step": 1284
    },
    {
      "epoch": 0.01285,
      "grad_norm": 0.5653707188447052,
      "learning_rate": 0.003,
      "loss": 4.3333,
      "step": 1285
    },
    {
      "epoch": 0.01286,
      "grad_norm": 0.6731371693634968,
      "learning_rate": 0.003,
      "loss": 4.3633,
      "step": 1286
    },
    {
      "epoch": 0.01287,
      "grad_norm": 0.8405912851979439,
      "learning_rate": 0.003,
      "loss": 4.3324,
      "step": 1287
    },
    {
      "epoch": 0.01288,
      "grad_norm": 0.8660374446207693,
      "learning_rate": 0.003,
      "loss": 4.3541,
      "step": 1288
    },
    {
      "epoch": 0.01289,
      "grad_norm": 0.7397369781522664,
      "learning_rate": 0.003,
      "loss": 4.3429,
      "step": 1289
    },
    {
      "epoch": 0.0129,
      "grad_norm": 0.757608447039217,
      "learning_rate": 0.003,
      "loss": 4.3696,
      "step": 1290
    },
    {
      "epoch": 0.01291,
      "grad_norm": 0.7650019196438267,
      "learning_rate": 0.003,
      "loss": 4.3518,
      "step": 1291
    },
    {
      "epoch": 0.01292,
      "grad_norm": 0.7951894033365094,
      "learning_rate": 0.003,
      "loss": 4.3829,
      "step": 1292
    },
    {
      "epoch": 0.01293,
      "grad_norm": 0.7837011153839035,
      "learning_rate": 0.003,
      "loss": 4.3714,
      "step": 1293
    },
    {
      "epoch": 0.01294,
      "grad_norm": 0.6526824466083696,
      "learning_rate": 0.003,
      "loss": 4.3313,
      "step": 1294
    },
    {
      "epoch": 0.01295,
      "grad_norm": 0.6353462230193634,
      "learning_rate": 0.003,
      "loss": 4.3599,
      "step": 1295
    },
    {
      "epoch": 0.01296,
      "grad_norm": 0.6488557896271576,
      "learning_rate": 0.003,
      "loss": 4.3294,
      "step": 1296
    },
    {
      "epoch": 0.01297,
      "grad_norm": 0.5423372327946681,
      "learning_rate": 0.003,
      "loss": 4.339,
      "step": 1297
    },
    {
      "epoch": 0.01298,
      "grad_norm": 0.5444047364511552,
      "learning_rate": 0.003,
      "loss": 4.3162,
      "step": 1298
    },
    {
      "epoch": 0.01299,
      "grad_norm": 0.562127858257584,
      "learning_rate": 0.003,
      "loss": 4.3274,
      "step": 1299
    },
    {
      "epoch": 0.013,
      "grad_norm": 0.6451595635625548,
      "learning_rate": 0.003,
      "loss": 4.3212,
      "step": 1300
    },
    {
      "epoch": 0.01301,
      "grad_norm": 0.7106141053798949,
      "learning_rate": 0.003,
      "loss": 4.338,
      "step": 1301
    },
    {
      "epoch": 0.01302,
      "grad_norm": 0.7037506677578025,
      "learning_rate": 0.003,
      "loss": 4.3858,
      "step": 1302
    },
    {
      "epoch": 0.01303,
      "grad_norm": 0.549949183276315,
      "learning_rate": 0.003,
      "loss": 4.3254,
      "step": 1303
    },
    {
      "epoch": 0.01304,
      "grad_norm": 0.5830602260260574,
      "learning_rate": 0.003,
      "loss": 4.3529,
      "step": 1304
    },
    {
      "epoch": 0.01305,
      "grad_norm": 0.6092381647637285,
      "learning_rate": 0.003,
      "loss": 4.3402,
      "step": 1305
    },
    {
      "epoch": 0.01306,
      "grad_norm": 0.9341457164339962,
      "learning_rate": 0.003,
      "loss": 4.343,
      "step": 1306
    },
    {
      "epoch": 0.01307,
      "grad_norm": 1.4224559913594186,
      "learning_rate": 0.003,
      "loss": 4.4238,
      "step": 1307
    },
    {
      "epoch": 0.01308,
      "grad_norm": 0.6683671842157866,
      "learning_rate": 0.003,
      "loss": 4.3493,
      "step": 1308
    },
    {
      "epoch": 0.01309,
      "grad_norm": 1.0351217140401547,
      "learning_rate": 0.003,
      "loss": 4.4146,
      "step": 1309
    },
    {
      "epoch": 0.0131,
      "grad_norm": 0.8334924149650484,
      "learning_rate": 0.003,
      "loss": 4.3523,
      "step": 1310
    },
    {
      "epoch": 0.01311,
      "grad_norm": 0.8015005705393878,
      "learning_rate": 0.003,
      "loss": 4.3919,
      "step": 1311
    },
    {
      "epoch": 0.01312,
      "grad_norm": 0.9176746638594273,
      "learning_rate": 0.003,
      "loss": 4.3611,
      "step": 1312
    },
    {
      "epoch": 0.01313,
      "grad_norm": 0.8139738106820199,
      "learning_rate": 0.003,
      "loss": 4.3563,
      "step": 1313
    },
    {
      "epoch": 0.01314,
      "grad_norm": 0.727251837713852,
      "learning_rate": 0.003,
      "loss": 4.3474,
      "step": 1314
    },
    {
      "epoch": 0.01315,
      "grad_norm": 0.6676632023954393,
      "learning_rate": 0.003,
      "loss": 4.3714,
      "step": 1315
    },
    {
      "epoch": 0.01316,
      "grad_norm": 0.6813048603278513,
      "learning_rate": 0.003,
      "loss": 4.3558,
      "step": 1316
    },
    {
      "epoch": 0.01317,
      "grad_norm": 0.6300708633746414,
      "learning_rate": 0.003,
      "loss": 4.378,
      "step": 1317
    },
    {
      "epoch": 0.01318,
      "grad_norm": 0.7076482507132812,
      "learning_rate": 0.003,
      "loss": 4.3605,
      "step": 1318
    },
    {
      "epoch": 0.01319,
      "grad_norm": 0.6736047843532478,
      "learning_rate": 0.003,
      "loss": 4.364,
      "step": 1319
    },
    {
      "epoch": 0.0132,
      "grad_norm": 0.5964453447416318,
      "learning_rate": 0.003,
      "loss": 4.3581,
      "step": 1320
    },
    {
      "epoch": 0.01321,
      "grad_norm": 0.47030220882918905,
      "learning_rate": 0.003,
      "loss": 4.3301,
      "step": 1321
    },
    {
      "epoch": 0.01322,
      "grad_norm": 0.4742080870560624,
      "learning_rate": 0.003,
      "loss": 4.3574,
      "step": 1322
    },
    {
      "epoch": 0.01323,
      "grad_norm": 0.48984225873706044,
      "learning_rate": 0.003,
      "loss": 4.3497,
      "step": 1323
    },
    {
      "epoch": 0.01324,
      "grad_norm": 0.5369510525758135,
      "learning_rate": 0.003,
      "loss": 4.3352,
      "step": 1324
    },
    {
      "epoch": 0.01325,
      "grad_norm": 0.5504701205401483,
      "learning_rate": 0.003,
      "loss": 4.3385,
      "step": 1325
    },
    {
      "epoch": 0.01326,
      "grad_norm": 0.5673099911125634,
      "learning_rate": 0.003,
      "loss": 4.3243,
      "step": 1326
    },
    {
      "epoch": 0.01327,
      "grad_norm": 0.4865587770330079,
      "learning_rate": 0.003,
      "loss": 4.3399,
      "step": 1327
    },
    {
      "epoch": 0.01328,
      "grad_norm": 0.4578059012385763,
      "learning_rate": 0.003,
      "loss": 4.3277,
      "step": 1328
    },
    {
      "epoch": 0.01329,
      "grad_norm": 0.4815378415098633,
      "learning_rate": 0.003,
      "loss": 4.2921,
      "step": 1329
    },
    {
      "epoch": 0.0133,
      "grad_norm": 0.5592011199263653,
      "learning_rate": 0.003,
      "loss": 4.3364,
      "step": 1330
    },
    {
      "epoch": 0.01331,
      "grad_norm": 0.6524837201480481,
      "learning_rate": 0.003,
      "loss": 4.3256,
      "step": 1331
    },
    {
      "epoch": 0.01332,
      "grad_norm": 0.8576005564857463,
      "learning_rate": 0.003,
      "loss": 4.3511,
      "step": 1332
    },
    {
      "epoch": 0.01333,
      "grad_norm": 0.9139584243573721,
      "learning_rate": 0.003,
      "loss": 4.3382,
      "step": 1333
    },
    {
      "epoch": 0.01334,
      "grad_norm": 0.7699767099990749,
      "learning_rate": 0.003,
      "loss": 4.3209,
      "step": 1334
    },
    {
      "epoch": 0.01335,
      "grad_norm": 0.9105795238149825,
      "learning_rate": 0.003,
      "loss": 4.3513,
      "step": 1335
    },
    {
      "epoch": 0.01336,
      "grad_norm": 0.9449124572578428,
      "learning_rate": 0.003,
      "loss": 4.3595,
      "step": 1336
    },
    {
      "epoch": 0.01337,
      "grad_norm": 0.8839799501947162,
      "learning_rate": 0.003,
      "loss": 4.3666,
      "step": 1337
    },
    {
      "epoch": 0.01338,
      "grad_norm": 0.8700783598647539,
      "learning_rate": 0.003,
      "loss": 4.3367,
      "step": 1338
    },
    {
      "epoch": 0.01339,
      "grad_norm": 0.8610679300232287,
      "learning_rate": 0.003,
      "loss": 4.3656,
      "step": 1339
    },
    {
      "epoch": 0.0134,
      "grad_norm": 0.9245398144375081,
      "learning_rate": 0.003,
      "loss": 4.3527,
      "step": 1340
    },
    {
      "epoch": 0.01341,
      "grad_norm": 0.7746264590506864,
      "learning_rate": 0.003,
      "loss": 4.3623,
      "step": 1341
    },
    {
      "epoch": 0.01342,
      "grad_norm": 0.7444570148279184,
      "learning_rate": 0.003,
      "loss": 4.3319,
      "step": 1342
    },
    {
      "epoch": 0.01343,
      "grad_norm": 0.8969785054543269,
      "learning_rate": 0.003,
      "loss": 4.3417,
      "step": 1343
    },
    {
      "epoch": 0.01344,
      "grad_norm": 0.8665723124328953,
      "learning_rate": 0.003,
      "loss": 4.3502,
      "step": 1344
    },
    {
      "epoch": 0.01345,
      "grad_norm": 0.6831543138899113,
      "learning_rate": 0.003,
      "loss": 4.3776,
      "step": 1345
    },
    {
      "epoch": 0.01346,
      "grad_norm": 0.625697906097118,
      "learning_rate": 0.003,
      "loss": 4.3469,
      "step": 1346
    },
    {
      "epoch": 0.01347,
      "grad_norm": 0.6576058750432187,
      "learning_rate": 0.003,
      "loss": 4.3425,
      "step": 1347
    },
    {
      "epoch": 0.01348,
      "grad_norm": 0.5243855341021313,
      "learning_rate": 0.003,
      "loss": 4.3344,
      "step": 1348
    },
    {
      "epoch": 0.01349,
      "grad_norm": 0.5645682386429235,
      "learning_rate": 0.003,
      "loss": 4.3534,
      "step": 1349
    },
    {
      "epoch": 0.0135,
      "grad_norm": 0.6464802745457856,
      "learning_rate": 0.003,
      "loss": 4.3487,
      "step": 1350
    },
    {
      "epoch": 0.01351,
      "grad_norm": 0.6140774871479286,
      "learning_rate": 0.003,
      "loss": 4.3731,
      "step": 1351
    },
    {
      "epoch": 0.01352,
      "grad_norm": 0.5910057101458851,
      "learning_rate": 0.003,
      "loss": 4.3486,
      "step": 1352
    },
    {
      "epoch": 0.01353,
      "grad_norm": 0.5189635003660854,
      "learning_rate": 0.003,
      "loss": 4.3285,
      "step": 1353
    },
    {
      "epoch": 0.01354,
      "grad_norm": 0.48063452026290554,
      "learning_rate": 0.003,
      "loss": 4.3159,
      "step": 1354
    },
    {
      "epoch": 0.01355,
      "grad_norm": 0.4467878184437842,
      "learning_rate": 0.003,
      "loss": 4.3256,
      "step": 1355
    },
    {
      "epoch": 0.01356,
      "grad_norm": 0.4118799509480309,
      "learning_rate": 0.003,
      "loss": 4.3178,
      "step": 1356
    },
    {
      "epoch": 0.01357,
      "grad_norm": 0.4100502484702344,
      "learning_rate": 0.003,
      "loss": 4.3041,
      "step": 1357
    },
    {
      "epoch": 0.01358,
      "grad_norm": 0.412455124213396,
      "learning_rate": 0.003,
      "loss": 4.3003,
      "step": 1358
    },
    {
      "epoch": 0.01359,
      "grad_norm": 0.46760647383491716,
      "learning_rate": 0.003,
      "loss": 4.3395,
      "step": 1359
    },
    {
      "epoch": 0.0136,
      "grad_norm": 0.5617691070267921,
      "learning_rate": 0.003,
      "loss": 4.3077,
      "step": 1360
    },
    {
      "epoch": 0.01361,
      "grad_norm": 0.7602325372403749,
      "learning_rate": 0.003,
      "loss": 4.3297,
      "step": 1361
    },
    {
      "epoch": 0.01362,
      "grad_norm": 0.9475459172774735,
      "learning_rate": 0.003,
      "loss": 4.3591,
      "step": 1362
    },
    {
      "epoch": 0.01363,
      "grad_norm": 0.8968742883061624,
      "learning_rate": 0.003,
      "loss": 4.3557,
      "step": 1363
    },
    {
      "epoch": 0.01364,
      "grad_norm": 0.6939217241789496,
      "learning_rate": 0.003,
      "loss": 4.3451,
      "step": 1364
    },
    {
      "epoch": 0.01365,
      "grad_norm": 0.7258884734824279,
      "learning_rate": 0.003,
      "loss": 4.3658,
      "step": 1365
    },
    {
      "epoch": 0.01366,
      "grad_norm": 0.6969466740776088,
      "learning_rate": 0.003,
      "loss": 4.3146,
      "step": 1366
    },
    {
      "epoch": 0.01367,
      "grad_norm": 0.5835124525702842,
      "learning_rate": 0.003,
      "loss": 4.3609,
      "step": 1367
    },
    {
      "epoch": 0.01368,
      "grad_norm": 0.6385510495556607,
      "learning_rate": 0.003,
      "loss": 4.3329,
      "step": 1368
    },
    {
      "epoch": 0.01369,
      "grad_norm": 0.6527049554291405,
      "learning_rate": 0.003,
      "loss": 4.3277,
      "step": 1369
    },
    {
      "epoch": 0.0137,
      "grad_norm": 0.5958773621355411,
      "learning_rate": 0.003,
      "loss": 4.3484,
      "step": 1370
    },
    {
      "epoch": 0.01371,
      "grad_norm": 0.5116264880954566,
      "learning_rate": 0.003,
      "loss": 4.3379,
      "step": 1371
    },
    {
      "epoch": 0.01372,
      "grad_norm": 0.47497442415709884,
      "learning_rate": 0.003,
      "loss": 4.285,
      "step": 1372
    },
    {
      "epoch": 0.01373,
      "grad_norm": 0.4919302188338485,
      "learning_rate": 0.003,
      "loss": 4.3263,
      "step": 1373
    },
    {
      "epoch": 0.01374,
      "grad_norm": 0.5098435569626014,
      "learning_rate": 0.003,
      "loss": 4.3313,
      "step": 1374
    },
    {
      "epoch": 0.01375,
      "grad_norm": 0.48516361465791263,
      "learning_rate": 0.003,
      "loss": 4.2907,
      "step": 1375
    },
    {
      "epoch": 0.01376,
      "grad_norm": 0.47820279723214054,
      "learning_rate": 0.003,
      "loss": 4.3156,
      "step": 1376
    },
    {
      "epoch": 0.01377,
      "grad_norm": 0.5605397917837966,
      "learning_rate": 0.003,
      "loss": 4.3156,
      "step": 1377
    },
    {
      "epoch": 0.01378,
      "grad_norm": 0.6074855045770513,
      "learning_rate": 0.003,
      "loss": 4.3477,
      "step": 1378
    },
    {
      "epoch": 0.01379,
      "grad_norm": 0.790948814593468,
      "learning_rate": 0.003,
      "loss": 4.3238,
      "step": 1379
    },
    {
      "epoch": 0.0138,
      "grad_norm": 0.9450973850406263,
      "learning_rate": 0.003,
      "loss": 4.3379,
      "step": 1380
    },
    {
      "epoch": 0.01381,
      "grad_norm": 0.8427221722996694,
      "learning_rate": 0.003,
      "loss": 4.3388,
      "step": 1381
    },
    {
      "epoch": 0.01382,
      "grad_norm": 0.6717154970979985,
      "learning_rate": 0.003,
      "loss": 4.3301,
      "step": 1382
    },
    {
      "epoch": 0.01383,
      "grad_norm": 0.7203290959554749,
      "learning_rate": 0.003,
      "loss": 4.3316,
      "step": 1383
    },
    {
      "epoch": 0.01384,
      "grad_norm": 0.8108684001139901,
      "learning_rate": 0.003,
      "loss": 4.3616,
      "step": 1384
    },
    {
      "epoch": 0.01385,
      "grad_norm": 0.7536569525897636,
      "learning_rate": 0.003,
      "loss": 4.3491,
      "step": 1385
    },
    {
      "epoch": 0.01386,
      "grad_norm": 0.7170208076766044,
      "learning_rate": 0.003,
      "loss": 4.3224,
      "step": 1386
    },
    {
      "epoch": 0.01387,
      "grad_norm": 0.7442453828077332,
      "learning_rate": 0.003,
      "loss": 4.3132,
      "step": 1387
    },
    {
      "epoch": 0.01388,
      "grad_norm": 0.6263710315963331,
      "learning_rate": 0.003,
      "loss": 4.3468,
      "step": 1388
    },
    {
      "epoch": 0.01389,
      "grad_norm": 0.6832177735784069,
      "learning_rate": 0.003,
      "loss": 4.3313,
      "step": 1389
    },
    {
      "epoch": 0.0139,
      "grad_norm": 0.6389171239669225,
      "learning_rate": 0.003,
      "loss": 4.3298,
      "step": 1390
    },
    {
      "epoch": 0.01391,
      "grad_norm": 0.6878908541552311,
      "learning_rate": 0.003,
      "loss": 4.3331,
      "step": 1391
    },
    {
      "epoch": 0.01392,
      "grad_norm": 0.7071113368075915,
      "learning_rate": 0.003,
      "loss": 4.3467,
      "step": 1392
    },
    {
      "epoch": 0.01393,
      "grad_norm": 0.7509678939394911,
      "learning_rate": 0.003,
      "loss": 4.3167,
      "step": 1393
    },
    {
      "epoch": 0.01394,
      "grad_norm": 0.6897663506233761,
      "learning_rate": 0.003,
      "loss": 4.3034,
      "step": 1394
    },
    {
      "epoch": 0.01395,
      "grad_norm": 0.6186721093392761,
      "learning_rate": 0.003,
      "loss": 4.3322,
      "step": 1395
    },
    {
      "epoch": 0.01396,
      "grad_norm": 0.6301020321478175,
      "learning_rate": 0.003,
      "loss": 4.3224,
      "step": 1396
    },
    {
      "epoch": 0.01397,
      "grad_norm": 0.6584085329483399,
      "learning_rate": 0.003,
      "loss": 4.3397,
      "step": 1397
    },
    {
      "epoch": 0.01398,
      "grad_norm": 0.6241984225824584,
      "learning_rate": 0.003,
      "loss": 4.3151,
      "step": 1398
    },
    {
      "epoch": 0.01399,
      "grad_norm": 0.6679710819080718,
      "learning_rate": 0.003,
      "loss": 4.3393,
      "step": 1399
    },
    {
      "epoch": 0.014,
      "grad_norm": 0.7222909611996308,
      "learning_rate": 0.003,
      "loss": 4.3009,
      "step": 1400
    },
    {
      "epoch": 0.01401,
      "grad_norm": 0.7571875348826584,
      "learning_rate": 0.003,
      "loss": 4.3281,
      "step": 1401
    },
    {
      "epoch": 0.01402,
      "grad_norm": 0.6970928306893117,
      "learning_rate": 0.003,
      "loss": 4.3342,
      "step": 1402
    },
    {
      "epoch": 0.01403,
      "grad_norm": 0.7040895953951464,
      "learning_rate": 0.003,
      "loss": 4.3087,
      "step": 1403
    },
    {
      "epoch": 0.01404,
      "grad_norm": 0.6766856807345807,
      "learning_rate": 0.003,
      "loss": 4.3304,
      "step": 1404
    },
    {
      "epoch": 0.01405,
      "grad_norm": 0.6656515972408905,
      "learning_rate": 0.003,
      "loss": 4.3452,
      "step": 1405
    },
    {
      "epoch": 0.01406,
      "grad_norm": 0.6427355374677336,
      "learning_rate": 0.003,
      "loss": 4.3293,
      "step": 1406
    },
    {
      "epoch": 0.01407,
      "grad_norm": 0.5569348725391735,
      "learning_rate": 0.003,
      "loss": 4.3382,
      "step": 1407
    },
    {
      "epoch": 0.01408,
      "grad_norm": 0.653968270617751,
      "learning_rate": 0.003,
      "loss": 4.2995,
      "step": 1408
    },
    {
      "epoch": 0.01409,
      "grad_norm": 0.7092043165323135,
      "learning_rate": 0.003,
      "loss": 4.3349,
      "step": 1409
    },
    {
      "epoch": 0.0141,
      "grad_norm": 0.6596834265722145,
      "learning_rate": 0.003,
      "loss": 4.3119,
      "step": 1410
    },
    {
      "epoch": 0.01411,
      "grad_norm": 0.668518681865148,
      "learning_rate": 0.003,
      "loss": 4.2919,
      "step": 1411
    },
    {
      "epoch": 0.01412,
      "grad_norm": 0.7357916681226567,
      "learning_rate": 0.003,
      "loss": 4.3321,
      "step": 1412
    },
    {
      "epoch": 0.01413,
      "grad_norm": 0.7903826580906821,
      "learning_rate": 0.003,
      "loss": 4.3294,
      "step": 1413
    },
    {
      "epoch": 0.01414,
      "grad_norm": 0.9662669804499839,
      "learning_rate": 0.003,
      "loss": 4.3198,
      "step": 1414
    },
    {
      "epoch": 0.01415,
      "grad_norm": 0.9750513967867162,
      "learning_rate": 0.003,
      "loss": 4.327,
      "step": 1415
    },
    {
      "epoch": 0.01416,
      "grad_norm": 0.766343796622766,
      "learning_rate": 0.003,
      "loss": 4.3409,
      "step": 1416
    },
    {
      "epoch": 0.01417,
      "grad_norm": 0.6901214923655755,
      "learning_rate": 0.003,
      "loss": 4.3293,
      "step": 1417
    },
    {
      "epoch": 0.01418,
      "grad_norm": 0.7874750009631106,
      "learning_rate": 0.003,
      "loss": 4.3546,
      "step": 1418
    },
    {
      "epoch": 0.01419,
      "grad_norm": 0.9550810426960523,
      "learning_rate": 0.003,
      "loss": 4.3477,
      "step": 1419
    },
    {
      "epoch": 0.0142,
      "grad_norm": 0.8975542415095807,
      "learning_rate": 0.003,
      "loss": 4.3589,
      "step": 1420
    },
    {
      "epoch": 0.01421,
      "grad_norm": 0.9518531410750217,
      "learning_rate": 0.003,
      "loss": 4.3287,
      "step": 1421
    },
    {
      "epoch": 0.01422,
      "grad_norm": 0.8839562802775344,
      "learning_rate": 0.003,
      "loss": 4.338,
      "step": 1422
    },
    {
      "epoch": 0.01423,
      "grad_norm": 0.8905632330465241,
      "learning_rate": 0.003,
      "loss": 4.3444,
      "step": 1423
    },
    {
      "epoch": 0.01424,
      "grad_norm": 0.8441033206602504,
      "learning_rate": 0.003,
      "loss": 4.3588,
      "step": 1424
    },
    {
      "epoch": 0.01425,
      "grad_norm": 0.8358861815023613,
      "learning_rate": 0.003,
      "loss": 4.3652,
      "step": 1425
    },
    {
      "epoch": 0.01426,
      "grad_norm": 0.8667170217049547,
      "learning_rate": 0.003,
      "loss": 4.345,
      "step": 1426
    },
    {
      "epoch": 0.01427,
      "grad_norm": 0.9004457242110626,
      "learning_rate": 0.003,
      "loss": 4.3475,
      "step": 1427
    },
    {
      "epoch": 0.01428,
      "grad_norm": 0.6649852293703221,
      "learning_rate": 0.003,
      "loss": 4.33,
      "step": 1428
    },
    {
      "epoch": 0.01429,
      "grad_norm": 0.7036433265664787,
      "learning_rate": 0.003,
      "loss": 4.3424,
      "step": 1429
    },
    {
      "epoch": 0.0143,
      "grad_norm": 0.6541401785894563,
      "learning_rate": 0.003,
      "loss": 4.3493,
      "step": 1430
    },
    {
      "epoch": 0.01431,
      "grad_norm": 0.7703756821195129,
      "learning_rate": 0.003,
      "loss": 4.3401,
      "step": 1431
    },
    {
      "epoch": 0.01432,
      "grad_norm": 0.7699810491963369,
      "learning_rate": 0.003,
      "loss": 4.3292,
      "step": 1432
    },
    {
      "epoch": 0.01433,
      "grad_norm": 0.8450381783395694,
      "learning_rate": 0.003,
      "loss": 4.3304,
      "step": 1433
    },
    {
      "epoch": 0.01434,
      "grad_norm": 0.9597314824876599,
      "learning_rate": 0.003,
      "loss": 4.3233,
      "step": 1434
    },
    {
      "epoch": 0.01435,
      "grad_norm": 0.9933571600055039,
      "learning_rate": 0.003,
      "loss": 4.3322,
      "step": 1435
    },
    {
      "epoch": 0.01436,
      "grad_norm": 0.8443498365288639,
      "learning_rate": 0.003,
      "loss": 4.3488,
      "step": 1436
    },
    {
      "epoch": 0.01437,
      "grad_norm": 0.7389871326992684,
      "learning_rate": 0.003,
      "loss": 4.3563,
      "step": 1437
    },
    {
      "epoch": 0.01438,
      "grad_norm": 0.5911590915667476,
      "learning_rate": 0.003,
      "loss": 4.3434,
      "step": 1438
    },
    {
      "epoch": 0.01439,
      "grad_norm": 0.4545412791958111,
      "learning_rate": 0.003,
      "loss": 4.3018,
      "step": 1439
    },
    {
      "epoch": 0.0144,
      "grad_norm": 0.4765435630344443,
      "learning_rate": 0.003,
      "loss": 4.3117,
      "step": 1440
    },
    {
      "epoch": 0.01441,
      "grad_norm": 0.45011781235472936,
      "learning_rate": 0.003,
      "loss": 4.3167,
      "step": 1441
    },
    {
      "epoch": 0.01442,
      "grad_norm": 0.43134401837040753,
      "learning_rate": 0.003,
      "loss": 4.2983,
      "step": 1442
    },
    {
      "epoch": 0.01443,
      "grad_norm": 0.443910914161992,
      "learning_rate": 0.003,
      "loss": 4.3596,
      "step": 1443
    },
    {
      "epoch": 0.01444,
      "grad_norm": 0.44181787364588626,
      "learning_rate": 0.003,
      "loss": 4.3224,
      "step": 1444
    },
    {
      "epoch": 0.01445,
      "grad_norm": 0.3727371105020461,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1445
    },
    {
      "epoch": 0.01446,
      "grad_norm": 0.41251015974859473,
      "learning_rate": 0.003,
      "loss": 4.2943,
      "step": 1446
    },
    {
      "epoch": 0.01447,
      "grad_norm": 0.4261928826004591,
      "learning_rate": 0.003,
      "loss": 4.3018,
      "step": 1447
    },
    {
      "epoch": 0.01448,
      "grad_norm": 0.4376299402914724,
      "learning_rate": 0.003,
      "loss": 4.31,
      "step": 1448
    },
    {
      "epoch": 0.01449,
      "grad_norm": 0.4789249067229029,
      "learning_rate": 0.003,
      "loss": 4.3061,
      "step": 1449
    },
    {
      "epoch": 0.0145,
      "grad_norm": 0.5866918905830225,
      "learning_rate": 0.003,
      "loss": 4.3209,
      "step": 1450
    },
    {
      "epoch": 0.01451,
      "grad_norm": 0.6723277356365825,
      "learning_rate": 0.003,
      "loss": 4.305,
      "step": 1451
    },
    {
      "epoch": 0.01452,
      "grad_norm": 0.7335524034560207,
      "learning_rate": 0.003,
      "loss": 4.3067,
      "step": 1452
    },
    {
      "epoch": 0.01453,
      "grad_norm": 0.6471889029033779,
      "learning_rate": 0.003,
      "loss": 4.3216,
      "step": 1453
    },
    {
      "epoch": 0.01454,
      "grad_norm": 0.5685184481528504,
      "learning_rate": 0.003,
      "loss": 4.2828,
      "step": 1454
    },
    {
      "epoch": 0.01455,
      "grad_norm": 0.6421432623095609,
      "learning_rate": 0.003,
      "loss": 4.3177,
      "step": 1455
    },
    {
      "epoch": 0.01456,
      "grad_norm": 0.7565614399335465,
      "learning_rate": 0.003,
      "loss": 4.3006,
      "step": 1456
    },
    {
      "epoch": 0.01457,
      "grad_norm": 0.8829290957594472,
      "learning_rate": 0.003,
      "loss": 4.2963,
      "step": 1457
    },
    {
      "epoch": 0.01458,
      "grad_norm": 0.7109663363437955,
      "learning_rate": 0.003,
      "loss": 4.2893,
      "step": 1458
    },
    {
      "epoch": 0.01459,
      "grad_norm": 0.6673339135688805,
      "learning_rate": 0.003,
      "loss": 4.3158,
      "step": 1459
    },
    {
      "epoch": 0.0146,
      "grad_norm": 0.718687934953482,
      "learning_rate": 0.003,
      "loss": 4.3224,
      "step": 1460
    },
    {
      "epoch": 0.01461,
      "grad_norm": 0.9126976820757061,
      "learning_rate": 0.003,
      "loss": 4.3127,
      "step": 1461
    },
    {
      "epoch": 0.01462,
      "grad_norm": 0.9933957093431367,
      "learning_rate": 0.003,
      "loss": 4.3197,
      "step": 1462
    },
    {
      "epoch": 0.01463,
      "grad_norm": 1.005474136949317,
      "learning_rate": 0.003,
      "loss": 4.3328,
      "step": 1463
    },
    {
      "epoch": 0.01464,
      "grad_norm": 0.8550438674851449,
      "learning_rate": 0.003,
      "loss": 4.3041,
      "step": 1464
    },
    {
      "epoch": 0.01465,
      "grad_norm": 0.95918166934002,
      "learning_rate": 0.003,
      "loss": 4.3575,
      "step": 1465
    },
    {
      "epoch": 0.01466,
      "grad_norm": 0.942212627065398,
      "learning_rate": 0.003,
      "loss": 4.3242,
      "step": 1466
    },
    {
      "epoch": 0.01467,
      "grad_norm": 0.8654410144546052,
      "learning_rate": 0.003,
      "loss": 4.361,
      "step": 1467
    },
    {
      "epoch": 0.01468,
      "grad_norm": 0.9143233338435491,
      "learning_rate": 0.003,
      "loss": 4.3391,
      "step": 1468
    },
    {
      "epoch": 0.01469,
      "grad_norm": 0.9793313044459684,
      "learning_rate": 0.003,
      "loss": 4.3458,
      "step": 1469
    },
    {
      "epoch": 0.0147,
      "grad_norm": 0.9281471829982783,
      "learning_rate": 0.003,
      "loss": 4.3406,
      "step": 1470
    },
    {
      "epoch": 0.01471,
      "grad_norm": 0.8585363920781103,
      "learning_rate": 0.003,
      "loss": 4.351,
      "step": 1471
    },
    {
      "epoch": 0.01472,
      "grad_norm": 0.7828394264486358,
      "learning_rate": 0.003,
      "loss": 4.3304,
      "step": 1472
    },
    {
      "epoch": 0.01473,
      "grad_norm": 0.6693695526508024,
      "learning_rate": 0.003,
      "loss": 4.3418,
      "step": 1473
    },
    {
      "epoch": 0.01474,
      "grad_norm": 0.64488720414541,
      "learning_rate": 0.003,
      "loss": 4.3238,
      "step": 1474
    },
    {
      "epoch": 0.01475,
      "grad_norm": 0.6058521136594603,
      "learning_rate": 0.003,
      "loss": 4.3387,
      "step": 1475
    },
    {
      "epoch": 0.01476,
      "grad_norm": 0.6790050602776871,
      "learning_rate": 0.003,
      "loss": 4.3289,
      "step": 1476
    },
    {
      "epoch": 0.01477,
      "grad_norm": 0.7601717274529913,
      "learning_rate": 0.003,
      "loss": 4.3343,
      "step": 1477
    },
    {
      "epoch": 0.01478,
      "grad_norm": 0.8194656851972575,
      "learning_rate": 0.003,
      "loss": 4.321,
      "step": 1478
    },
    {
      "epoch": 0.01479,
      "grad_norm": 0.8305578095720372,
      "learning_rate": 0.003,
      "loss": 4.3542,
      "step": 1479
    },
    {
      "epoch": 0.0148,
      "grad_norm": 0.5999680255334954,
      "learning_rate": 0.003,
      "loss": 4.3147,
      "step": 1480
    },
    {
      "epoch": 0.01481,
      "grad_norm": 0.5003001300590547,
      "learning_rate": 0.003,
      "loss": 4.3103,
      "step": 1481
    },
    {
      "epoch": 0.01482,
      "grad_norm": 0.536436369218564,
      "learning_rate": 0.003,
      "loss": 4.2976,
      "step": 1482
    },
    {
      "epoch": 0.01483,
      "grad_norm": 0.5356648177629082,
      "learning_rate": 0.003,
      "loss": 4.2936,
      "step": 1483
    },
    {
      "epoch": 0.01484,
      "grad_norm": 0.527550162162803,
      "learning_rate": 0.003,
      "loss": 4.3417,
      "step": 1484
    },
    {
      "epoch": 0.01485,
      "grad_norm": 0.42071613110356837,
      "learning_rate": 0.003,
      "loss": 4.3019,
      "step": 1485
    },
    {
      "epoch": 0.01486,
      "grad_norm": 0.36653534804688837,
      "learning_rate": 0.003,
      "loss": 4.2968,
      "step": 1486
    },
    {
      "epoch": 0.01487,
      "grad_norm": 0.382280891744833,
      "learning_rate": 0.003,
      "loss": 4.2989,
      "step": 1487
    },
    {
      "epoch": 0.01488,
      "grad_norm": 0.33605679846225744,
      "learning_rate": 0.003,
      "loss": 4.3064,
      "step": 1488
    },
    {
      "epoch": 0.01489,
      "grad_norm": 0.3437240442761271,
      "learning_rate": 0.003,
      "loss": 4.2981,
      "step": 1489
    },
    {
      "epoch": 0.0149,
      "grad_norm": 0.39625220166661973,
      "learning_rate": 0.003,
      "loss": 4.2911,
      "step": 1490
    },
    {
      "epoch": 0.01491,
      "grad_norm": 0.46513148809594546,
      "learning_rate": 0.003,
      "loss": 4.3054,
      "step": 1491
    },
    {
      "epoch": 0.01492,
      "grad_norm": 0.6303349430841945,
      "learning_rate": 0.003,
      "loss": 4.2763,
      "step": 1492
    },
    {
      "epoch": 0.01493,
      "grad_norm": 1.0123021829900336,
      "learning_rate": 0.003,
      "loss": 4.3178,
      "step": 1493
    },
    {
      "epoch": 0.01494,
      "grad_norm": 0.9944229901028623,
      "learning_rate": 0.003,
      "loss": 4.3204,
      "step": 1494
    },
    {
      "epoch": 0.01495,
      "grad_norm": 0.6683641506440907,
      "learning_rate": 0.003,
      "loss": 4.3148,
      "step": 1495
    },
    {
      "epoch": 0.01496,
      "grad_norm": 0.7298167952745037,
      "learning_rate": 0.003,
      "loss": 4.2971,
      "step": 1496
    },
    {
      "epoch": 0.01497,
      "grad_norm": 0.6993756176552323,
      "learning_rate": 0.003,
      "loss": 4.269,
      "step": 1497
    },
    {
      "epoch": 0.01498,
      "grad_norm": 0.5745337821538584,
      "learning_rate": 0.003,
      "loss": 4.3187,
      "step": 1498
    },
    {
      "epoch": 0.01499,
      "grad_norm": 0.7283462126028899,
      "learning_rate": 0.003,
      "loss": 4.3429,
      "step": 1499
    },
    {
      "epoch": 0.015,
      "grad_norm": 0.6525566784858753,
      "learning_rate": 0.003,
      "loss": 4.2959,
      "step": 1500
    },
    {
      "epoch": 0.01501,
      "grad_norm": 0.5378940164040006,
      "learning_rate": 0.003,
      "loss": 4.2893,
      "step": 1501
    },
    {
      "epoch": 0.01502,
      "grad_norm": 0.6104287567817624,
      "learning_rate": 0.003,
      "loss": 4.3317,
      "step": 1502
    },
    {
      "epoch": 0.01503,
      "grad_norm": 0.5521816301476207,
      "learning_rate": 0.003,
      "loss": 4.3091,
      "step": 1503
    },
    {
      "epoch": 0.01504,
      "grad_norm": 0.5286969991166217,
      "learning_rate": 0.003,
      "loss": 4.307,
      "step": 1504
    },
    {
      "epoch": 0.01505,
      "grad_norm": 0.5474279287326073,
      "learning_rate": 0.003,
      "loss": 4.2885,
      "step": 1505
    },
    {
      "epoch": 0.01506,
      "grad_norm": 0.5984521889334727,
      "learning_rate": 0.003,
      "loss": 4.3094,
      "step": 1506
    },
    {
      "epoch": 0.01507,
      "grad_norm": 0.5232830237119964,
      "learning_rate": 0.003,
      "loss": 4.3199,
      "step": 1507
    },
    {
      "epoch": 0.01508,
      "grad_norm": 0.5466455223079885,
      "learning_rate": 0.003,
      "loss": 4.2974,
      "step": 1508
    },
    {
      "epoch": 0.01509,
      "grad_norm": 0.5109583189523649,
      "learning_rate": 0.003,
      "loss": 4.3142,
      "step": 1509
    },
    {
      "epoch": 0.0151,
      "grad_norm": 0.4768548441806941,
      "learning_rate": 0.003,
      "loss": 4.2831,
      "step": 1510
    },
    {
      "epoch": 0.01511,
      "grad_norm": 0.4945814249796924,
      "learning_rate": 0.003,
      "loss": 4.2714,
      "step": 1511
    },
    {
      "epoch": 0.01512,
      "grad_norm": 0.48932835669515945,
      "learning_rate": 0.003,
      "loss": 4.2822,
      "step": 1512
    },
    {
      "epoch": 0.01513,
      "grad_norm": 0.47324181197347215,
      "learning_rate": 0.003,
      "loss": 4.2875,
      "step": 1513
    },
    {
      "epoch": 0.01514,
      "grad_norm": 0.47033014673052803,
      "learning_rate": 0.003,
      "loss": 4.294,
      "step": 1514
    },
    {
      "epoch": 0.01515,
      "grad_norm": 0.5275023241779081,
      "learning_rate": 0.003,
      "loss": 4.2871,
      "step": 1515
    },
    {
      "epoch": 0.01516,
      "grad_norm": 0.581253055640685,
      "learning_rate": 0.003,
      "loss": 4.2949,
      "step": 1516
    },
    {
      "epoch": 0.01517,
      "grad_norm": 0.5995156865571368,
      "learning_rate": 0.003,
      "loss": 4.245,
      "step": 1517
    },
    {
      "epoch": 0.01518,
      "grad_norm": 0.6879717293853466,
      "learning_rate": 0.003,
      "loss": 4.2878,
      "step": 1518
    },
    {
      "epoch": 0.01519,
      "grad_norm": 0.8658495938690788,
      "learning_rate": 0.003,
      "loss": 4.3291,
      "step": 1519
    },
    {
      "epoch": 0.0152,
      "grad_norm": 1.0602978136723928,
      "learning_rate": 0.003,
      "loss": 4.3012,
      "step": 1520
    },
    {
      "epoch": 0.01521,
      "grad_norm": 1.0404355729302586,
      "learning_rate": 0.003,
      "loss": 4.315,
      "step": 1521
    },
    {
      "epoch": 0.01522,
      "grad_norm": 0.840417859609453,
      "learning_rate": 0.003,
      "loss": 4.3065,
      "step": 1522
    },
    {
      "epoch": 0.01523,
      "grad_norm": 0.8469845154820008,
      "learning_rate": 0.003,
      "loss": 4.305,
      "step": 1523
    },
    {
      "epoch": 0.01524,
      "grad_norm": 0.7929331263188221,
      "learning_rate": 0.003,
      "loss": 4.3335,
      "step": 1524
    },
    {
      "epoch": 0.01525,
      "grad_norm": 0.698848745465075,
      "learning_rate": 0.003,
      "loss": 4.3297,
      "step": 1525
    },
    {
      "epoch": 0.01526,
      "grad_norm": 0.7207511646846017,
      "learning_rate": 0.003,
      "loss": 4.3193,
      "step": 1526
    },
    {
      "epoch": 0.01527,
      "grad_norm": 0.763308654071157,
      "learning_rate": 0.003,
      "loss": 4.2936,
      "step": 1527
    },
    {
      "epoch": 0.01528,
      "grad_norm": 0.8559921469360918,
      "learning_rate": 0.003,
      "loss": 4.3141,
      "step": 1528
    },
    {
      "epoch": 0.01529,
      "grad_norm": 0.7339586416101129,
      "learning_rate": 0.003,
      "loss": 4.3059,
      "step": 1529
    },
    {
      "epoch": 0.0153,
      "grad_norm": 0.6111884320793721,
      "learning_rate": 0.003,
      "loss": 4.3081,
      "step": 1530
    },
    {
      "epoch": 0.01531,
      "grad_norm": 0.5381224123525938,
      "learning_rate": 0.003,
      "loss": 4.2987,
      "step": 1531
    },
    {
      "epoch": 0.01532,
      "grad_norm": 0.4961970461284698,
      "learning_rate": 0.003,
      "loss": 4.2768,
      "step": 1532
    },
    {
      "epoch": 0.01533,
      "grad_norm": 0.4947055691935514,
      "learning_rate": 0.003,
      "loss": 4.2996,
      "step": 1533
    },
    {
      "epoch": 0.01534,
      "grad_norm": 0.4918952201118249,
      "learning_rate": 0.003,
      "loss": 4.3,
      "step": 1534
    },
    {
      "epoch": 0.01535,
      "grad_norm": 0.5164537975132518,
      "learning_rate": 0.003,
      "loss": 4.3084,
      "step": 1535
    },
    {
      "epoch": 0.01536,
      "grad_norm": 0.5875973667797125,
      "learning_rate": 0.003,
      "loss": 4.2915,
      "step": 1536
    },
    {
      "epoch": 0.01537,
      "grad_norm": 0.6164689981624115,
      "learning_rate": 0.003,
      "loss": 4.2859,
      "step": 1537
    },
    {
      "epoch": 0.01538,
      "grad_norm": 0.5757348971929218,
      "learning_rate": 0.003,
      "loss": 4.2647,
      "step": 1538
    },
    {
      "epoch": 0.01539,
      "grad_norm": 0.47927981795762703,
      "learning_rate": 0.003,
      "loss": 4.2913,
      "step": 1539
    },
    {
      "epoch": 0.0154,
      "grad_norm": 0.471938076315617,
      "learning_rate": 0.003,
      "loss": 4.2958,
      "step": 1540
    },
    {
      "epoch": 0.01541,
      "grad_norm": 0.41121323274233956,
      "learning_rate": 0.003,
      "loss": 4.2738,
      "step": 1541
    },
    {
      "epoch": 0.01542,
      "grad_norm": 0.5568654804454234,
      "learning_rate": 0.003,
      "loss": 4.3052,
      "step": 1542
    },
    {
      "epoch": 0.01543,
      "grad_norm": 0.6966538302893103,
      "learning_rate": 0.003,
      "loss": 4.2931,
      "step": 1543
    },
    {
      "epoch": 0.01544,
      "grad_norm": 0.9220011119214641,
      "learning_rate": 0.003,
      "loss": 4.2897,
      "step": 1544
    },
    {
      "epoch": 0.01545,
      "grad_norm": 0.9623507193232845,
      "learning_rate": 0.003,
      "loss": 4.3672,
      "step": 1545
    },
    {
      "epoch": 0.01546,
      "grad_norm": 0.8825760974316569,
      "learning_rate": 0.003,
      "loss": 4.3354,
      "step": 1546
    },
    {
      "epoch": 0.01547,
      "grad_norm": 0.7561843927072682,
      "learning_rate": 0.003,
      "loss": 4.3343,
      "step": 1547
    },
    {
      "epoch": 0.01548,
      "grad_norm": 0.9189821005507283,
      "learning_rate": 0.003,
      "loss": 4.3071,
      "step": 1548
    },
    {
      "epoch": 0.01549,
      "grad_norm": 0.8566529575807743,
      "learning_rate": 0.003,
      "loss": 4.2971,
      "step": 1549
    },
    {
      "epoch": 0.0155,
      "grad_norm": 0.8661559591366523,
      "learning_rate": 0.003,
      "loss": 4.3171,
      "step": 1550
    },
    {
      "epoch": 0.01551,
      "grad_norm": 0.8122000693068794,
      "learning_rate": 0.003,
      "loss": 4.2987,
      "step": 1551
    },
    {
      "epoch": 0.01552,
      "grad_norm": 0.8086836292136306,
      "learning_rate": 0.003,
      "loss": 4.3198,
      "step": 1552
    },
    {
      "epoch": 0.01553,
      "grad_norm": 0.6333228013975016,
      "learning_rate": 0.003,
      "loss": 4.3325,
      "step": 1553
    },
    {
      "epoch": 0.01554,
      "grad_norm": 0.6383647650899242,
      "learning_rate": 0.003,
      "loss": 4.3203,
      "step": 1554
    },
    {
      "epoch": 0.01555,
      "grad_norm": 0.581782197952493,
      "learning_rate": 0.003,
      "loss": 4.2877,
      "step": 1555
    },
    {
      "epoch": 0.01556,
      "grad_norm": 0.534117297541484,
      "learning_rate": 0.003,
      "loss": 4.3229,
      "step": 1556
    },
    {
      "epoch": 0.01557,
      "grad_norm": 0.6143591785578596,
      "learning_rate": 0.003,
      "loss": 4.2988,
      "step": 1557
    },
    {
      "epoch": 0.01558,
      "grad_norm": 0.7112024057013003,
      "learning_rate": 0.003,
      "loss": 4.2905,
      "step": 1558
    },
    {
      "epoch": 0.01559,
      "grad_norm": 0.8091627873292117,
      "learning_rate": 0.003,
      "loss": 4.3321,
      "step": 1559
    },
    {
      "epoch": 0.0156,
      "grad_norm": 0.9168702262156472,
      "learning_rate": 0.003,
      "loss": 4.328,
      "step": 1560
    },
    {
      "epoch": 0.01561,
      "grad_norm": 0.8247101322453514,
      "learning_rate": 0.003,
      "loss": 4.3121,
      "step": 1561
    },
    {
      "epoch": 0.01562,
      "grad_norm": 0.7450493194783364,
      "learning_rate": 0.003,
      "loss": 4.2998,
      "step": 1562
    },
    {
      "epoch": 0.01563,
      "grad_norm": 0.7681213259871257,
      "learning_rate": 0.003,
      "loss": 4.3069,
      "step": 1563
    },
    {
      "epoch": 0.01564,
      "grad_norm": 0.661087083405721,
      "learning_rate": 0.003,
      "loss": 4.3306,
      "step": 1564
    },
    {
      "epoch": 0.01565,
      "grad_norm": 0.7106779315303835,
      "learning_rate": 0.003,
      "loss": 4.3058,
      "step": 1565
    },
    {
      "epoch": 0.01566,
      "grad_norm": 0.7769110124623916,
      "learning_rate": 0.003,
      "loss": 4.3279,
      "step": 1566
    },
    {
      "epoch": 0.01567,
      "grad_norm": 0.7669441510687823,
      "learning_rate": 0.003,
      "loss": 4.3233,
      "step": 1567
    },
    {
      "epoch": 0.01568,
      "grad_norm": 0.7337582454656044,
      "learning_rate": 0.003,
      "loss": 4.3021,
      "step": 1568
    },
    {
      "epoch": 0.01569,
      "grad_norm": 0.7439380247484012,
      "learning_rate": 0.003,
      "loss": 4.3114,
      "step": 1569
    },
    {
      "epoch": 0.0157,
      "grad_norm": 0.6078620360255516,
      "learning_rate": 0.003,
      "loss": 4.3206,
      "step": 1570
    },
    {
      "epoch": 0.01571,
      "grad_norm": 0.540728762720418,
      "learning_rate": 0.003,
      "loss": 4.3025,
      "step": 1571
    },
    {
      "epoch": 0.01572,
      "grad_norm": 0.46359920637885943,
      "learning_rate": 0.003,
      "loss": 4.3098,
      "step": 1572
    },
    {
      "epoch": 0.01573,
      "grad_norm": 0.44623862939090225,
      "learning_rate": 0.003,
      "loss": 4.2906,
      "step": 1573
    },
    {
      "epoch": 0.01574,
      "grad_norm": 0.40532340498334285,
      "learning_rate": 0.003,
      "loss": 4.3042,
      "step": 1574
    },
    {
      "epoch": 0.01575,
      "grad_norm": 0.4266009557557695,
      "learning_rate": 0.003,
      "loss": 4.2644,
      "step": 1575
    },
    {
      "epoch": 0.01576,
      "grad_norm": 0.44274797480320294,
      "learning_rate": 0.003,
      "loss": 4.2843,
      "step": 1576
    },
    {
      "epoch": 0.01577,
      "grad_norm": 0.5663565081777049,
      "learning_rate": 0.003,
      "loss": 4.2511,
      "step": 1577
    },
    {
      "epoch": 0.01578,
      "grad_norm": 0.7961886454684144,
      "learning_rate": 0.003,
      "loss": 4.2885,
      "step": 1578
    },
    {
      "epoch": 0.01579,
      "grad_norm": 1.0530430537405686,
      "learning_rate": 0.003,
      "loss": 4.2959,
      "step": 1579
    },
    {
      "epoch": 0.0158,
      "grad_norm": 0.8358119926881179,
      "learning_rate": 0.003,
      "loss": 4.3048,
      "step": 1580
    },
    {
      "epoch": 0.01581,
      "grad_norm": 0.6320448131483168,
      "learning_rate": 0.003,
      "loss": 4.3019,
      "step": 1581
    },
    {
      "epoch": 0.01582,
      "grad_norm": 0.8911221736338306,
      "learning_rate": 0.003,
      "loss": 4.2956,
      "step": 1582
    },
    {
      "epoch": 0.01583,
      "grad_norm": 0.8257028036510016,
      "learning_rate": 0.003,
      "loss": 4.2853,
      "step": 1583
    },
    {
      "epoch": 0.01584,
      "grad_norm": 0.6483281568426533,
      "learning_rate": 0.003,
      "loss": 4.3088,
      "step": 1584
    },
    {
      "epoch": 0.01585,
      "grad_norm": 0.8350847075329101,
      "learning_rate": 0.003,
      "loss": 4.3083,
      "step": 1585
    },
    {
      "epoch": 0.01586,
      "grad_norm": 0.7763840572673907,
      "learning_rate": 0.003,
      "loss": 4.2866,
      "step": 1586
    },
    {
      "epoch": 0.01587,
      "grad_norm": 0.6255279228525287,
      "learning_rate": 0.003,
      "loss": 4.2947,
      "step": 1587
    },
    {
      "epoch": 0.01588,
      "grad_norm": 0.610133020546242,
      "learning_rate": 0.003,
      "loss": 4.322,
      "step": 1588
    },
    {
      "epoch": 0.01589,
      "grad_norm": 0.5957874820344337,
      "learning_rate": 0.003,
      "loss": 4.3124,
      "step": 1589
    },
    {
      "epoch": 0.0159,
      "grad_norm": 0.5537392612219366,
      "learning_rate": 0.003,
      "loss": 4.3034,
      "step": 1590
    },
    {
      "epoch": 0.01591,
      "grad_norm": 0.661097427877216,
      "learning_rate": 0.003,
      "loss": 4.3134,
      "step": 1591
    },
    {
      "epoch": 0.01592,
      "grad_norm": 0.6367246417778138,
      "learning_rate": 0.003,
      "loss": 4.3034,
      "step": 1592
    },
    {
      "epoch": 0.01593,
      "grad_norm": 0.6726022188533456,
      "learning_rate": 0.003,
      "loss": 4.3067,
      "step": 1593
    },
    {
      "epoch": 0.01594,
      "grad_norm": 0.806293600982127,
      "learning_rate": 0.003,
      "loss": 4.302,
      "step": 1594
    },
    {
      "epoch": 0.01595,
      "grad_norm": 0.7676445449638729,
      "learning_rate": 0.003,
      "loss": 4.3248,
      "step": 1595
    },
    {
      "epoch": 0.01596,
      "grad_norm": 0.7697612568511226,
      "learning_rate": 0.003,
      "loss": 4.3187,
      "step": 1596
    },
    {
      "epoch": 0.01597,
      "grad_norm": 0.6464451152190547,
      "learning_rate": 0.003,
      "loss": 4.278,
      "step": 1597
    },
    {
      "epoch": 0.01598,
      "grad_norm": 0.5841344344075732,
      "learning_rate": 0.003,
      "loss": 4.2687,
      "step": 1598
    },
    {
      "epoch": 0.01599,
      "grad_norm": 0.6015534953604151,
      "learning_rate": 0.003,
      "loss": 4.2919,
      "step": 1599
    },
    {
      "epoch": 0.016,
      "grad_norm": 0.633770866019041,
      "learning_rate": 0.003,
      "loss": 4.2557,
      "step": 1600
    },
    {
      "epoch": 0.01601,
      "grad_norm": 0.7204786711919867,
      "learning_rate": 0.003,
      "loss": 4.3091,
      "step": 1601
    },
    {
      "epoch": 0.01602,
      "grad_norm": 0.8805640455608943,
      "learning_rate": 0.003,
      "loss": 4.2878,
      "step": 1602
    },
    {
      "epoch": 0.01603,
      "grad_norm": 0.9584887795577403,
      "learning_rate": 0.003,
      "loss": 4.3165,
      "step": 1603
    },
    {
      "epoch": 0.01604,
      "grad_norm": 0.7106738441517636,
      "learning_rate": 0.003,
      "loss": 4.3017,
      "step": 1604
    },
    {
      "epoch": 0.01605,
      "grad_norm": 0.5768021422231241,
      "learning_rate": 0.003,
      "loss": 4.27,
      "step": 1605
    },
    {
      "epoch": 0.01606,
      "grad_norm": 0.7201210994211575,
      "learning_rate": 0.003,
      "loss": 4.2807,
      "step": 1606
    },
    {
      "epoch": 0.01607,
      "grad_norm": 0.657321999239318,
      "learning_rate": 0.003,
      "loss": 4.3034,
      "step": 1607
    },
    {
      "epoch": 0.01608,
      "grad_norm": 0.6130445370824126,
      "learning_rate": 0.003,
      "loss": 4.2741,
      "step": 1608
    },
    {
      "epoch": 0.01609,
      "grad_norm": 0.5653798376158339,
      "learning_rate": 0.003,
      "loss": 4.283,
      "step": 1609
    },
    {
      "epoch": 0.0161,
      "grad_norm": 0.513983969651206,
      "learning_rate": 0.003,
      "loss": 4.2462,
      "step": 1610
    },
    {
      "epoch": 0.01611,
      "grad_norm": 0.46001761549467546,
      "learning_rate": 0.003,
      "loss": 4.2646,
      "step": 1611
    },
    {
      "epoch": 0.01612,
      "grad_norm": 0.4061198970721476,
      "learning_rate": 0.003,
      "loss": 4.2535,
      "step": 1612
    },
    {
      "epoch": 0.01613,
      "grad_norm": 0.38584265397880474,
      "learning_rate": 0.003,
      "loss": 4.298,
      "step": 1613
    },
    {
      "epoch": 0.01614,
      "grad_norm": 0.3634152641224797,
      "learning_rate": 0.003,
      "loss": 4.3203,
      "step": 1614
    },
    {
      "epoch": 0.01615,
      "grad_norm": 0.3355266833643695,
      "learning_rate": 0.003,
      "loss": 4.286,
      "step": 1615
    },
    {
      "epoch": 0.01616,
      "grad_norm": 0.37730904621972394,
      "learning_rate": 0.003,
      "loss": 4.267,
      "step": 1616
    },
    {
      "epoch": 0.01617,
      "grad_norm": 0.39995734331949706,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1617
    },
    {
      "epoch": 0.01618,
      "grad_norm": 0.47091829338804436,
      "learning_rate": 0.003,
      "loss": 4.2623,
      "step": 1618
    },
    {
      "epoch": 0.01619,
      "grad_norm": 0.6125482990333543,
      "learning_rate": 0.003,
      "loss": 4.2565,
      "step": 1619
    },
    {
      "epoch": 0.0162,
      "grad_norm": 0.7863263974252909,
      "learning_rate": 0.003,
      "loss": 4.2735,
      "step": 1620
    },
    {
      "epoch": 0.01621,
      "grad_norm": 1.0441142509760066,
      "learning_rate": 0.003,
      "loss": 4.2794,
      "step": 1621
    },
    {
      "epoch": 0.01622,
      "grad_norm": 0.9658316314084775,
      "learning_rate": 0.003,
      "loss": 4.3224,
      "step": 1622
    },
    {
      "epoch": 0.01623,
      "grad_norm": 0.8426526797330657,
      "learning_rate": 0.003,
      "loss": 4.2964,
      "step": 1623
    },
    {
      "epoch": 0.01624,
      "grad_norm": 0.9277624680796056,
      "learning_rate": 0.003,
      "loss": 4.3239,
      "step": 1624
    },
    {
      "epoch": 0.01625,
      "grad_norm": 0.9569376041612643,
      "learning_rate": 0.003,
      "loss": 4.3005,
      "step": 1625
    },
    {
      "epoch": 0.01626,
      "grad_norm": 0.9128199086554656,
      "learning_rate": 0.003,
      "loss": 4.3221,
      "step": 1626
    },
    {
      "epoch": 0.01627,
      "grad_norm": 1.0524568862767718,
      "learning_rate": 0.003,
      "loss": 4.3041,
      "step": 1627
    },
    {
      "epoch": 0.01628,
      "grad_norm": 1.051492497501998,
      "learning_rate": 0.003,
      "loss": 4.3114,
      "step": 1628
    },
    {
      "epoch": 0.01629,
      "grad_norm": 0.9795755381617424,
      "learning_rate": 0.003,
      "loss": 4.3139,
      "step": 1629
    },
    {
      "epoch": 0.0163,
      "grad_norm": 0.8842687005527395,
      "learning_rate": 0.003,
      "loss": 4.301,
      "step": 1630
    },
    {
      "epoch": 0.01631,
      "grad_norm": 0.8178000647706527,
      "learning_rate": 0.003,
      "loss": 4.3269,
      "step": 1631
    },
    {
      "epoch": 0.01632,
      "grad_norm": 0.8099859797576497,
      "learning_rate": 0.003,
      "loss": 4.3331,
      "step": 1632
    },
    {
      "epoch": 0.01633,
      "grad_norm": 0.8783297751338949,
      "learning_rate": 0.003,
      "loss": 4.3124,
      "step": 1633
    },
    {
      "epoch": 0.01634,
      "grad_norm": 0.9683176525782718,
      "learning_rate": 0.003,
      "loss": 4.3368,
      "step": 1634
    },
    {
      "epoch": 0.01635,
      "grad_norm": 1.0378085972515856,
      "learning_rate": 0.003,
      "loss": 4.3267,
      "step": 1635
    },
    {
      "epoch": 0.01636,
      "grad_norm": 0.8713002838428562,
      "learning_rate": 0.003,
      "loss": 4.3348,
      "step": 1636
    },
    {
      "epoch": 0.01637,
      "grad_norm": 0.6789025774910509,
      "learning_rate": 0.003,
      "loss": 4.3135,
      "step": 1637
    },
    {
      "epoch": 0.01638,
      "grad_norm": 0.7190263237234923,
      "learning_rate": 0.003,
      "loss": 4.3194,
      "step": 1638
    },
    {
      "epoch": 0.01639,
      "grad_norm": 0.6786031217139576,
      "learning_rate": 0.003,
      "loss": 4.3383,
      "step": 1639
    },
    {
      "epoch": 0.0164,
      "grad_norm": 0.6789558457431205,
      "learning_rate": 0.003,
      "loss": 4.3166,
      "step": 1640
    },
    {
      "epoch": 0.01641,
      "grad_norm": 0.575948861365857,
      "learning_rate": 0.003,
      "loss": 4.2883,
      "step": 1641
    },
    {
      "epoch": 0.01642,
      "grad_norm": 0.5365877148633437,
      "learning_rate": 0.003,
      "loss": 4.3254,
      "step": 1642
    },
    {
      "epoch": 0.01643,
      "grad_norm": 0.5249085338304371,
      "learning_rate": 0.003,
      "loss": 4.3093,
      "step": 1643
    },
    {
      "epoch": 0.01644,
      "grad_norm": 0.5581803861532463,
      "learning_rate": 0.003,
      "loss": 4.3259,
      "step": 1644
    },
    {
      "epoch": 0.01645,
      "grad_norm": 0.5119961826198595,
      "learning_rate": 0.003,
      "loss": 4.2692,
      "step": 1645
    },
    {
      "epoch": 0.01646,
      "grad_norm": 0.5843459575894068,
      "learning_rate": 0.003,
      "loss": 4.3002,
      "step": 1646
    },
    {
      "epoch": 0.01647,
      "grad_norm": 0.6503610890489644,
      "learning_rate": 0.003,
      "loss": 4.299,
      "step": 1647
    },
    {
      "epoch": 0.01648,
      "grad_norm": 0.7233252288289422,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1648
    },
    {
      "epoch": 0.01649,
      "grad_norm": 0.6990475841330179,
      "learning_rate": 0.003,
      "loss": 4.315,
      "step": 1649
    },
    {
      "epoch": 0.0165,
      "grad_norm": 0.6420391516572309,
      "learning_rate": 0.003,
      "loss": 4.2891,
      "step": 1650
    },
    {
      "epoch": 0.01651,
      "grad_norm": 0.5226368448156455,
      "learning_rate": 0.003,
      "loss": 4.2942,
      "step": 1651
    },
    {
      "epoch": 0.01652,
      "grad_norm": 0.610714083694528,
      "learning_rate": 0.003,
      "loss": 4.2513,
      "step": 1652
    },
    {
      "epoch": 0.01653,
      "grad_norm": 0.6780860125642644,
      "learning_rate": 0.003,
      "loss": 4.295,
      "step": 1653
    },
    {
      "epoch": 0.01654,
      "grad_norm": 0.6227039363886209,
      "learning_rate": 0.003,
      "loss": 4.284,
      "step": 1654
    },
    {
      "epoch": 0.01655,
      "grad_norm": 0.5328823365124649,
      "learning_rate": 0.003,
      "loss": 4.2906,
      "step": 1655
    },
    {
      "epoch": 0.01656,
      "grad_norm": 0.4763312033593008,
      "learning_rate": 0.003,
      "loss": 4.2565,
      "step": 1656
    },
    {
      "epoch": 0.01657,
      "grad_norm": 0.4375990417622915,
      "learning_rate": 0.003,
      "loss": 4.275,
      "step": 1657
    },
    {
      "epoch": 0.01658,
      "grad_norm": 0.41683501760814345,
      "learning_rate": 0.003,
      "loss": 4.2614,
      "step": 1658
    },
    {
      "epoch": 0.01659,
      "grad_norm": 0.36860502577039955,
      "learning_rate": 0.003,
      "loss": 4.2638,
      "step": 1659
    },
    {
      "epoch": 0.0166,
      "grad_norm": 0.3864652159021637,
      "learning_rate": 0.003,
      "loss": 4.2474,
      "step": 1660
    },
    {
      "epoch": 0.01661,
      "grad_norm": 0.41682106736234836,
      "learning_rate": 0.003,
      "loss": 4.2505,
      "step": 1661
    },
    {
      "epoch": 0.01662,
      "grad_norm": 0.476701756557898,
      "learning_rate": 0.003,
      "loss": 4.2548,
      "step": 1662
    },
    {
      "epoch": 0.01663,
      "grad_norm": 0.5969063400757824,
      "learning_rate": 0.003,
      "loss": 4.2805,
      "step": 1663
    },
    {
      "epoch": 0.01664,
      "grad_norm": 0.8680763918881209,
      "learning_rate": 0.003,
      "loss": 4.3046,
      "step": 1664
    },
    {
      "epoch": 0.01665,
      "grad_norm": 1.1237216483274195,
      "learning_rate": 0.003,
      "loss": 4.2931,
      "step": 1665
    },
    {
      "epoch": 0.01666,
      "grad_norm": 0.7417184604500126,
      "learning_rate": 0.003,
      "loss": 4.2771,
      "step": 1666
    },
    {
      "epoch": 0.01667,
      "grad_norm": 0.7184967197248885,
      "learning_rate": 0.003,
      "loss": 4.2946,
      "step": 1667
    },
    {
      "epoch": 0.01668,
      "grad_norm": 0.8198630062596927,
      "learning_rate": 0.003,
      "loss": 4.305,
      "step": 1668
    },
    {
      "epoch": 0.01669,
      "grad_norm": 0.7847871916676272,
      "learning_rate": 0.003,
      "loss": 4.2844,
      "step": 1669
    },
    {
      "epoch": 0.0167,
      "grad_norm": 0.9437529917495628,
      "learning_rate": 0.003,
      "loss": 4.2839,
      "step": 1670
    },
    {
      "epoch": 0.01671,
      "grad_norm": 0.9912342863425158,
      "learning_rate": 0.003,
      "loss": 4.2849,
      "step": 1671
    },
    {
      "epoch": 0.01672,
      "grad_norm": 0.881393206900321,
      "learning_rate": 0.003,
      "loss": 4.2921,
      "step": 1672
    },
    {
      "epoch": 0.01673,
      "grad_norm": 0.8454070541781228,
      "learning_rate": 0.003,
      "loss": 4.2895,
      "step": 1673
    },
    {
      "epoch": 0.01674,
      "grad_norm": 0.7698332601553451,
      "learning_rate": 0.003,
      "loss": 4.3011,
      "step": 1674
    },
    {
      "epoch": 0.01675,
      "grad_norm": 0.7526884572805107,
      "learning_rate": 0.003,
      "loss": 4.2859,
      "step": 1675
    },
    {
      "epoch": 0.01676,
      "grad_norm": 0.608849775196141,
      "learning_rate": 0.003,
      "loss": 4.288,
      "step": 1676
    },
    {
      "epoch": 0.01677,
      "grad_norm": 0.587083227500864,
      "learning_rate": 0.003,
      "loss": 4.2754,
      "step": 1677
    },
    {
      "epoch": 0.01678,
      "grad_norm": 0.5796575721567071,
      "learning_rate": 0.003,
      "loss": 4.2871,
      "step": 1678
    },
    {
      "epoch": 0.01679,
      "grad_norm": 0.5746576795495367,
      "learning_rate": 0.003,
      "loss": 4.2709,
      "step": 1679
    },
    {
      "epoch": 0.0168,
      "grad_norm": 0.6091387034729067,
      "learning_rate": 0.003,
      "loss": 4.2876,
      "step": 1680
    },
    {
      "epoch": 0.01681,
      "grad_norm": 0.5440787081039196,
      "learning_rate": 0.003,
      "loss": 4.2859,
      "step": 1681
    },
    {
      "epoch": 0.01682,
      "grad_norm": 0.5021874872953848,
      "learning_rate": 0.003,
      "loss": 4.2746,
      "step": 1682
    },
    {
      "epoch": 0.01683,
      "grad_norm": 0.5600641996520215,
      "learning_rate": 0.003,
      "loss": 4.2891,
      "step": 1683
    },
    {
      "epoch": 0.01684,
      "grad_norm": 0.6044969913973193,
      "learning_rate": 0.003,
      "loss": 4.2655,
      "step": 1684
    },
    {
      "epoch": 0.01685,
      "grad_norm": 0.7175607093807479,
      "learning_rate": 0.003,
      "loss": 4.2832,
      "step": 1685
    },
    {
      "epoch": 0.01686,
      "grad_norm": 0.7107262709635087,
      "learning_rate": 0.003,
      "loss": 4.278,
      "step": 1686
    },
    {
      "epoch": 0.01687,
      "grad_norm": 0.6547857303100532,
      "learning_rate": 0.003,
      "loss": 4.2707,
      "step": 1687
    },
    {
      "epoch": 0.01688,
      "grad_norm": 0.6516245322949453,
      "learning_rate": 0.003,
      "loss": 4.258,
      "step": 1688
    },
    {
      "epoch": 0.01689,
      "grad_norm": 0.6229389769593134,
      "learning_rate": 0.003,
      "loss": 4.2721,
      "step": 1689
    },
    {
      "epoch": 0.0169,
      "grad_norm": 0.6103687657490365,
      "learning_rate": 0.003,
      "loss": 4.2607,
      "step": 1690
    },
    {
      "epoch": 0.01691,
      "grad_norm": 0.5335862349810607,
      "learning_rate": 0.003,
      "loss": 4.27,
      "step": 1691
    },
    {
      "epoch": 0.01692,
      "grad_norm": 0.5575535626191942,
      "learning_rate": 0.003,
      "loss": 4.2695,
      "step": 1692
    },
    {
      "epoch": 0.01693,
      "grad_norm": 0.568239333319019,
      "learning_rate": 0.003,
      "loss": 4.2967,
      "step": 1693
    },
    {
      "epoch": 0.01694,
      "grad_norm": 0.6200186749942788,
      "learning_rate": 0.003,
      "loss": 4.2481,
      "step": 1694
    },
    {
      "epoch": 0.01695,
      "grad_norm": 0.7306002506956177,
      "learning_rate": 0.003,
      "loss": 4.2852,
      "step": 1695
    },
    {
      "epoch": 0.01696,
      "grad_norm": 0.9861942877771345,
      "learning_rate": 0.003,
      "loss": 4.2865,
      "step": 1696
    },
    {
      "epoch": 0.01697,
      "grad_norm": 0.9202516729690784,
      "learning_rate": 0.003,
      "loss": 4.2714,
      "step": 1697
    },
    {
      "epoch": 0.01698,
      "grad_norm": 0.5813652600697033,
      "learning_rate": 0.003,
      "loss": 4.2517,
      "step": 1698
    },
    {
      "epoch": 0.01699,
      "grad_norm": 0.7906905904924819,
      "learning_rate": 0.003,
      "loss": 4.2666,
      "step": 1699
    },
    {
      "epoch": 0.017,
      "grad_norm": 0.8251190957906807,
      "learning_rate": 0.003,
      "loss": 4.2912,
      "step": 1700
    },
    {
      "epoch": 0.01701,
      "grad_norm": 0.5760899682258609,
      "learning_rate": 0.003,
      "loss": 4.2636,
      "step": 1701
    },
    {
      "epoch": 0.01702,
      "grad_norm": 0.6197604398341542,
      "learning_rate": 0.003,
      "loss": 4.2892,
      "step": 1702
    },
    {
      "epoch": 0.01703,
      "grad_norm": 0.6600695849762079,
      "learning_rate": 0.003,
      "loss": 4.2772,
      "step": 1703
    },
    {
      "epoch": 0.01704,
      "grad_norm": 0.6083291374384225,
      "learning_rate": 0.003,
      "loss": 4.2615,
      "step": 1704
    },
    {
      "epoch": 0.01705,
      "grad_norm": 0.6628061782787544,
      "learning_rate": 0.003,
      "loss": 4.2656,
      "step": 1705
    },
    {
      "epoch": 0.01706,
      "grad_norm": 0.6779759701213098,
      "learning_rate": 0.003,
      "loss": 4.2652,
      "step": 1706
    },
    {
      "epoch": 0.01707,
      "grad_norm": 0.6565491562339272,
      "learning_rate": 0.003,
      "loss": 4.2864,
      "step": 1707
    },
    {
      "epoch": 0.01708,
      "grad_norm": 0.72356139072098,
      "learning_rate": 0.003,
      "loss": 4.2639,
      "step": 1708
    },
    {
      "epoch": 0.01709,
      "grad_norm": 0.7465033977044144,
      "learning_rate": 0.003,
      "loss": 4.2882,
      "step": 1709
    },
    {
      "epoch": 0.0171,
      "grad_norm": 0.8329207828708123,
      "learning_rate": 0.003,
      "loss": 4.2425,
      "step": 1710
    },
    {
      "epoch": 0.01711,
      "grad_norm": 0.7868917410962508,
      "learning_rate": 0.003,
      "loss": 4.2732,
      "step": 1711
    },
    {
      "epoch": 0.01712,
      "grad_norm": 0.7908188817256777,
      "learning_rate": 0.003,
      "loss": 4.283,
      "step": 1712
    },
    {
      "epoch": 0.01713,
      "grad_norm": 0.7158559020644585,
      "learning_rate": 0.003,
      "loss": 4.2586,
      "step": 1713
    },
    {
      "epoch": 0.01714,
      "grad_norm": 0.6677106459549558,
      "learning_rate": 0.003,
      "loss": 4.2919,
      "step": 1714
    },
    {
      "epoch": 0.01715,
      "grad_norm": 0.6431660408355871,
      "learning_rate": 0.003,
      "loss": 4.2988,
      "step": 1715
    },
    {
      "epoch": 0.01716,
      "grad_norm": 0.6261280035670769,
      "learning_rate": 0.003,
      "loss": 4.2745,
      "step": 1716
    },
    {
      "epoch": 0.01717,
      "grad_norm": 0.654451460271765,
      "learning_rate": 0.003,
      "loss": 4.308,
      "step": 1717
    },
    {
      "epoch": 0.01718,
      "grad_norm": 0.7724030260270882,
      "learning_rate": 0.003,
      "loss": 4.2766,
      "step": 1718
    },
    {
      "epoch": 0.01719,
      "grad_norm": 0.8282403870632441,
      "learning_rate": 0.003,
      "loss": 4.3039,
      "step": 1719
    },
    {
      "epoch": 0.0172,
      "grad_norm": 0.8311264118816778,
      "learning_rate": 0.003,
      "loss": 4.2657,
      "step": 1720
    },
    {
      "epoch": 0.01721,
      "grad_norm": 0.6805806640783407,
      "learning_rate": 0.003,
      "loss": 4.2779,
      "step": 1721
    },
    {
      "epoch": 0.01722,
      "grad_norm": 0.7016597203863224,
      "learning_rate": 0.003,
      "loss": 4.2711,
      "step": 1722
    },
    {
      "epoch": 0.01723,
      "grad_norm": 0.8810025499012973,
      "learning_rate": 0.003,
      "loss": 4.2844,
      "step": 1723
    },
    {
      "epoch": 0.01724,
      "grad_norm": 0.843829134491477,
      "learning_rate": 0.003,
      "loss": 4.2918,
      "step": 1724
    },
    {
      "epoch": 0.01725,
      "grad_norm": 0.7958111364701067,
      "learning_rate": 0.003,
      "loss": 4.2741,
      "step": 1725
    },
    {
      "epoch": 0.01726,
      "grad_norm": 0.7747340922535558,
      "learning_rate": 0.003,
      "loss": 4.2656,
      "step": 1726
    },
    {
      "epoch": 0.01727,
      "grad_norm": 0.7603713990411842,
      "learning_rate": 0.003,
      "loss": 4.2713,
      "step": 1727
    },
    {
      "epoch": 0.01728,
      "grad_norm": 0.6106099006000164,
      "learning_rate": 0.003,
      "loss": 4.2671,
      "step": 1728
    },
    {
      "epoch": 0.01729,
      "grad_norm": 0.6017782155965361,
      "learning_rate": 0.003,
      "loss": 4.2747,
      "step": 1729
    },
    {
      "epoch": 0.0173,
      "grad_norm": 0.48590601550516704,
      "learning_rate": 0.003,
      "loss": 4.2446,
      "step": 1730
    },
    {
      "epoch": 0.01731,
      "grad_norm": 0.44227547910603526,
      "learning_rate": 0.003,
      "loss": 4.2625,
      "step": 1731
    },
    {
      "epoch": 0.01732,
      "grad_norm": 0.38121233110398844,
      "learning_rate": 0.003,
      "loss": 4.2664,
      "step": 1732
    },
    {
      "epoch": 0.01733,
      "grad_norm": 0.36765597229957125,
      "learning_rate": 0.003,
      "loss": 4.2477,
      "step": 1733
    },
    {
      "epoch": 0.01734,
      "grad_norm": 0.3640345239833258,
      "learning_rate": 0.003,
      "loss": 4.2562,
      "step": 1734
    },
    {
      "epoch": 0.01735,
      "grad_norm": 0.41470571844372184,
      "learning_rate": 0.003,
      "loss": 4.2282,
      "step": 1735
    },
    {
      "epoch": 0.01736,
      "grad_norm": 0.4147613401672916,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 1736
    },
    {
      "epoch": 0.01737,
      "grad_norm": 0.38708456345044623,
      "learning_rate": 0.003,
      "loss": 4.2561,
      "step": 1737
    },
    {
      "epoch": 0.01738,
      "grad_norm": 0.40641591602818083,
      "learning_rate": 0.003,
      "loss": 4.2459,
      "step": 1738
    },
    {
      "epoch": 0.01739,
      "grad_norm": 0.4886770308383728,
      "learning_rate": 0.003,
      "loss": 4.2634,
      "step": 1739
    },
    {
      "epoch": 0.0174,
      "grad_norm": 0.5760188069320612,
      "learning_rate": 0.003,
      "loss": 4.2625,
      "step": 1740
    },
    {
      "epoch": 0.01741,
      "grad_norm": 0.7342918297652692,
      "learning_rate": 0.003,
      "loss": 4.2751,
      "step": 1741
    },
    {
      "epoch": 0.01742,
      "grad_norm": 0.9752937691555351,
      "learning_rate": 0.003,
      "loss": 4.2762,
      "step": 1742
    },
    {
      "epoch": 0.01743,
      "grad_norm": 1.253306135016296,
      "learning_rate": 0.003,
      "loss": 4.3051,
      "step": 1743
    },
    {
      "epoch": 0.01744,
      "grad_norm": 0.7172428934954133,
      "learning_rate": 0.003,
      "loss": 4.2682,
      "step": 1744
    },
    {
      "epoch": 0.01745,
      "grad_norm": 0.7979588075730017,
      "learning_rate": 0.003,
      "loss": 4.2486,
      "step": 1745
    },
    {
      "epoch": 0.01746,
      "grad_norm": 0.7015732940358177,
      "learning_rate": 0.003,
      "loss": 4.2632,
      "step": 1746
    },
    {
      "epoch": 0.01747,
      "grad_norm": 0.5874159188966563,
      "learning_rate": 0.003,
      "loss": 4.2593,
      "step": 1747
    },
    {
      "epoch": 0.01748,
      "grad_norm": 0.6008438864541681,
      "learning_rate": 0.003,
      "loss": 4.2481,
      "step": 1748
    },
    {
      "epoch": 0.01749,
      "grad_norm": 0.720103709774815,
      "learning_rate": 0.003,
      "loss": 4.2531,
      "step": 1749
    },
    {
      "epoch": 0.0175,
      "grad_norm": 0.7257456079108818,
      "learning_rate": 0.003,
      "loss": 4.2707,
      "step": 1750
    },
    {
      "epoch": 0.01751,
      "grad_norm": 0.6976887398900127,
      "learning_rate": 0.003,
      "loss": 4.2594,
      "step": 1751
    },
    {
      "epoch": 0.01752,
      "grad_norm": 0.7240389649357895,
      "learning_rate": 0.003,
      "loss": 4.3015,
      "step": 1752
    },
    {
      "epoch": 0.01753,
      "grad_norm": 0.7475175322353548,
      "learning_rate": 0.003,
      "loss": 4.2524,
      "step": 1753
    },
    {
      "epoch": 0.01754,
      "grad_norm": 0.7135161794547551,
      "learning_rate": 0.003,
      "loss": 4.283,
      "step": 1754
    },
    {
      "epoch": 0.01755,
      "grad_norm": 0.6772806399058181,
      "learning_rate": 0.003,
      "loss": 4.2234,
      "step": 1755
    },
    {
      "epoch": 0.01756,
      "grad_norm": 0.5967370744888761,
      "learning_rate": 0.003,
      "loss": 4.2978,
      "step": 1756
    },
    {
      "epoch": 0.01757,
      "grad_norm": 0.6301290522164216,
      "learning_rate": 0.003,
      "loss": 4.2774,
      "step": 1757
    },
    {
      "epoch": 0.01758,
      "grad_norm": 0.6353245600564023,
      "learning_rate": 0.003,
      "loss": 4.2822,
      "step": 1758
    },
    {
      "epoch": 0.01759,
      "grad_norm": 0.7049975443178684,
      "learning_rate": 0.003,
      "loss": 4.254,
      "step": 1759
    },
    {
      "epoch": 0.0176,
      "grad_norm": 0.8640938687611407,
      "learning_rate": 0.003,
      "loss": 4.2815,
      "step": 1760
    },
    {
      "epoch": 0.01761,
      "grad_norm": 0.9711991589720537,
      "learning_rate": 0.003,
      "loss": 4.2777,
      "step": 1761
    },
    {
      "epoch": 0.01762,
      "grad_norm": 1.005290948352538,
      "learning_rate": 0.003,
      "loss": 4.2996,
      "step": 1762
    },
    {
      "epoch": 0.01763,
      "grad_norm": 0.8102250953399133,
      "learning_rate": 0.003,
      "loss": 4.2897,
      "step": 1763
    },
    {
      "epoch": 0.01764,
      "grad_norm": 0.6930668584086521,
      "learning_rate": 0.003,
      "loss": 4.255,
      "step": 1764
    },
    {
      "epoch": 0.01765,
      "grad_norm": 0.7660300448887684,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1765
    },
    {
      "epoch": 0.01766,
      "grad_norm": 0.7821227683200507,
      "learning_rate": 0.003,
      "loss": 4.2929,
      "step": 1766
    },
    {
      "epoch": 0.01767,
      "grad_norm": 0.7890818722807966,
      "learning_rate": 0.003,
      "loss": 4.2574,
      "step": 1767
    },
    {
      "epoch": 0.01768,
      "grad_norm": 0.6779554463169366,
      "learning_rate": 0.003,
      "loss": 4.2602,
      "step": 1768
    },
    {
      "epoch": 0.01769,
      "grad_norm": 0.5803675139310225,
      "learning_rate": 0.003,
      "loss": 4.2549,
      "step": 1769
    },
    {
      "epoch": 0.0177,
      "grad_norm": 0.4980219429490106,
      "learning_rate": 0.003,
      "loss": 4.2387,
      "step": 1770
    },
    {
      "epoch": 0.01771,
      "grad_norm": 0.5000355123582322,
      "learning_rate": 0.003,
      "loss": 4.2683,
      "step": 1771
    },
    {
      "epoch": 0.01772,
      "grad_norm": 0.5357025492673388,
      "learning_rate": 0.003,
      "loss": 4.2422,
      "step": 1772
    },
    {
      "epoch": 0.01773,
      "grad_norm": 0.6531498662416136,
      "learning_rate": 0.003,
      "loss": 4.238,
      "step": 1773
    },
    {
      "epoch": 0.01774,
      "grad_norm": 0.6353732229323361,
      "learning_rate": 0.003,
      "loss": 4.26,
      "step": 1774
    },
    {
      "epoch": 0.01775,
      "grad_norm": 0.5393954118207904,
      "learning_rate": 0.003,
      "loss": 4.2437,
      "step": 1775
    },
    {
      "epoch": 0.01776,
      "grad_norm": 0.5278117971354394,
      "learning_rate": 0.003,
      "loss": 4.234,
      "step": 1776
    },
    {
      "epoch": 0.01777,
      "grad_norm": 0.5939469450707184,
      "learning_rate": 0.003,
      "loss": 4.2579,
      "step": 1777
    },
    {
      "epoch": 0.01778,
      "grad_norm": 0.647641685256514,
      "learning_rate": 0.003,
      "loss": 4.2506,
      "step": 1778
    },
    {
      "epoch": 0.01779,
      "grad_norm": 0.8446707979262026,
      "learning_rate": 0.003,
      "loss": 4.2758,
      "step": 1779
    },
    {
      "epoch": 0.0178,
      "grad_norm": 1.0570404395411184,
      "learning_rate": 0.003,
      "loss": 4.2552,
      "step": 1780
    },
    {
      "epoch": 0.01781,
      "grad_norm": 0.9787736640469828,
      "learning_rate": 0.003,
      "loss": 4.2815,
      "step": 1781
    },
    {
      "epoch": 0.01782,
      "grad_norm": 0.847524414138198,
      "learning_rate": 0.003,
      "loss": 4.2954,
      "step": 1782
    },
    {
      "epoch": 0.01783,
      "grad_norm": 0.9639344454548374,
      "learning_rate": 0.003,
      "loss": 4.2851,
      "step": 1783
    },
    {
      "epoch": 0.01784,
      "grad_norm": 1.0002151519491012,
      "learning_rate": 0.003,
      "loss": 4.293,
      "step": 1784
    },
    {
      "epoch": 0.01785,
      "grad_norm": 0.8702738147646256,
      "learning_rate": 0.003,
      "loss": 4.2479,
      "step": 1785
    },
    {
      "epoch": 0.01786,
      "grad_norm": 0.9523430146284131,
      "learning_rate": 0.003,
      "loss": 4.2832,
      "step": 1786
    },
    {
      "epoch": 0.01787,
      "grad_norm": 0.9420856301640329,
      "learning_rate": 0.003,
      "loss": 4.298,
      "step": 1787
    },
    {
      "epoch": 0.01788,
      "grad_norm": 0.9591055635668716,
      "learning_rate": 0.003,
      "loss": 4.3053,
      "step": 1788
    },
    {
      "epoch": 0.01789,
      "grad_norm": 1.102256290411044,
      "learning_rate": 0.003,
      "loss": 4.2935,
      "step": 1789
    },
    {
      "epoch": 0.0179,
      "grad_norm": 0.7277732179516643,
      "learning_rate": 0.003,
      "loss": 4.2907,
      "step": 1790
    },
    {
      "epoch": 0.01791,
      "grad_norm": 0.6846616583850235,
      "learning_rate": 0.003,
      "loss": 4.3029,
      "step": 1791
    },
    {
      "epoch": 0.01792,
      "grad_norm": 0.6106358124886894,
      "learning_rate": 0.003,
      "loss": 4.2999,
      "step": 1792
    },
    {
      "epoch": 0.01793,
      "grad_norm": 0.576370585363149,
      "learning_rate": 0.003,
      "loss": 4.2594,
      "step": 1793
    },
    {
      "epoch": 0.01794,
      "grad_norm": 0.5513804110166618,
      "learning_rate": 0.003,
      "loss": 4.2731,
      "step": 1794
    },
    {
      "epoch": 0.01795,
      "grad_norm": 0.5576782681087106,
      "learning_rate": 0.003,
      "loss": 4.2693,
      "step": 1795
    },
    {
      "epoch": 0.01796,
      "grad_norm": 0.6017873787216107,
      "learning_rate": 0.003,
      "loss": 4.285,
      "step": 1796
    },
    {
      "epoch": 0.01797,
      "grad_norm": 0.5615285064368845,
      "learning_rate": 0.003,
      "loss": 4.245,
      "step": 1797
    },
    {
      "epoch": 0.01798,
      "grad_norm": 0.5947488985532953,
      "learning_rate": 0.003,
      "loss": 4.2588,
      "step": 1798
    },
    {
      "epoch": 0.01799,
      "grad_norm": 0.6522379824680304,
      "learning_rate": 0.003,
      "loss": 4.2758,
      "step": 1799
    },
    {
      "epoch": 0.018,
      "grad_norm": 0.6500825178193054,
      "learning_rate": 0.003,
      "loss": 4.2646,
      "step": 1800
    },
    {
      "epoch": 0.01801,
      "grad_norm": 0.5651900712619045,
      "learning_rate": 0.003,
      "loss": 4.2555,
      "step": 1801
    },
    {
      "epoch": 0.01802,
      "grad_norm": 0.519529158537289,
      "learning_rate": 0.003,
      "loss": 4.2696,
      "step": 1802
    },
    {
      "epoch": 0.01803,
      "grad_norm": 0.46754842901397486,
      "learning_rate": 0.003,
      "loss": 4.2723,
      "step": 1803
    },
    {
      "epoch": 0.01804,
      "grad_norm": 0.398959720777888,
      "learning_rate": 0.003,
      "loss": 4.2421,
      "step": 1804
    },
    {
      "epoch": 0.01805,
      "grad_norm": 0.3895139350920711,
      "learning_rate": 0.003,
      "loss": 4.2599,
      "step": 1805
    },
    {
      "epoch": 0.01806,
      "grad_norm": 0.4525813527222224,
      "learning_rate": 0.003,
      "loss": 4.2195,
      "step": 1806
    },
    {
      "epoch": 0.01807,
      "grad_norm": 0.5410650860568182,
      "learning_rate": 0.003,
      "loss": 4.2567,
      "step": 1807
    },
    {
      "epoch": 0.01808,
      "grad_norm": 0.7496417695037725,
      "learning_rate": 0.003,
      "loss": 4.2549,
      "step": 1808
    },
    {
      "epoch": 0.01809,
      "grad_norm": 1.0581291305731644,
      "learning_rate": 0.003,
      "loss": 4.2876,
      "step": 1809
    },
    {
      "epoch": 0.0181,
      "grad_norm": 0.9128142976079175,
      "learning_rate": 0.003,
      "loss": 4.2835,
      "step": 1810
    },
    {
      "epoch": 0.01811,
      "grad_norm": 0.5801170839897873,
      "learning_rate": 0.003,
      "loss": 4.255,
      "step": 1811
    },
    {
      "epoch": 0.01812,
      "grad_norm": 0.7987219111431881,
      "learning_rate": 0.003,
      "loss": 4.246,
      "step": 1812
    },
    {
      "epoch": 0.01813,
      "grad_norm": 0.7650732236875453,
      "learning_rate": 0.003,
      "loss": 4.2961,
      "step": 1813
    },
    {
      "epoch": 0.01814,
      "grad_norm": 0.54823874278624,
      "learning_rate": 0.003,
      "loss": 4.2509,
      "step": 1814
    },
    {
      "epoch": 0.01815,
      "grad_norm": 0.6377545297521496,
      "learning_rate": 0.003,
      "loss": 4.2406,
      "step": 1815
    },
    {
      "epoch": 0.01816,
      "grad_norm": 0.6497702590778472,
      "learning_rate": 0.003,
      "loss": 4.2694,
      "step": 1816
    },
    {
      "epoch": 0.01817,
      "grad_norm": 0.491025518180584,
      "learning_rate": 0.003,
      "loss": 4.252,
      "step": 1817
    },
    {
      "epoch": 0.01818,
      "grad_norm": 0.4902524212628076,
      "learning_rate": 0.003,
      "loss": 4.2511,
      "step": 1818
    },
    {
      "epoch": 0.01819,
      "grad_norm": 0.5084432143761608,
      "learning_rate": 0.003,
      "loss": 4.2348,
      "step": 1819
    },
    {
      "epoch": 0.0182,
      "grad_norm": 0.501595173107435,
      "learning_rate": 0.003,
      "loss": 4.2533,
      "step": 1820
    },
    {
      "epoch": 0.01821,
      "grad_norm": 0.46978349716766427,
      "learning_rate": 0.003,
      "loss": 4.2375,
      "step": 1821
    },
    {
      "epoch": 0.01822,
      "grad_norm": 0.48716840432846525,
      "learning_rate": 0.003,
      "loss": 4.2508,
      "step": 1822
    },
    {
      "epoch": 0.01823,
      "grad_norm": 0.5021205527055153,
      "learning_rate": 0.003,
      "loss": 4.2638,
      "step": 1823
    },
    {
      "epoch": 0.01824,
      "grad_norm": 0.46836488848103075,
      "learning_rate": 0.003,
      "loss": 4.2464,
      "step": 1824
    },
    {
      "epoch": 0.01825,
      "grad_norm": 0.47295661191938854,
      "learning_rate": 0.003,
      "loss": 4.2499,
      "step": 1825
    },
    {
      "epoch": 0.01826,
      "grad_norm": 0.49963653178248174,
      "learning_rate": 0.003,
      "loss": 4.2612,
      "step": 1826
    },
    {
      "epoch": 0.01827,
      "grad_norm": 0.5766246258857382,
      "learning_rate": 0.003,
      "loss": 4.2338,
      "step": 1827
    },
    {
      "epoch": 0.01828,
      "grad_norm": 0.6974117711131345,
      "learning_rate": 0.003,
      "loss": 4.2498,
      "step": 1828
    },
    {
      "epoch": 0.01829,
      "grad_norm": 0.920645288162959,
      "learning_rate": 0.003,
      "loss": 4.274,
      "step": 1829
    },
    {
      "epoch": 0.0183,
      "grad_norm": 1.0716665979253641,
      "learning_rate": 0.003,
      "loss": 4.2835,
      "step": 1830
    },
    {
      "epoch": 0.01831,
      "grad_norm": 0.7899385717194873,
      "learning_rate": 0.003,
      "loss": 4.263,
      "step": 1831
    },
    {
      "epoch": 0.01832,
      "grad_norm": 0.6909653405865042,
      "learning_rate": 0.003,
      "loss": 4.2522,
      "step": 1832
    },
    {
      "epoch": 0.01833,
      "grad_norm": 0.7631451682534196,
      "learning_rate": 0.003,
      "loss": 4.292,
      "step": 1833
    },
    {
      "epoch": 0.01834,
      "grad_norm": 0.8518021198547727,
      "learning_rate": 0.003,
      "loss": 4.2616,
      "step": 1834
    },
    {
      "epoch": 0.01835,
      "grad_norm": 0.9608680856911266,
      "learning_rate": 0.003,
      "loss": 4.2683,
      "step": 1835
    },
    {
      "epoch": 0.01836,
      "grad_norm": 0.9370565424064707,
      "learning_rate": 0.003,
      "loss": 4.2765,
      "step": 1836
    },
    {
      "epoch": 0.01837,
      "grad_norm": 0.8673926076902538,
      "learning_rate": 0.003,
      "loss": 4.2749,
      "step": 1837
    },
    {
      "epoch": 0.01838,
      "grad_norm": 0.8912993490476474,
      "learning_rate": 0.003,
      "loss": 4.2839,
      "step": 1838
    },
    {
      "epoch": 0.01839,
      "grad_norm": 0.8988310733902859,
      "learning_rate": 0.003,
      "loss": 4.3084,
      "step": 1839
    },
    {
      "epoch": 0.0184,
      "grad_norm": 0.7369259574653209,
      "learning_rate": 0.003,
      "loss": 4.2963,
      "step": 1840
    },
    {
      "epoch": 0.01841,
      "grad_norm": 0.8862375704705813,
      "learning_rate": 0.003,
      "loss": 4.2485,
      "step": 1841
    },
    {
      "epoch": 0.01842,
      "grad_norm": 0.887067457430843,
      "learning_rate": 0.003,
      "loss": 4.2772,
      "step": 1842
    },
    {
      "epoch": 0.01843,
      "grad_norm": 1.0320986494734232,
      "learning_rate": 0.003,
      "loss": 4.2654,
      "step": 1843
    },
    {
      "epoch": 0.01844,
      "grad_norm": 1.0361956597350352,
      "learning_rate": 0.003,
      "loss": 4.2646,
      "step": 1844
    },
    {
      "epoch": 0.01845,
      "grad_norm": 1.0400807778898855,
      "learning_rate": 0.003,
      "loss": 4.2766,
      "step": 1845
    },
    {
      "epoch": 0.01846,
      "grad_norm": 0.8263035202069795,
      "learning_rate": 0.003,
      "loss": 4.2922,
      "step": 1846
    },
    {
      "epoch": 0.01847,
      "grad_norm": 0.9419440433358707,
      "learning_rate": 0.003,
      "loss": 4.2908,
      "step": 1847
    },
    {
      "epoch": 0.01848,
      "grad_norm": 1.106935925830779,
      "learning_rate": 0.003,
      "loss": 4.3123,
      "step": 1848
    },
    {
      "epoch": 0.01849,
      "grad_norm": 0.9994394701361481,
      "learning_rate": 0.003,
      "loss": 4.3126,
      "step": 1849
    },
    {
      "epoch": 0.0185,
      "grad_norm": 1.0255088552432885,
      "learning_rate": 0.003,
      "loss": 4.3054,
      "step": 1850
    },
    {
      "epoch": 0.01851,
      "grad_norm": 0.7784174512435112,
      "learning_rate": 0.003,
      "loss": 4.2831,
      "step": 1851
    },
    {
      "epoch": 0.01852,
      "grad_norm": 0.6916717444333976,
      "learning_rate": 0.003,
      "loss": 4.278,
      "step": 1852
    },
    {
      "epoch": 0.01853,
      "grad_norm": 0.6721602041720395,
      "learning_rate": 0.003,
      "loss": 4.2766,
      "step": 1853
    },
    {
      "epoch": 0.01854,
      "grad_norm": 0.7396050760038246,
      "learning_rate": 0.003,
      "loss": 4.3052,
      "step": 1854
    },
    {
      "epoch": 0.01855,
      "grad_norm": 0.6847507864796065,
      "learning_rate": 0.003,
      "loss": 4.2912,
      "step": 1855
    },
    {
      "epoch": 0.01856,
      "grad_norm": 0.5367217650881859,
      "learning_rate": 0.003,
      "loss": 4.2681,
      "step": 1856
    },
    {
      "epoch": 0.01857,
      "grad_norm": 0.44508328180737716,
      "learning_rate": 0.003,
      "loss": 4.2464,
      "step": 1857
    },
    {
      "epoch": 0.01858,
      "grad_norm": 0.5010417067873371,
      "learning_rate": 0.003,
      "loss": 4.2561,
      "step": 1858
    },
    {
      "epoch": 0.01859,
      "grad_norm": 0.5235096490961759,
      "learning_rate": 0.003,
      "loss": 4.249,
      "step": 1859
    },
    {
      "epoch": 0.0186,
      "grad_norm": 0.5541997655191133,
      "learning_rate": 0.003,
      "loss": 4.2298,
      "step": 1860
    },
    {
      "epoch": 0.01861,
      "grad_norm": 0.5732056681109601,
      "learning_rate": 0.003,
      "loss": 4.2735,
      "step": 1861
    },
    {
      "epoch": 0.01862,
      "grad_norm": 0.5224594796450702,
      "learning_rate": 0.003,
      "loss": 4.2366,
      "step": 1862
    },
    {
      "epoch": 0.01863,
      "grad_norm": 0.5478228224050402,
      "learning_rate": 0.003,
      "loss": 4.2429,
      "step": 1863
    },
    {
      "epoch": 0.01864,
      "grad_norm": 0.5857768503913141,
      "learning_rate": 0.003,
      "loss": 4.2761,
      "step": 1864
    },
    {
      "epoch": 0.01865,
      "grad_norm": 0.6587143054588716,
      "learning_rate": 0.003,
      "loss": 4.2521,
      "step": 1865
    },
    {
      "epoch": 0.01866,
      "grad_norm": 0.7633854583380161,
      "learning_rate": 0.003,
      "loss": 4.2576,
      "step": 1866
    },
    {
      "epoch": 0.01867,
      "grad_norm": 0.8083803725891301,
      "learning_rate": 0.003,
      "loss": 4.2353,
      "step": 1867
    },
    {
      "epoch": 0.01868,
      "grad_norm": 0.6884248738266137,
      "learning_rate": 0.003,
      "loss": 4.25,
      "step": 1868
    },
    {
      "epoch": 0.01869,
      "grad_norm": 0.6947172674027756,
      "learning_rate": 0.003,
      "loss": 4.262,
      "step": 1869
    },
    {
      "epoch": 0.0187,
      "grad_norm": 0.6248124486931457,
      "learning_rate": 0.003,
      "loss": 4.2526,
      "step": 1870
    },
    {
      "epoch": 0.01871,
      "grad_norm": 0.5753685708476268,
      "learning_rate": 0.003,
      "loss": 4.2255,
      "step": 1871
    },
    {
      "epoch": 0.01872,
      "grad_norm": 0.549064563357354,
      "learning_rate": 0.003,
      "loss": 4.269,
      "step": 1872
    },
    {
      "epoch": 0.01873,
      "grad_norm": 0.5405289107554082,
      "learning_rate": 0.003,
      "loss": 4.2299,
      "step": 1873
    },
    {
      "epoch": 0.01874,
      "grad_norm": 0.5900459025774544,
      "learning_rate": 0.003,
      "loss": 4.2352,
      "step": 1874
    },
    {
      "epoch": 0.01875,
      "grad_norm": 0.6830694870998308,
      "learning_rate": 0.003,
      "loss": 4.2563,
      "step": 1875
    },
    {
      "epoch": 0.01876,
      "grad_norm": 0.7610298506439436,
      "learning_rate": 0.003,
      "loss": 4.265,
      "step": 1876
    },
    {
      "epoch": 0.01877,
      "grad_norm": 0.7821612711704626,
      "learning_rate": 0.003,
      "loss": 4.2702,
      "step": 1877
    },
    {
      "epoch": 0.01878,
      "grad_norm": 0.733027236852046,
      "learning_rate": 0.003,
      "loss": 4.2525,
      "step": 1878
    },
    {
      "epoch": 0.01879,
      "grad_norm": 0.7522348111794812,
      "learning_rate": 0.003,
      "loss": 4.2356,
      "step": 1879
    },
    {
      "epoch": 0.0188,
      "grad_norm": 0.8189114551871165,
      "learning_rate": 0.003,
      "loss": 4.252,
      "step": 1880
    },
    {
      "epoch": 0.01881,
      "grad_norm": 0.7856053328002562,
      "learning_rate": 0.003,
      "loss": 4.2461,
      "step": 1881
    },
    {
      "epoch": 0.01882,
      "grad_norm": 0.7957940195866625,
      "learning_rate": 0.003,
      "loss": 4.2764,
      "step": 1882
    },
    {
      "epoch": 0.01883,
      "grad_norm": 0.702292744282797,
      "learning_rate": 0.003,
      "loss": 4.2466,
      "step": 1883
    },
    {
      "epoch": 0.01884,
      "grad_norm": 0.7056698003000252,
      "learning_rate": 0.003,
      "loss": 4.2474,
      "step": 1884
    },
    {
      "epoch": 0.01885,
      "grad_norm": 0.7144737169743209,
      "learning_rate": 0.003,
      "loss": 4.2257,
      "step": 1885
    },
    {
      "epoch": 0.01886,
      "grad_norm": 0.8271097653458277,
      "learning_rate": 0.003,
      "loss": 4.2677,
      "step": 1886
    },
    {
      "epoch": 0.01887,
      "grad_norm": 0.8342710620866813,
      "learning_rate": 0.003,
      "loss": 4.2611,
      "step": 1887
    },
    {
      "epoch": 0.01888,
      "grad_norm": 0.734160166098297,
      "learning_rate": 0.003,
      "loss": 4.2419,
      "step": 1888
    },
    {
      "epoch": 0.01889,
      "grad_norm": 0.6217950481881471,
      "learning_rate": 0.003,
      "loss": 4.2534,
      "step": 1889
    },
    {
      "epoch": 0.0189,
      "grad_norm": 0.5252786175682846,
      "learning_rate": 0.003,
      "loss": 4.2284,
      "step": 1890
    },
    {
      "epoch": 0.01891,
      "grad_norm": 0.506921167946394,
      "learning_rate": 0.003,
      "loss": 4.2769,
      "step": 1891
    },
    {
      "epoch": 0.01892,
      "grad_norm": 0.5754872458989723,
      "learning_rate": 0.003,
      "loss": 4.2536,
      "step": 1892
    },
    {
      "epoch": 0.01893,
      "grad_norm": 0.6096816763681697,
      "learning_rate": 0.003,
      "loss": 4.2344,
      "step": 1893
    },
    {
      "epoch": 0.01894,
      "grad_norm": 0.5451335732215307,
      "learning_rate": 0.003,
      "loss": 4.239,
      "step": 1894
    },
    {
      "epoch": 0.01895,
      "grad_norm": 0.5874969024823826,
      "learning_rate": 0.003,
      "loss": 4.2256,
      "step": 1895
    },
    {
      "epoch": 0.01896,
      "grad_norm": 0.5969294645671555,
      "learning_rate": 0.003,
      "loss": 4.2484,
      "step": 1896
    },
    {
      "epoch": 0.01897,
      "grad_norm": 0.6886300551222337,
      "learning_rate": 0.003,
      "loss": 4.251,
      "step": 1897
    },
    {
      "epoch": 0.01898,
      "grad_norm": 0.7942601312190114,
      "learning_rate": 0.003,
      "loss": 4.225,
      "step": 1898
    },
    {
      "epoch": 0.01899,
      "grad_norm": 0.7734115484923887,
      "learning_rate": 0.003,
      "loss": 4.2391,
      "step": 1899
    },
    {
      "epoch": 0.019,
      "grad_norm": 0.6488046036081325,
      "learning_rate": 0.003,
      "loss": 4.2302,
      "step": 1900
    },
    {
      "epoch": 0.01901,
      "grad_norm": 0.5904392009872547,
      "learning_rate": 0.003,
      "loss": 4.274,
      "step": 1901
    },
    {
      "epoch": 0.01902,
      "grad_norm": 0.659409531187468,
      "learning_rate": 0.003,
      "loss": 4.2537,
      "step": 1902
    },
    {
      "epoch": 0.01903,
      "grad_norm": 0.6235497653402786,
      "learning_rate": 0.003,
      "loss": 4.2072,
      "step": 1903
    },
    {
      "epoch": 0.01904,
      "grad_norm": 0.5825245241241579,
      "learning_rate": 0.003,
      "loss": 4.262,
      "step": 1904
    },
    {
      "epoch": 0.01905,
      "grad_norm": 0.5746731920133754,
      "learning_rate": 0.003,
      "loss": 4.2376,
      "step": 1905
    },
    {
      "epoch": 0.01906,
      "grad_norm": 0.589593122182735,
      "learning_rate": 0.003,
      "loss": 4.2554,
      "step": 1906
    },
    {
      "epoch": 0.01907,
      "grad_norm": 0.5928254576730478,
      "learning_rate": 0.003,
      "loss": 4.2503,
      "step": 1907
    },
    {
      "epoch": 0.01908,
      "grad_norm": 0.6382953184353721,
      "learning_rate": 0.003,
      "loss": 4.2621,
      "step": 1908
    },
    {
      "epoch": 0.01909,
      "grad_norm": 0.6563665705772666,
      "learning_rate": 0.003,
      "loss": 4.2294,
      "step": 1909
    },
    {
      "epoch": 0.0191,
      "grad_norm": 0.6240866408195912,
      "learning_rate": 0.003,
      "loss": 4.2393,
      "step": 1910
    },
    {
      "epoch": 0.01911,
      "grad_norm": 0.7154059568811579,
      "learning_rate": 0.003,
      "loss": 4.2201,
      "step": 1911
    },
    {
      "epoch": 0.01912,
      "grad_norm": 0.7017374192608556,
      "learning_rate": 0.003,
      "loss": 4.229,
      "step": 1912
    },
    {
      "epoch": 0.01913,
      "grad_norm": 0.8305634249070047,
      "learning_rate": 0.003,
      "loss": 4.2505,
      "step": 1913
    },
    {
      "epoch": 0.01914,
      "grad_norm": 1.0869820826090637,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1914
    },
    {
      "epoch": 0.01915,
      "grad_norm": 0.9088319848025848,
      "learning_rate": 0.003,
      "loss": 4.2668,
      "step": 1915
    },
    {
      "epoch": 0.01916,
      "grad_norm": 0.7254253941299005,
      "learning_rate": 0.003,
      "loss": 4.2403,
      "step": 1916
    },
    {
      "epoch": 0.01917,
      "grad_norm": 0.7115705570447282,
      "learning_rate": 0.003,
      "loss": 4.2636,
      "step": 1917
    },
    {
      "epoch": 0.01918,
      "grad_norm": 0.7162193279368824,
      "learning_rate": 0.003,
      "loss": 4.2488,
      "step": 1918
    },
    {
      "epoch": 0.01919,
      "grad_norm": 0.7467289060307204,
      "learning_rate": 0.003,
      "loss": 4.2482,
      "step": 1919
    },
    {
      "epoch": 0.0192,
      "grad_norm": 0.7103235241692503,
      "learning_rate": 0.003,
      "loss": 4.2521,
      "step": 1920
    },
    {
      "epoch": 0.01921,
      "grad_norm": 0.6854531851275022,
      "learning_rate": 0.003,
      "loss": 4.2411,
      "step": 1921
    },
    {
      "epoch": 0.01922,
      "grad_norm": 0.6161601419108617,
      "learning_rate": 0.003,
      "loss": 4.2512,
      "step": 1922
    },
    {
      "epoch": 0.01923,
      "grad_norm": 0.5835969123302268,
      "learning_rate": 0.003,
      "loss": 4.2556,
      "step": 1923
    },
    {
      "epoch": 0.01924,
      "grad_norm": 0.5301507132316713,
      "learning_rate": 0.003,
      "loss": 4.2545,
      "step": 1924
    },
    {
      "epoch": 0.01925,
      "grad_norm": 0.5123517463043641,
      "learning_rate": 0.003,
      "loss": 4.2425,
      "step": 1925
    },
    {
      "epoch": 0.01926,
      "grad_norm": 0.515823650813431,
      "learning_rate": 0.003,
      "loss": 4.2482,
      "step": 1926
    },
    {
      "epoch": 0.01927,
      "grad_norm": 0.5134711460859718,
      "learning_rate": 0.003,
      "loss": 4.2381,
      "step": 1927
    },
    {
      "epoch": 0.01928,
      "grad_norm": 0.5394990415668017,
      "learning_rate": 0.003,
      "loss": 4.2124,
      "step": 1928
    },
    {
      "epoch": 0.01929,
      "grad_norm": 0.600924196245884,
      "learning_rate": 0.003,
      "loss": 4.2341,
      "step": 1929
    },
    {
      "epoch": 0.0193,
      "grad_norm": 0.5841670599595433,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 1930
    },
    {
      "epoch": 0.01931,
      "grad_norm": 0.6141351653971785,
      "learning_rate": 0.003,
      "loss": 4.2269,
      "step": 1931
    },
    {
      "epoch": 0.01932,
      "grad_norm": 0.630171190249486,
      "learning_rate": 0.003,
      "loss": 4.2357,
      "step": 1932
    },
    {
      "epoch": 0.01933,
      "grad_norm": 0.6348706668276403,
      "learning_rate": 0.003,
      "loss": 4.2241,
      "step": 1933
    },
    {
      "epoch": 0.01934,
      "grad_norm": 0.7486061864959305,
      "learning_rate": 0.003,
      "loss": 4.2368,
      "step": 1934
    },
    {
      "epoch": 0.01935,
      "grad_norm": 0.837656099844549,
      "learning_rate": 0.003,
      "loss": 4.2566,
      "step": 1935
    },
    {
      "epoch": 0.01936,
      "grad_norm": 1.235132295685672,
      "learning_rate": 0.003,
      "loss": 4.2553,
      "step": 1936
    },
    {
      "epoch": 0.01937,
      "grad_norm": 1.1952922931590582,
      "learning_rate": 0.003,
      "loss": 4.2793,
      "step": 1937
    },
    {
      "epoch": 0.01938,
      "grad_norm": 0.8545368063846817,
      "learning_rate": 0.003,
      "loss": 4.2672,
      "step": 1938
    },
    {
      "epoch": 0.01939,
      "grad_norm": 0.8693625520829354,
      "learning_rate": 0.003,
      "loss": 4.2945,
      "step": 1939
    },
    {
      "epoch": 0.0194,
      "grad_norm": 0.9374945609912207,
      "learning_rate": 0.003,
      "loss": 4.2792,
      "step": 1940
    },
    {
      "epoch": 0.01941,
      "grad_norm": 0.8417804839614496,
      "learning_rate": 0.003,
      "loss": 4.2519,
      "step": 1941
    },
    {
      "epoch": 0.01942,
      "grad_norm": 0.6835551217455754,
      "learning_rate": 0.003,
      "loss": 4.2765,
      "step": 1942
    },
    {
      "epoch": 0.01943,
      "grad_norm": 0.6326583755464011,
      "learning_rate": 0.003,
      "loss": 4.247,
      "step": 1943
    },
    {
      "epoch": 0.01944,
      "grad_norm": 0.6336766610195198,
      "learning_rate": 0.003,
      "loss": 4.2845,
      "step": 1944
    },
    {
      "epoch": 0.01945,
      "grad_norm": 0.7023249661919384,
      "learning_rate": 0.003,
      "loss": 4.2549,
      "step": 1945
    },
    {
      "epoch": 0.01946,
      "grad_norm": 0.7532289611006365,
      "learning_rate": 0.003,
      "loss": 4.2536,
      "step": 1946
    },
    {
      "epoch": 0.01947,
      "grad_norm": 0.7241899675160898,
      "learning_rate": 0.003,
      "loss": 4.262,
      "step": 1947
    },
    {
      "epoch": 0.01948,
      "grad_norm": 0.6630467127195124,
      "learning_rate": 0.003,
      "loss": 4.2785,
      "step": 1948
    },
    {
      "epoch": 0.01949,
      "grad_norm": 0.7074334475974129,
      "learning_rate": 0.003,
      "loss": 4.2694,
      "step": 1949
    },
    {
      "epoch": 0.0195,
      "grad_norm": 0.6466673056304594,
      "learning_rate": 0.003,
      "loss": 4.2571,
      "step": 1950
    },
    {
      "epoch": 0.01951,
      "grad_norm": 0.654263769555975,
      "learning_rate": 0.003,
      "loss": 4.2499,
      "step": 1951
    },
    {
      "epoch": 0.01952,
      "grad_norm": 0.5576275389008273,
      "learning_rate": 0.003,
      "loss": 4.2552,
      "step": 1952
    },
    {
      "epoch": 0.01953,
      "grad_norm": 0.5279513641785941,
      "learning_rate": 0.003,
      "loss": 4.2571,
      "step": 1953
    },
    {
      "epoch": 0.01954,
      "grad_norm": 0.6435233353308943,
      "learning_rate": 0.003,
      "loss": 4.2595,
      "step": 1954
    },
    {
      "epoch": 0.01955,
      "grad_norm": 0.9239174746697658,
      "learning_rate": 0.003,
      "loss": 4.2611,
      "step": 1955
    },
    {
      "epoch": 0.01956,
      "grad_norm": 1.1595233997823533,
      "learning_rate": 0.003,
      "loss": 4.2763,
      "step": 1956
    },
    {
      "epoch": 0.01957,
      "grad_norm": 0.6293751148089686,
      "learning_rate": 0.003,
      "loss": 4.2223,
      "step": 1957
    },
    {
      "epoch": 0.01958,
      "grad_norm": 0.638290484912288,
      "learning_rate": 0.003,
      "loss": 4.2174,
      "step": 1958
    },
    {
      "epoch": 0.01959,
      "grad_norm": 0.7260665225455007,
      "learning_rate": 0.003,
      "loss": 4.245,
      "step": 1959
    },
    {
      "epoch": 0.0196,
      "grad_norm": 0.6163944682591169,
      "learning_rate": 0.003,
      "loss": 4.232,
      "step": 1960
    },
    {
      "epoch": 0.01961,
      "grad_norm": 0.6978053518640758,
      "learning_rate": 0.003,
      "loss": 4.236,
      "step": 1961
    },
    {
      "epoch": 0.01962,
      "grad_norm": 0.6587488710703514,
      "learning_rate": 0.003,
      "loss": 4.2366,
      "step": 1962
    },
    {
      "epoch": 0.01963,
      "grad_norm": 0.5260594263866168,
      "learning_rate": 0.003,
      "loss": 4.2173,
      "step": 1963
    },
    {
      "epoch": 0.01964,
      "grad_norm": 0.490485720862937,
      "learning_rate": 0.003,
      "loss": 4.2458,
      "step": 1964
    },
    {
      "epoch": 0.01965,
      "grad_norm": 0.45667875143823883,
      "learning_rate": 0.003,
      "loss": 4.2346,
      "step": 1965
    },
    {
      "epoch": 0.01966,
      "grad_norm": 0.5549508938343672,
      "learning_rate": 0.003,
      "loss": 4.2386,
      "step": 1966
    },
    {
      "epoch": 0.01967,
      "grad_norm": 0.5947105420816633,
      "learning_rate": 0.003,
      "loss": 4.2168,
      "step": 1967
    },
    {
      "epoch": 0.01968,
      "grad_norm": 0.7273871579402625,
      "learning_rate": 0.003,
      "loss": 4.2427,
      "step": 1968
    },
    {
      "epoch": 0.01969,
      "grad_norm": 0.8131307161709899,
      "learning_rate": 0.003,
      "loss": 4.2447,
      "step": 1969
    },
    {
      "epoch": 0.0197,
      "grad_norm": 0.8153412941444168,
      "learning_rate": 0.003,
      "loss": 4.242,
      "step": 1970
    },
    {
      "epoch": 0.01971,
      "grad_norm": 0.7076673538361063,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 1971
    },
    {
      "epoch": 0.01972,
      "grad_norm": 0.6697097595169021,
      "learning_rate": 0.003,
      "loss": 4.2266,
      "step": 1972
    },
    {
      "epoch": 0.01973,
      "grad_norm": 0.6208505648120144,
      "learning_rate": 0.003,
      "loss": 4.2061,
      "step": 1973
    },
    {
      "epoch": 0.01974,
      "grad_norm": 0.6078249030800872,
      "learning_rate": 0.003,
      "loss": 4.2469,
      "step": 1974
    },
    {
      "epoch": 0.01975,
      "grad_norm": 0.6193855466160911,
      "learning_rate": 0.003,
      "loss": 4.2383,
      "step": 1975
    },
    {
      "epoch": 0.01976,
      "grad_norm": 0.5723863500156643,
      "learning_rate": 0.003,
      "loss": 4.228,
      "step": 1976
    },
    {
      "epoch": 0.01977,
      "grad_norm": 0.614684480005163,
      "learning_rate": 0.003,
      "loss": 4.2636,
      "step": 1977
    },
    {
      "epoch": 0.01978,
      "grad_norm": 0.8002502395832268,
      "learning_rate": 0.003,
      "loss": 4.269,
      "step": 1978
    },
    {
      "epoch": 0.01979,
      "grad_norm": 1.1734742193181502,
      "learning_rate": 0.003,
      "loss": 4.2712,
      "step": 1979
    },
    {
      "epoch": 0.0198,
      "grad_norm": 1.0542111183452296,
      "learning_rate": 0.003,
      "loss": 4.2573,
      "step": 1980
    },
    {
      "epoch": 0.01981,
      "grad_norm": 0.8526982225261958,
      "learning_rate": 0.003,
      "loss": 4.2367,
      "step": 1981
    },
    {
      "epoch": 0.01982,
      "grad_norm": 0.7282783707767795,
      "learning_rate": 0.003,
      "loss": 4.2575,
      "step": 1982
    },
    {
      "epoch": 0.01983,
      "grad_norm": 0.7406100600030446,
      "learning_rate": 0.003,
      "loss": 4.2502,
      "step": 1983
    },
    {
      "epoch": 0.01984,
      "grad_norm": 0.7725656708208015,
      "learning_rate": 0.003,
      "loss": 4.2593,
      "step": 1984
    },
    {
      "epoch": 0.01985,
      "grad_norm": 0.8754720019719067,
      "learning_rate": 0.003,
      "loss": 4.2582,
      "step": 1985
    },
    {
      "epoch": 0.01986,
      "grad_norm": 0.9542865913273004,
      "learning_rate": 0.003,
      "loss": 4.2928,
      "step": 1986
    },
    {
      "epoch": 0.01987,
      "grad_norm": 0.9042901186857757,
      "learning_rate": 0.003,
      "loss": 4.248,
      "step": 1987
    },
    {
      "epoch": 0.01988,
      "grad_norm": 0.9236965754299921,
      "learning_rate": 0.003,
      "loss": 4.2653,
      "step": 1988
    },
    {
      "epoch": 0.01989,
      "grad_norm": 0.8847485842619162,
      "learning_rate": 0.003,
      "loss": 4.2507,
      "step": 1989
    },
    {
      "epoch": 0.0199,
      "grad_norm": 0.7262740969822487,
      "learning_rate": 0.003,
      "loss": 4.2593,
      "step": 1990
    },
    {
      "epoch": 0.01991,
      "grad_norm": 0.6829209219384283,
      "learning_rate": 0.003,
      "loss": 4.2367,
      "step": 1991
    },
    {
      "epoch": 0.01992,
      "grad_norm": 0.6687499652079453,
      "learning_rate": 0.003,
      "loss": 4.2729,
      "step": 1992
    },
    {
      "epoch": 0.01993,
      "grad_norm": 0.6270120579020493,
      "learning_rate": 0.003,
      "loss": 4.2552,
      "step": 1993
    },
    {
      "epoch": 0.01994,
      "grad_norm": 0.695428830391491,
      "learning_rate": 0.003,
      "loss": 4.2763,
      "step": 1994
    },
    {
      "epoch": 0.01995,
      "grad_norm": 0.6511062075978684,
      "learning_rate": 0.003,
      "loss": 4.265,
      "step": 1995
    },
    {
      "epoch": 0.01996,
      "grad_norm": 0.7071538607579048,
      "learning_rate": 0.003,
      "loss": 4.2345,
      "step": 1996
    },
    {
      "epoch": 0.01997,
      "grad_norm": 0.837055308686826,
      "learning_rate": 0.003,
      "loss": 4.2713,
      "step": 1997
    },
    {
      "epoch": 0.01998,
      "grad_norm": 0.9265767295150127,
      "learning_rate": 0.003,
      "loss": 4.2575,
      "step": 1998
    },
    {
      "epoch": 0.01999,
      "grad_norm": 0.776786308598987,
      "learning_rate": 0.003,
      "loss": 4.2456,
      "step": 1999
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.544115707262644,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 2000
    },
    {
      "epoch": 0.02001,
      "grad_norm": 0.5968209168617383,
      "learning_rate": 0.003,
      "loss": 4.2587,
      "step": 2001
    },
    {
      "epoch": 0.02002,
      "grad_norm": 0.6230815008994128,
      "learning_rate": 0.003,
      "loss": 4.2573,
      "step": 2002
    },
    {
      "epoch": 0.02003,
      "grad_norm": 0.6144419617298068,
      "learning_rate": 0.003,
      "loss": 4.2517,
      "step": 2003
    },
    {
      "epoch": 0.02004,
      "grad_norm": 0.5244593563834906,
      "learning_rate": 0.003,
      "loss": 4.2348,
      "step": 2004
    },
    {
      "epoch": 0.02005,
      "grad_norm": 0.4555996828045135,
      "learning_rate": 0.003,
      "loss": 4.2382,
      "step": 2005
    },
    {
      "epoch": 0.02006,
      "grad_norm": 0.47265795693259455,
      "learning_rate": 0.003,
      "loss": 4.2356,
      "step": 2006
    },
    {
      "epoch": 0.02007,
      "grad_norm": 0.5068378835965273,
      "learning_rate": 0.003,
      "loss": 4.2513,
      "step": 2007
    },
    {
      "epoch": 0.02008,
      "grad_norm": 0.5368129535807056,
      "learning_rate": 0.003,
      "loss": 4.2399,
      "step": 2008
    },
    {
      "epoch": 0.02009,
      "grad_norm": 0.6542343628757692,
      "learning_rate": 0.003,
      "loss": 4.2391,
      "step": 2009
    },
    {
      "epoch": 0.0201,
      "grad_norm": 0.7550868705205384,
      "learning_rate": 0.003,
      "loss": 4.2546,
      "step": 2010
    },
    {
      "epoch": 0.02011,
      "grad_norm": 0.8523385764253874,
      "learning_rate": 0.003,
      "loss": 4.2454,
      "step": 2011
    },
    {
      "epoch": 0.02012,
      "grad_norm": 0.7590508053821384,
      "learning_rate": 0.003,
      "loss": 4.2437,
      "step": 2012
    },
    {
      "epoch": 0.02013,
      "grad_norm": 0.6112232485700291,
      "learning_rate": 0.003,
      "loss": 4.2454,
      "step": 2013
    },
    {
      "epoch": 0.02014,
      "grad_norm": 0.580521076473927,
      "learning_rate": 0.003,
      "loss": 4.2288,
      "step": 2014
    },
    {
      "epoch": 0.02015,
      "grad_norm": 0.5972048548713036,
      "learning_rate": 0.003,
      "loss": 4.2254,
      "step": 2015
    },
    {
      "epoch": 0.02016,
      "grad_norm": 0.502339786086644,
      "learning_rate": 0.003,
      "loss": 4.228,
      "step": 2016
    },
    {
      "epoch": 0.02017,
      "grad_norm": 0.5744684235266054,
      "learning_rate": 0.003,
      "loss": 4.224,
      "step": 2017
    },
    {
      "epoch": 0.02018,
      "grad_norm": 0.686457625935553,
      "learning_rate": 0.003,
      "loss": 4.2281,
      "step": 2018
    },
    {
      "epoch": 0.02019,
      "grad_norm": 0.8023792999105707,
      "learning_rate": 0.003,
      "loss": 4.2399,
      "step": 2019
    },
    {
      "epoch": 0.0202,
      "grad_norm": 0.779213027451944,
      "learning_rate": 0.003,
      "loss": 4.2469,
      "step": 2020
    },
    {
      "epoch": 0.02021,
      "grad_norm": 0.747271315164086,
      "learning_rate": 0.003,
      "loss": 4.2601,
      "step": 2021
    },
    {
      "epoch": 0.02022,
      "grad_norm": 0.7712990219385492,
      "learning_rate": 0.003,
      "loss": 4.2242,
      "step": 2022
    },
    {
      "epoch": 0.02023,
      "grad_norm": 0.678848353295381,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2023
    },
    {
      "epoch": 0.02024,
      "grad_norm": 0.7080112136621078,
      "learning_rate": 0.003,
      "loss": 4.2211,
      "step": 2024
    },
    {
      "epoch": 0.02025,
      "grad_norm": 0.7015387197783911,
      "learning_rate": 0.003,
      "loss": 4.2713,
      "step": 2025
    },
    {
      "epoch": 0.02026,
      "grad_norm": 0.6618325774284712,
      "learning_rate": 0.003,
      "loss": 4.249,
      "step": 2026
    },
    {
      "epoch": 0.02027,
      "grad_norm": 0.6649314977064322,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 2027
    },
    {
      "epoch": 0.02028,
      "grad_norm": 0.7311305035019059,
      "learning_rate": 0.003,
      "loss": 4.2336,
      "step": 2028
    },
    {
      "epoch": 0.02029,
      "grad_norm": 0.829605271368658,
      "learning_rate": 0.003,
      "loss": 4.243,
      "step": 2029
    },
    {
      "epoch": 0.0203,
      "grad_norm": 0.8235570307782024,
      "learning_rate": 0.003,
      "loss": 4.2354,
      "step": 2030
    },
    {
      "epoch": 0.02031,
      "grad_norm": 0.8039697362095677,
      "learning_rate": 0.003,
      "loss": 4.2412,
      "step": 2031
    },
    {
      "epoch": 0.02032,
      "grad_norm": 0.7849326035149696,
      "learning_rate": 0.003,
      "loss": 4.2148,
      "step": 2032
    },
    {
      "epoch": 0.02033,
      "grad_norm": 0.7613884372790184,
      "learning_rate": 0.003,
      "loss": 4.2543,
      "step": 2033
    },
    {
      "epoch": 0.02034,
      "grad_norm": 0.7131747920559885,
      "learning_rate": 0.003,
      "loss": 4.2453,
      "step": 2034
    },
    {
      "epoch": 0.02035,
      "grad_norm": 0.7017452038094153,
      "learning_rate": 0.003,
      "loss": 4.2298,
      "step": 2035
    },
    {
      "epoch": 0.02036,
      "grad_norm": 0.6789439698745199,
      "learning_rate": 0.003,
      "loss": 4.232,
      "step": 2036
    },
    {
      "epoch": 0.02037,
      "grad_norm": 0.5966860547597203,
      "learning_rate": 0.003,
      "loss": 4.2423,
      "step": 2037
    },
    {
      "epoch": 0.02038,
      "grad_norm": 0.5724376516658686,
      "learning_rate": 0.003,
      "loss": 4.224,
      "step": 2038
    },
    {
      "epoch": 0.02039,
      "grad_norm": 0.5467166846835673,
      "learning_rate": 0.003,
      "loss": 4.2314,
      "step": 2039
    },
    {
      "epoch": 0.0204,
      "grad_norm": 0.5471552582468747,
      "learning_rate": 0.003,
      "loss": 4.2353,
      "step": 2040
    },
    {
      "epoch": 0.02041,
      "grad_norm": 0.5793884373224328,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2041
    },
    {
      "epoch": 0.02042,
      "grad_norm": 0.5725852678050465,
      "learning_rate": 0.003,
      "loss": 4.197,
      "step": 2042
    },
    {
      "epoch": 0.02043,
      "grad_norm": 0.7022295528607846,
      "learning_rate": 0.003,
      "loss": 4.2474,
      "step": 2043
    },
    {
      "epoch": 0.02044,
      "grad_norm": 0.8601243032306309,
      "learning_rate": 0.003,
      "loss": 4.2273,
      "step": 2044
    },
    {
      "epoch": 0.02045,
      "grad_norm": 0.8046253539648219,
      "learning_rate": 0.003,
      "loss": 4.2424,
      "step": 2045
    },
    {
      "epoch": 0.02046,
      "grad_norm": 0.855965931301316,
      "learning_rate": 0.003,
      "loss": 4.2237,
      "step": 2046
    },
    {
      "epoch": 0.02047,
      "grad_norm": 1.0357234424752648,
      "learning_rate": 0.003,
      "loss": 4.2697,
      "step": 2047
    },
    {
      "epoch": 0.02048,
      "grad_norm": 1.0439384854619227,
      "learning_rate": 0.003,
      "loss": 4.266,
      "step": 2048
    },
    {
      "epoch": 0.02049,
      "grad_norm": 0.9412093564870287,
      "learning_rate": 0.003,
      "loss": 4.2808,
      "step": 2049
    },
    {
      "epoch": 0.0205,
      "grad_norm": 0.9243748599866667,
      "learning_rate": 0.003,
      "loss": 4.2289,
      "step": 2050
    },
    {
      "epoch": 0.02051,
      "grad_norm": 0.7552410516608989,
      "learning_rate": 0.003,
      "loss": 4.2287,
      "step": 2051
    },
    {
      "epoch": 0.02052,
      "grad_norm": 0.6501532577962565,
      "learning_rate": 0.003,
      "loss": 4.226,
      "step": 2052
    },
    {
      "epoch": 0.02053,
      "grad_norm": 0.7652062065498313,
      "learning_rate": 0.003,
      "loss": 4.2323,
      "step": 2053
    },
    {
      "epoch": 0.02054,
      "grad_norm": 0.8858538626101129,
      "learning_rate": 0.003,
      "loss": 4.2751,
      "step": 2054
    },
    {
      "epoch": 0.02055,
      "grad_norm": 0.7846832781207133,
      "learning_rate": 0.003,
      "loss": 4.231,
      "step": 2055
    },
    {
      "epoch": 0.02056,
      "grad_norm": 0.7220550070691274,
      "learning_rate": 0.003,
      "loss": 4.2248,
      "step": 2056
    },
    {
      "epoch": 0.02057,
      "grad_norm": 0.7368090739126946,
      "learning_rate": 0.003,
      "loss": 4.2673,
      "step": 2057
    },
    {
      "epoch": 0.02058,
      "grad_norm": 0.7702851748272688,
      "learning_rate": 0.003,
      "loss": 4.2441,
      "step": 2058
    },
    {
      "epoch": 0.02059,
      "grad_norm": 0.6929544007844861,
      "learning_rate": 0.003,
      "loss": 4.2219,
      "step": 2059
    },
    {
      "epoch": 0.0206,
      "grad_norm": 0.6471217940270918,
      "learning_rate": 0.003,
      "loss": 4.2645,
      "step": 2060
    },
    {
      "epoch": 0.02061,
      "grad_norm": 0.7586644703414864,
      "learning_rate": 0.003,
      "loss": 4.2388,
      "step": 2061
    },
    {
      "epoch": 0.02062,
      "grad_norm": 0.8861471216459856,
      "learning_rate": 0.003,
      "loss": 4.2462,
      "step": 2062
    },
    {
      "epoch": 0.02063,
      "grad_norm": 0.9668317520421095,
      "learning_rate": 0.003,
      "loss": 4.2463,
      "step": 2063
    },
    {
      "epoch": 0.02064,
      "grad_norm": 0.9399457261661122,
      "learning_rate": 0.003,
      "loss": 4.2579,
      "step": 2064
    },
    {
      "epoch": 0.02065,
      "grad_norm": 0.8512260541477842,
      "learning_rate": 0.003,
      "loss": 4.235,
      "step": 2065
    },
    {
      "epoch": 0.02066,
      "grad_norm": 0.7854637650895366,
      "learning_rate": 0.003,
      "loss": 4.2504,
      "step": 2066
    },
    {
      "epoch": 0.02067,
      "grad_norm": 0.7442570611327062,
      "learning_rate": 0.003,
      "loss": 4.248,
      "step": 2067
    },
    {
      "epoch": 0.02068,
      "grad_norm": 0.7336263498203625,
      "learning_rate": 0.003,
      "loss": 4.2275,
      "step": 2068
    },
    {
      "epoch": 0.02069,
      "grad_norm": 0.6599353273196071,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 2069
    },
    {
      "epoch": 0.0207,
      "grad_norm": 0.5850836932137713,
      "learning_rate": 0.003,
      "loss": 4.2188,
      "step": 2070
    },
    {
      "epoch": 0.02071,
      "grad_norm": 0.6074733078697232,
      "learning_rate": 0.003,
      "loss": 4.2476,
      "step": 2071
    },
    {
      "epoch": 0.02072,
      "grad_norm": 0.5920803977844451,
      "learning_rate": 0.003,
      "loss": 4.2537,
      "step": 2072
    },
    {
      "epoch": 0.02073,
      "grad_norm": 0.5924717703900239,
      "learning_rate": 0.003,
      "loss": 4.2322,
      "step": 2073
    },
    {
      "epoch": 0.02074,
      "grad_norm": 0.594618787583075,
      "learning_rate": 0.003,
      "loss": 4.2368,
      "step": 2074
    },
    {
      "epoch": 0.02075,
      "grad_norm": 0.5532830540657053,
      "learning_rate": 0.003,
      "loss": 4.2603,
      "step": 2075
    },
    {
      "epoch": 0.02076,
      "grad_norm": 0.6184715370341762,
      "learning_rate": 0.003,
      "loss": 4.2559,
      "step": 2076
    },
    {
      "epoch": 0.02077,
      "grad_norm": 0.6677430190730856,
      "learning_rate": 0.003,
      "loss": 4.2214,
      "step": 2077
    },
    {
      "epoch": 0.02078,
      "grad_norm": 0.8898743231750045,
      "learning_rate": 0.003,
      "loss": 4.2253,
      "step": 2078
    },
    {
      "epoch": 0.02079,
      "grad_norm": 1.1171860016868687,
      "learning_rate": 0.003,
      "loss": 4.2251,
      "step": 2079
    },
    {
      "epoch": 0.0208,
      "grad_norm": 1.0675912571031756,
      "learning_rate": 0.003,
      "loss": 4.2358,
      "step": 2080
    },
    {
      "epoch": 0.02081,
      "grad_norm": 1.02172099105771,
      "learning_rate": 0.003,
      "loss": 4.2752,
      "step": 2081
    },
    {
      "epoch": 0.02082,
      "grad_norm": 0.7358740084680295,
      "learning_rate": 0.003,
      "loss": 4.2588,
      "step": 2082
    },
    {
      "epoch": 0.02083,
      "grad_norm": 0.678950866367781,
      "learning_rate": 0.003,
      "loss": 4.2499,
      "step": 2083
    },
    {
      "epoch": 0.02084,
      "grad_norm": 0.6900443906154086,
      "learning_rate": 0.003,
      "loss": 4.2447,
      "step": 2084
    },
    {
      "epoch": 0.02085,
      "grad_norm": 0.709096344071076,
      "learning_rate": 0.003,
      "loss": 4.2198,
      "step": 2085
    },
    {
      "epoch": 0.02086,
      "grad_norm": 0.6415519097957504,
      "learning_rate": 0.003,
      "loss": 4.2227,
      "step": 2086
    },
    {
      "epoch": 0.02087,
      "grad_norm": 0.6111018129236451,
      "learning_rate": 0.003,
      "loss": 4.2218,
      "step": 2087
    },
    {
      "epoch": 0.02088,
      "grad_norm": 0.5614795365012508,
      "learning_rate": 0.003,
      "loss": 4.2531,
      "step": 2088
    },
    {
      "epoch": 0.02089,
      "grad_norm": 0.4937030612790938,
      "learning_rate": 0.003,
      "loss": 4.2175,
      "step": 2089
    },
    {
      "epoch": 0.0209,
      "grad_norm": 0.5203347838934052,
      "learning_rate": 0.003,
      "loss": 4.2372,
      "step": 2090
    },
    {
      "epoch": 0.02091,
      "grad_norm": 0.5562686236826955,
      "learning_rate": 0.003,
      "loss": 4.2263,
      "step": 2091
    },
    {
      "epoch": 0.02092,
      "grad_norm": 0.6151784723639154,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 2092
    },
    {
      "epoch": 0.02093,
      "grad_norm": 0.5966017113888583,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2093
    },
    {
      "epoch": 0.02094,
      "grad_norm": 0.5339381970320102,
      "learning_rate": 0.003,
      "loss": 4.197,
      "step": 2094
    },
    {
      "epoch": 0.02095,
      "grad_norm": 0.6298496102722991,
      "learning_rate": 0.003,
      "loss": 4.2379,
      "step": 2095
    },
    {
      "epoch": 0.02096,
      "grad_norm": 0.7946260614902717,
      "learning_rate": 0.003,
      "loss": 4.2367,
      "step": 2096
    },
    {
      "epoch": 0.02097,
      "grad_norm": 0.9470719378143645,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 2097
    },
    {
      "epoch": 0.02098,
      "grad_norm": 0.9116485878338217,
      "learning_rate": 0.003,
      "loss": 4.2439,
      "step": 2098
    },
    {
      "epoch": 0.02099,
      "grad_norm": 0.7133877388191158,
      "learning_rate": 0.003,
      "loss": 4.2442,
      "step": 2099
    },
    {
      "epoch": 0.021,
      "grad_norm": 0.6125463461181375,
      "learning_rate": 0.003,
      "loss": 4.2549,
      "step": 2100
    },
    {
      "epoch": 0.02101,
      "grad_norm": 0.6276708108187462,
      "learning_rate": 0.003,
      "loss": 4.2467,
      "step": 2101
    },
    {
      "epoch": 0.02102,
      "grad_norm": 0.76243038603311,
      "learning_rate": 0.003,
      "loss": 4.2509,
      "step": 2102
    },
    {
      "epoch": 0.02103,
      "grad_norm": 0.831814217647559,
      "learning_rate": 0.003,
      "loss": 4.2186,
      "step": 2103
    },
    {
      "epoch": 0.02104,
      "grad_norm": 0.8037622250501507,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 2104
    },
    {
      "epoch": 0.02105,
      "grad_norm": 0.773242874233749,
      "learning_rate": 0.003,
      "loss": 4.2254,
      "step": 2105
    },
    {
      "epoch": 0.02106,
      "grad_norm": 0.7723032929932112,
      "learning_rate": 0.003,
      "loss": 4.2206,
      "step": 2106
    },
    {
      "epoch": 0.02107,
      "grad_norm": 0.6777196727394571,
      "learning_rate": 0.003,
      "loss": 4.2302,
      "step": 2107
    },
    {
      "epoch": 0.02108,
      "grad_norm": 0.7155266180317776,
      "learning_rate": 0.003,
      "loss": 4.2133,
      "step": 2108
    },
    {
      "epoch": 0.02109,
      "grad_norm": 0.7307044349882023,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 2109
    },
    {
      "epoch": 0.0211,
      "grad_norm": 0.7246010606158528,
      "learning_rate": 0.003,
      "loss": 4.2211,
      "step": 2110
    },
    {
      "epoch": 0.02111,
      "grad_norm": 0.7003859578773313,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 2111
    },
    {
      "epoch": 0.02112,
      "grad_norm": 0.6571289356027246,
      "learning_rate": 0.003,
      "loss": 4.2265,
      "step": 2112
    },
    {
      "epoch": 0.02113,
      "grad_norm": 0.7142951538736501,
      "learning_rate": 0.003,
      "loss": 4.2259,
      "step": 2113
    },
    {
      "epoch": 0.02114,
      "grad_norm": 0.642746334410043,
      "learning_rate": 0.003,
      "loss": 4.2136,
      "step": 2114
    },
    {
      "epoch": 0.02115,
      "grad_norm": 0.5561928823494658,
      "learning_rate": 0.003,
      "loss": 4.2025,
      "step": 2115
    },
    {
      "epoch": 0.02116,
      "grad_norm": 0.4854911541916316,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 2116
    },
    {
      "epoch": 0.02117,
      "grad_norm": 0.4406562024092029,
      "learning_rate": 0.003,
      "loss": 4.2222,
      "step": 2117
    },
    {
      "epoch": 0.02118,
      "grad_norm": 0.44267808922719293,
      "learning_rate": 0.003,
      "loss": 4.2156,
      "step": 2118
    },
    {
      "epoch": 0.02119,
      "grad_norm": 0.4799600611934466,
      "learning_rate": 0.003,
      "loss": 4.2269,
      "step": 2119
    },
    {
      "epoch": 0.0212,
      "grad_norm": 0.5994897503195796,
      "learning_rate": 0.003,
      "loss": 4.202,
      "step": 2120
    },
    {
      "epoch": 0.02121,
      "grad_norm": 0.8543941081816084,
      "learning_rate": 0.003,
      "loss": 4.2042,
      "step": 2121
    },
    {
      "epoch": 0.02122,
      "grad_norm": 1.1792972986526515,
      "learning_rate": 0.003,
      "loss": 4.2347,
      "step": 2122
    },
    {
      "epoch": 0.02123,
      "grad_norm": 0.6741324159024102,
      "learning_rate": 0.003,
      "loss": 4.1979,
      "step": 2123
    },
    {
      "epoch": 0.02124,
      "grad_norm": 0.5314863327354328,
      "learning_rate": 0.003,
      "loss": 4.2178,
      "step": 2124
    },
    {
      "epoch": 0.02125,
      "grad_norm": 0.7086470592360375,
      "learning_rate": 0.003,
      "loss": 4.2389,
      "step": 2125
    },
    {
      "epoch": 0.02126,
      "grad_norm": 0.7578318446121494,
      "learning_rate": 0.003,
      "loss": 4.2324,
      "step": 2126
    },
    {
      "epoch": 0.02127,
      "grad_norm": 0.8658999413430398,
      "learning_rate": 0.003,
      "loss": 4.253,
      "step": 2127
    },
    {
      "epoch": 0.02128,
      "grad_norm": 0.8986841353197695,
      "learning_rate": 0.003,
      "loss": 4.2443,
      "step": 2128
    },
    {
      "epoch": 0.02129,
      "grad_norm": 0.8385623851205096,
      "learning_rate": 0.003,
      "loss": 4.2447,
      "step": 2129
    },
    {
      "epoch": 0.0213,
      "grad_norm": 0.8355943131013069,
      "learning_rate": 0.003,
      "loss": 4.2211,
      "step": 2130
    },
    {
      "epoch": 0.02131,
      "grad_norm": 0.793529743088508,
      "learning_rate": 0.003,
      "loss": 4.2487,
      "step": 2131
    },
    {
      "epoch": 0.02132,
      "grad_norm": 0.7536314785478804,
      "learning_rate": 0.003,
      "loss": 4.2567,
      "step": 2132
    },
    {
      "epoch": 0.02133,
      "grad_norm": 0.7395002132947835,
      "learning_rate": 0.003,
      "loss": 4.2539,
      "step": 2133
    },
    {
      "epoch": 0.02134,
      "grad_norm": 0.7463724088809484,
      "learning_rate": 0.003,
      "loss": 4.2559,
      "step": 2134
    },
    {
      "epoch": 0.02135,
      "grad_norm": 0.8316464480461356,
      "learning_rate": 0.003,
      "loss": 4.2584,
      "step": 2135
    },
    {
      "epoch": 0.02136,
      "grad_norm": 0.8795824396478255,
      "learning_rate": 0.003,
      "loss": 4.2294,
      "step": 2136
    },
    {
      "epoch": 0.02137,
      "grad_norm": 0.9902402983774369,
      "learning_rate": 0.003,
      "loss": 4.2714,
      "step": 2137
    },
    {
      "epoch": 0.02138,
      "grad_norm": 1.1393734479896327,
      "learning_rate": 0.003,
      "loss": 4.2998,
      "step": 2138
    },
    {
      "epoch": 0.02139,
      "grad_norm": 0.880379491281086,
      "learning_rate": 0.003,
      "loss": 4.2799,
      "step": 2139
    },
    {
      "epoch": 0.0214,
      "grad_norm": 0.7579598710856811,
      "learning_rate": 0.003,
      "loss": 4.2687,
      "step": 2140
    },
    {
      "epoch": 0.02141,
      "grad_norm": 0.7913256313860993,
      "learning_rate": 0.003,
      "loss": 4.2459,
      "step": 2141
    },
    {
      "epoch": 0.02142,
      "grad_norm": 0.8825604009248581,
      "learning_rate": 0.003,
      "loss": 4.2418,
      "step": 2142
    },
    {
      "epoch": 0.02143,
      "grad_norm": 0.9777590043525748,
      "learning_rate": 0.003,
      "loss": 4.2642,
      "step": 2143
    },
    {
      "epoch": 0.02144,
      "grad_norm": 1.0275563743451126,
      "learning_rate": 0.003,
      "loss": 4.2678,
      "step": 2144
    },
    {
      "epoch": 0.02145,
      "grad_norm": 0.7699073494834429,
      "learning_rate": 0.003,
      "loss": 4.2427,
      "step": 2145
    },
    {
      "epoch": 0.02146,
      "grad_norm": 0.5960629563316048,
      "learning_rate": 0.003,
      "loss": 4.2482,
      "step": 2146
    },
    {
      "epoch": 0.02147,
      "grad_norm": 0.5490133168290037,
      "learning_rate": 0.003,
      "loss": 4.2644,
      "step": 2147
    },
    {
      "epoch": 0.02148,
      "grad_norm": 0.5767497961920597,
      "learning_rate": 0.003,
      "loss": 4.2739,
      "step": 2148
    },
    {
      "epoch": 0.02149,
      "grad_norm": 0.4897484785451818,
      "learning_rate": 0.003,
      "loss": 4.2537,
      "step": 2149
    },
    {
      "epoch": 0.0215,
      "grad_norm": 0.45116068678975524,
      "learning_rate": 0.003,
      "loss": 4.2384,
      "step": 2150
    },
    {
      "epoch": 0.02151,
      "grad_norm": 0.37866677504367807,
      "learning_rate": 0.003,
      "loss": 4.2406,
      "step": 2151
    },
    {
      "epoch": 0.02152,
      "grad_norm": 0.35025559255506084,
      "learning_rate": 0.003,
      "loss": 4.2149,
      "step": 2152
    },
    {
      "epoch": 0.02153,
      "grad_norm": 0.34582936725569746,
      "learning_rate": 0.003,
      "loss": 4.2226,
      "step": 2153
    },
    {
      "epoch": 0.02154,
      "grad_norm": 0.3203952559256608,
      "learning_rate": 0.003,
      "loss": 4.2119,
      "step": 2154
    },
    {
      "epoch": 0.02155,
      "grad_norm": 0.29250609362491636,
      "learning_rate": 0.003,
      "loss": 4.2391,
      "step": 2155
    },
    {
      "epoch": 0.02156,
      "grad_norm": 0.3438204060042884,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 2156
    },
    {
      "epoch": 0.02157,
      "grad_norm": 0.3595905001354517,
      "learning_rate": 0.003,
      "loss": 4.1905,
      "step": 2157
    },
    {
      "epoch": 0.02158,
      "grad_norm": 0.42595432724572757,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 2158
    },
    {
      "epoch": 0.02159,
      "grad_norm": 0.6633695244520449,
      "learning_rate": 0.003,
      "loss": 4.2281,
      "step": 2159
    },
    {
      "epoch": 0.0216,
      "grad_norm": 1.007521973270438,
      "learning_rate": 0.003,
      "loss": 4.2436,
      "step": 2160
    },
    {
      "epoch": 0.02161,
      "grad_norm": 1.2248634852939608,
      "learning_rate": 0.003,
      "loss": 4.2476,
      "step": 2161
    },
    {
      "epoch": 0.02162,
      "grad_norm": 0.44924134107129593,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 2162
    },
    {
      "epoch": 0.02163,
      "grad_norm": 0.708237336037978,
      "learning_rate": 0.003,
      "loss": 4.2521,
      "step": 2163
    },
    {
      "epoch": 0.02164,
      "grad_norm": 0.7651720506018448,
      "learning_rate": 0.003,
      "loss": 4.2406,
      "step": 2164
    },
    {
      "epoch": 0.02165,
      "grad_norm": 0.7208522677692027,
      "learning_rate": 0.003,
      "loss": 4.2056,
      "step": 2165
    },
    {
      "epoch": 0.02166,
      "grad_norm": 0.7338853238041586,
      "learning_rate": 0.003,
      "loss": 4.248,
      "step": 2166
    },
    {
      "epoch": 0.02167,
      "grad_norm": 0.7434196157225088,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 2167
    },
    {
      "epoch": 0.02168,
      "grad_norm": 0.726421176827236,
      "learning_rate": 0.003,
      "loss": 4.2235,
      "step": 2168
    },
    {
      "epoch": 0.02169,
      "grad_norm": 0.6029539671407806,
      "learning_rate": 0.003,
      "loss": 4.2188,
      "step": 2169
    },
    {
      "epoch": 0.0217,
      "grad_norm": 0.6113346670733881,
      "learning_rate": 0.003,
      "loss": 4.2262,
      "step": 2170
    },
    {
      "epoch": 0.02171,
      "grad_norm": 0.5554372591572285,
      "learning_rate": 0.003,
      "loss": 4.2379,
      "step": 2171
    },
    {
      "epoch": 0.02172,
      "grad_norm": 0.5137481804853822,
      "learning_rate": 0.003,
      "loss": 4.2085,
      "step": 2172
    },
    {
      "epoch": 0.02173,
      "grad_norm": 0.43826435194750174,
      "learning_rate": 0.003,
      "loss": 4.2079,
      "step": 2173
    },
    {
      "epoch": 0.02174,
      "grad_norm": 0.4550674538213562,
      "learning_rate": 0.003,
      "loss": 4.2039,
      "step": 2174
    },
    {
      "epoch": 0.02175,
      "grad_norm": 0.4826492500585534,
      "learning_rate": 0.003,
      "loss": 4.2274,
      "step": 2175
    },
    {
      "epoch": 0.02176,
      "grad_norm": 0.5204683333159428,
      "learning_rate": 0.003,
      "loss": 4.2082,
      "step": 2176
    },
    {
      "epoch": 0.02177,
      "grad_norm": 0.5967890811282951,
      "learning_rate": 0.003,
      "loss": 4.2167,
      "step": 2177
    },
    {
      "epoch": 0.02178,
      "grad_norm": 0.6463619597188142,
      "learning_rate": 0.003,
      "loss": 4.1999,
      "step": 2178
    },
    {
      "epoch": 0.02179,
      "grad_norm": 0.6800531318971829,
      "learning_rate": 0.003,
      "loss": 4.2149,
      "step": 2179
    },
    {
      "epoch": 0.0218,
      "grad_norm": 0.6580527094136631,
      "learning_rate": 0.003,
      "loss": 4.229,
      "step": 2180
    },
    {
      "epoch": 0.02181,
      "grad_norm": 0.6208184897317314,
      "learning_rate": 0.003,
      "loss": 4.2289,
      "step": 2181
    },
    {
      "epoch": 0.02182,
      "grad_norm": 0.5846113298952428,
      "learning_rate": 0.003,
      "loss": 4.2148,
      "step": 2182
    },
    {
      "epoch": 0.02183,
      "grad_norm": 0.6556384450212658,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 2183
    },
    {
      "epoch": 0.02184,
      "grad_norm": 0.8938194731118257,
      "learning_rate": 0.003,
      "loss": 4.21,
      "step": 2184
    },
    {
      "epoch": 0.02185,
      "grad_norm": 1.4521805158873795,
      "learning_rate": 0.003,
      "loss": 4.2324,
      "step": 2185
    },
    {
      "epoch": 0.02186,
      "grad_norm": 0.7715834342950255,
      "learning_rate": 0.003,
      "loss": 4.231,
      "step": 2186
    },
    {
      "epoch": 0.02187,
      "grad_norm": 0.8581699640168193,
      "learning_rate": 0.003,
      "loss": 4.2381,
      "step": 2187
    },
    {
      "epoch": 0.02188,
      "grad_norm": 1.069027806910656,
      "learning_rate": 0.003,
      "loss": 4.2275,
      "step": 2188
    },
    {
      "epoch": 0.02189,
      "grad_norm": 1.0002813983051297,
      "learning_rate": 0.003,
      "loss": 4.2597,
      "step": 2189
    },
    {
      "epoch": 0.0219,
      "grad_norm": 0.9466471656385088,
      "learning_rate": 0.003,
      "loss": 4.2521,
      "step": 2190
    },
    {
      "epoch": 0.02191,
      "grad_norm": 0.8750563192902713,
      "learning_rate": 0.003,
      "loss": 4.236,
      "step": 2191
    },
    {
      "epoch": 0.02192,
      "grad_norm": 0.840341624246557,
      "learning_rate": 0.003,
      "loss": 4.2668,
      "step": 2192
    },
    {
      "epoch": 0.02193,
      "grad_norm": 0.7540813398590741,
      "learning_rate": 0.003,
      "loss": 4.2289,
      "step": 2193
    },
    {
      "epoch": 0.02194,
      "grad_norm": 0.6938742966655788,
      "learning_rate": 0.003,
      "loss": 4.2759,
      "step": 2194
    },
    {
      "epoch": 0.02195,
      "grad_norm": 0.6175768159775354,
      "learning_rate": 0.003,
      "loss": 4.2375,
      "step": 2195
    },
    {
      "epoch": 0.02196,
      "grad_norm": 0.525709002541586,
      "learning_rate": 0.003,
      "loss": 4.2424,
      "step": 2196
    },
    {
      "epoch": 0.02197,
      "grad_norm": 0.5074240255085518,
      "learning_rate": 0.003,
      "loss": 4.2329,
      "step": 2197
    },
    {
      "epoch": 0.02198,
      "grad_norm": 0.5397790588786494,
      "learning_rate": 0.003,
      "loss": 4.2239,
      "step": 2198
    },
    {
      "epoch": 0.02199,
      "grad_norm": 0.6107923796986595,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 2199
    },
    {
      "epoch": 0.022,
      "grad_norm": 0.843024485274174,
      "learning_rate": 0.003,
      "loss": 4.2284,
      "step": 2200
    },
    {
      "epoch": 0.02201,
      "grad_norm": 0.9840256721686866,
      "learning_rate": 0.003,
      "loss": 4.2546,
      "step": 2201
    },
    {
      "epoch": 0.02202,
      "grad_norm": 1.0784933389723226,
      "learning_rate": 0.003,
      "loss": 4.2428,
      "step": 2202
    },
    {
      "epoch": 0.02203,
      "grad_norm": 0.8532329582759304,
      "learning_rate": 0.003,
      "loss": 4.2706,
      "step": 2203
    },
    {
      "epoch": 0.02204,
      "grad_norm": 0.7139369856529667,
      "learning_rate": 0.003,
      "loss": 4.259,
      "step": 2204
    },
    {
      "epoch": 0.02205,
      "grad_norm": 0.7536669209652429,
      "learning_rate": 0.003,
      "loss": 4.258,
      "step": 2205
    },
    {
      "epoch": 0.02206,
      "grad_norm": 0.7857084413842441,
      "learning_rate": 0.003,
      "loss": 4.224,
      "step": 2206
    },
    {
      "epoch": 0.02207,
      "grad_norm": 0.7046511159473173,
      "learning_rate": 0.003,
      "loss": 4.2327,
      "step": 2207
    },
    {
      "epoch": 0.02208,
      "grad_norm": 0.6175016908695475,
      "learning_rate": 0.003,
      "loss": 4.2135,
      "step": 2208
    },
    {
      "epoch": 0.02209,
      "grad_norm": 0.6407707276531509,
      "learning_rate": 0.003,
      "loss": 4.2282,
      "step": 2209
    },
    {
      "epoch": 0.0221,
      "grad_norm": 0.5593920924645548,
      "learning_rate": 0.003,
      "loss": 4.2057,
      "step": 2210
    },
    {
      "epoch": 0.02211,
      "grad_norm": 0.5677819571205628,
      "learning_rate": 0.003,
      "loss": 4.216,
      "step": 2211
    },
    {
      "epoch": 0.02212,
      "grad_norm": 0.5260851177623344,
      "learning_rate": 0.003,
      "loss": 4.2125,
      "step": 2212
    },
    {
      "epoch": 0.02213,
      "grad_norm": 0.5193985345861573,
      "learning_rate": 0.003,
      "loss": 4.2187,
      "step": 2213
    },
    {
      "epoch": 0.02214,
      "grad_norm": 0.5150545706654319,
      "learning_rate": 0.003,
      "loss": 4.2111,
      "step": 2214
    },
    {
      "epoch": 0.02215,
      "grad_norm": 0.5981375357124284,
      "learning_rate": 0.003,
      "loss": 4.2178,
      "step": 2215
    },
    {
      "epoch": 0.02216,
      "grad_norm": 0.6928559733565658,
      "learning_rate": 0.003,
      "loss": 4.1968,
      "step": 2216
    },
    {
      "epoch": 0.02217,
      "grad_norm": 0.8190269184049962,
      "learning_rate": 0.003,
      "loss": 4.2012,
      "step": 2217
    },
    {
      "epoch": 0.02218,
      "grad_norm": 1.0426971364675302,
      "learning_rate": 0.003,
      "loss": 4.2407,
      "step": 2218
    },
    {
      "epoch": 0.02219,
      "grad_norm": 0.9675276960649843,
      "learning_rate": 0.003,
      "loss": 4.2359,
      "step": 2219
    },
    {
      "epoch": 0.0222,
      "grad_norm": 0.7683132622272745,
      "learning_rate": 0.003,
      "loss": 4.2262,
      "step": 2220
    },
    {
      "epoch": 0.02221,
      "grad_norm": 0.870281319346899,
      "learning_rate": 0.003,
      "loss": 4.2211,
      "step": 2221
    },
    {
      "epoch": 0.02222,
      "grad_norm": 0.9031315012246321,
      "learning_rate": 0.003,
      "loss": 4.2256,
      "step": 2222
    },
    {
      "epoch": 0.02223,
      "grad_norm": 0.981876469109676,
      "learning_rate": 0.003,
      "loss": 4.243,
      "step": 2223
    },
    {
      "epoch": 0.02224,
      "grad_norm": 1.0242324809795773,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 2224
    },
    {
      "epoch": 0.02225,
      "grad_norm": 0.9687768727218721,
      "learning_rate": 0.003,
      "loss": 4.2331,
      "step": 2225
    },
    {
      "epoch": 0.02226,
      "grad_norm": 0.8180232793014202,
      "learning_rate": 0.003,
      "loss": 4.2439,
      "step": 2226
    },
    {
      "epoch": 0.02227,
      "grad_norm": 0.8470443159450464,
      "learning_rate": 0.003,
      "loss": 4.244,
      "step": 2227
    },
    {
      "epoch": 0.02228,
      "grad_norm": 0.892588366281847,
      "learning_rate": 0.003,
      "loss": 4.2316,
      "step": 2228
    },
    {
      "epoch": 0.02229,
      "grad_norm": 0.9147609345948713,
      "learning_rate": 0.003,
      "loss": 4.2199,
      "step": 2229
    },
    {
      "epoch": 0.0223,
      "grad_norm": 0.7916782102081119,
      "learning_rate": 0.003,
      "loss": 4.2579,
      "step": 2230
    },
    {
      "epoch": 0.02231,
      "grad_norm": 0.8498212362921145,
      "learning_rate": 0.003,
      "loss": 4.2621,
      "step": 2231
    },
    {
      "epoch": 0.02232,
      "grad_norm": 0.8525982405062639,
      "learning_rate": 0.003,
      "loss": 4.268,
      "step": 2232
    },
    {
      "epoch": 0.02233,
      "grad_norm": 0.8718378998708317,
      "learning_rate": 0.003,
      "loss": 4.2384,
      "step": 2233
    },
    {
      "epoch": 0.02234,
      "grad_norm": 0.9791386495186906,
      "learning_rate": 0.003,
      "loss": 4.2628,
      "step": 2234
    },
    {
      "epoch": 0.02235,
      "grad_norm": 1.017250595852017,
      "learning_rate": 0.003,
      "loss": 4.2938,
      "step": 2235
    },
    {
      "epoch": 0.02236,
      "grad_norm": 0.963171893049642,
      "learning_rate": 0.003,
      "loss": 4.2483,
      "step": 2236
    },
    {
      "epoch": 0.02237,
      "grad_norm": 0.8231940931617269,
      "learning_rate": 0.003,
      "loss": 4.2712,
      "step": 2237
    },
    {
      "epoch": 0.02238,
      "grad_norm": 0.6960455304532995,
      "learning_rate": 0.003,
      "loss": 4.2462,
      "step": 2238
    },
    {
      "epoch": 0.02239,
      "grad_norm": 0.6934460578181165,
      "learning_rate": 0.003,
      "loss": 4.2313,
      "step": 2239
    },
    {
      "epoch": 0.0224,
      "grad_norm": 0.7144921574976273,
      "learning_rate": 0.003,
      "loss": 4.2144,
      "step": 2240
    },
    {
      "epoch": 0.02241,
      "grad_norm": 0.7809367350220192,
      "learning_rate": 0.003,
      "loss": 4.2455,
      "step": 2241
    },
    {
      "epoch": 0.02242,
      "grad_norm": 0.7589150655929028,
      "learning_rate": 0.003,
      "loss": 4.2271,
      "step": 2242
    },
    {
      "epoch": 0.02243,
      "grad_norm": 0.6383337337000002,
      "learning_rate": 0.003,
      "loss": 4.2512,
      "step": 2243
    },
    {
      "epoch": 0.02244,
      "grad_norm": 0.5988340655516636,
      "learning_rate": 0.003,
      "loss": 4.2259,
      "step": 2244
    },
    {
      "epoch": 0.02245,
      "grad_norm": 0.6045948555456119,
      "learning_rate": 0.003,
      "loss": 4.2118,
      "step": 2245
    },
    {
      "epoch": 0.02246,
      "grad_norm": 0.5516002496150979,
      "learning_rate": 0.003,
      "loss": 4.2377,
      "step": 2246
    },
    {
      "epoch": 0.02247,
      "grad_norm": 0.5370313797025699,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2247
    },
    {
      "epoch": 0.02248,
      "grad_norm": 0.48933002653887103,
      "learning_rate": 0.003,
      "loss": 4.2154,
      "step": 2248
    },
    {
      "epoch": 0.02249,
      "grad_norm": 0.42750734194283135,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 2249
    },
    {
      "epoch": 0.0225,
      "grad_norm": 0.3448759025517304,
      "learning_rate": 0.003,
      "loss": 4.2156,
      "step": 2250
    },
    {
      "epoch": 0.02251,
      "grad_norm": 0.3579581291564086,
      "learning_rate": 0.003,
      "loss": 4.2368,
      "step": 2251
    },
    {
      "epoch": 0.02252,
      "grad_norm": 0.3338868919973551,
      "learning_rate": 0.003,
      "loss": 4.2252,
      "step": 2252
    },
    {
      "epoch": 0.02253,
      "grad_norm": 0.2833980373722387,
      "learning_rate": 0.003,
      "loss": 4.2038,
      "step": 2253
    },
    {
      "epoch": 0.02254,
      "grad_norm": 0.3354848889066889,
      "learning_rate": 0.003,
      "loss": 4.2108,
      "step": 2254
    },
    {
      "epoch": 0.02255,
      "grad_norm": 0.34008562867521774,
      "learning_rate": 0.003,
      "loss": 4.192,
      "step": 2255
    },
    {
      "epoch": 0.02256,
      "grad_norm": 0.40752535782237725,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2256
    },
    {
      "epoch": 0.02257,
      "grad_norm": 0.7100736173105294,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 2257
    },
    {
      "epoch": 0.02258,
      "grad_norm": 1.2598699348045919,
      "learning_rate": 0.003,
      "loss": 4.2395,
      "step": 2258
    },
    {
      "epoch": 0.02259,
      "grad_norm": 0.8257304825969743,
      "learning_rate": 0.003,
      "loss": 4.1899,
      "step": 2259
    },
    {
      "epoch": 0.0226,
      "grad_norm": 0.5276115234939553,
      "learning_rate": 0.003,
      "loss": 4.235,
      "step": 2260
    },
    {
      "epoch": 0.02261,
      "grad_norm": 0.7264131381401914,
      "learning_rate": 0.003,
      "loss": 4.2191,
      "step": 2261
    },
    {
      "epoch": 0.02262,
      "grad_norm": 0.694830058733267,
      "learning_rate": 0.003,
      "loss": 4.2151,
      "step": 2262
    },
    {
      "epoch": 0.02263,
      "grad_norm": 0.6143146874121539,
      "learning_rate": 0.003,
      "loss": 4.228,
      "step": 2263
    },
    {
      "epoch": 0.02264,
      "grad_norm": 0.6817476502572059,
      "learning_rate": 0.003,
      "loss": 4.2217,
      "step": 2264
    },
    {
      "epoch": 0.02265,
      "grad_norm": 0.7652944991612053,
      "learning_rate": 0.003,
      "loss": 4.1817,
      "step": 2265
    },
    {
      "epoch": 0.02266,
      "grad_norm": 0.756233264477269,
      "learning_rate": 0.003,
      "loss": 4.2012,
      "step": 2266
    },
    {
      "epoch": 0.02267,
      "grad_norm": 0.736689028154492,
      "learning_rate": 0.003,
      "loss": 4.2277,
      "step": 2267
    },
    {
      "epoch": 0.02268,
      "grad_norm": 0.7402792571581283,
      "learning_rate": 0.003,
      "loss": 4.2036,
      "step": 2268
    },
    {
      "epoch": 0.02269,
      "grad_norm": 0.6161983408604007,
      "learning_rate": 0.003,
      "loss": 4.2232,
      "step": 2269
    },
    {
      "epoch": 0.0227,
      "grad_norm": 0.4962119655121251,
      "learning_rate": 0.003,
      "loss": 4.2158,
      "step": 2270
    },
    {
      "epoch": 0.02271,
      "grad_norm": 0.5302680760721596,
      "learning_rate": 0.003,
      "loss": 4.2188,
      "step": 2271
    },
    {
      "epoch": 0.02272,
      "grad_norm": 0.6394586434048679,
      "learning_rate": 0.003,
      "loss": 4.221,
      "step": 2272
    },
    {
      "epoch": 0.02273,
      "grad_norm": 0.7045324906964335,
      "learning_rate": 0.003,
      "loss": 4.2241,
      "step": 2273
    },
    {
      "epoch": 0.02274,
      "grad_norm": 0.7390109699118109,
      "learning_rate": 0.003,
      "loss": 4.2137,
      "step": 2274
    },
    {
      "epoch": 0.02275,
      "grad_norm": 0.7499581715087588,
      "learning_rate": 0.003,
      "loss": 4.2081,
      "step": 2275
    },
    {
      "epoch": 0.02276,
      "grad_norm": 0.7669015119641691,
      "learning_rate": 0.003,
      "loss": 4.2021,
      "step": 2276
    },
    {
      "epoch": 0.02277,
      "grad_norm": 0.7961571446299023,
      "learning_rate": 0.003,
      "loss": 4.2142,
      "step": 2277
    },
    {
      "epoch": 0.02278,
      "grad_norm": 0.9864062453804502,
      "learning_rate": 0.003,
      "loss": 4.2276,
      "step": 2278
    },
    {
      "epoch": 0.02279,
      "grad_norm": 0.9358624340244462,
      "learning_rate": 0.003,
      "loss": 4.2442,
      "step": 2279
    },
    {
      "epoch": 0.0228,
      "grad_norm": 0.812834471353606,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 2280
    },
    {
      "epoch": 0.02281,
      "grad_norm": 0.6684943400610098,
      "learning_rate": 0.003,
      "loss": 4.2176,
      "step": 2281
    },
    {
      "epoch": 0.02282,
      "grad_norm": 0.669619706421883,
      "learning_rate": 0.003,
      "loss": 4.2295,
      "step": 2282
    },
    {
      "epoch": 0.02283,
      "grad_norm": 0.6330282624675099,
      "learning_rate": 0.003,
      "loss": 4.2424,
      "step": 2283
    },
    {
      "epoch": 0.02284,
      "grad_norm": 0.5990447110243563,
      "learning_rate": 0.003,
      "loss": 4.2354,
      "step": 2284
    },
    {
      "epoch": 0.02285,
      "grad_norm": 0.628614060536484,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 2285
    },
    {
      "epoch": 0.02286,
      "grad_norm": 0.6697445056976077,
      "learning_rate": 0.003,
      "loss": 4.2382,
      "step": 2286
    },
    {
      "epoch": 0.02287,
      "grad_norm": 0.7952589491731732,
      "learning_rate": 0.003,
      "loss": 4.2228,
      "step": 2287
    },
    {
      "epoch": 0.02288,
      "grad_norm": 1.0045332281072228,
      "learning_rate": 0.003,
      "loss": 4.2596,
      "step": 2288
    },
    {
      "epoch": 0.02289,
      "grad_norm": 1.1532936973113286,
      "learning_rate": 0.003,
      "loss": 4.2697,
      "step": 2289
    },
    {
      "epoch": 0.0229,
      "grad_norm": 0.819312692673835,
      "learning_rate": 0.003,
      "loss": 4.2317,
      "step": 2290
    },
    {
      "epoch": 0.02291,
      "grad_norm": 0.7813095877942711,
      "learning_rate": 0.003,
      "loss": 4.2134,
      "step": 2291
    },
    {
      "epoch": 0.02292,
      "grad_norm": 0.7936575982558383,
      "learning_rate": 0.003,
      "loss": 4.207,
      "step": 2292
    },
    {
      "epoch": 0.02293,
      "grad_norm": 1.0074422757908905,
      "learning_rate": 0.003,
      "loss": 4.2266,
      "step": 2293
    },
    {
      "epoch": 0.02294,
      "grad_norm": 1.40961843940546,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 2294
    },
    {
      "epoch": 0.02295,
      "grad_norm": 0.7960792761101736,
      "learning_rate": 0.003,
      "loss": 4.2214,
      "step": 2295
    },
    {
      "epoch": 0.02296,
      "grad_norm": 0.7966163433797685,
      "learning_rate": 0.003,
      "loss": 4.2574,
      "step": 2296
    },
    {
      "epoch": 0.02297,
      "grad_norm": 0.9441515456535015,
      "learning_rate": 0.003,
      "loss": 4.2281,
      "step": 2297
    },
    {
      "epoch": 0.02298,
      "grad_norm": 0.9385676483518116,
      "learning_rate": 0.003,
      "loss": 4.2472,
      "step": 2298
    },
    {
      "epoch": 0.02299,
      "grad_norm": 0.8766202093405785,
      "learning_rate": 0.003,
      "loss": 4.243,
      "step": 2299
    },
    {
      "epoch": 0.023,
      "grad_norm": 0.8448821550504723,
      "learning_rate": 0.003,
      "loss": 4.2644,
      "step": 2300
    },
    {
      "epoch": 0.02301,
      "grad_norm": 0.9129468837162014,
      "learning_rate": 0.003,
      "loss": 4.2345,
      "step": 2301
    },
    {
      "epoch": 0.02302,
      "grad_norm": 0.8382327875972556,
      "learning_rate": 0.003,
      "loss": 4.2375,
      "step": 2302
    },
    {
      "epoch": 0.02303,
      "grad_norm": 0.6647794411742001,
      "learning_rate": 0.003,
      "loss": 4.2192,
      "step": 2303
    },
    {
      "epoch": 0.02304,
      "grad_norm": 0.567815223248062,
      "learning_rate": 0.003,
      "loss": 4.2476,
      "step": 2304
    },
    {
      "epoch": 0.02305,
      "grad_norm": 0.5306257096149566,
      "learning_rate": 0.003,
      "loss": 4.2208,
      "step": 2305
    },
    {
      "epoch": 0.02306,
      "grad_norm": 0.4934389293531822,
      "learning_rate": 0.003,
      "loss": 4.1942,
      "step": 2306
    },
    {
      "epoch": 0.02307,
      "grad_norm": 0.4668795011764801,
      "learning_rate": 0.003,
      "loss": 4.2225,
      "step": 2307
    },
    {
      "epoch": 0.02308,
      "grad_norm": 0.4206347578571924,
      "learning_rate": 0.003,
      "loss": 4.2125,
      "step": 2308
    },
    {
      "epoch": 0.02309,
      "grad_norm": 0.4940433849553269,
      "learning_rate": 0.003,
      "loss": 4.2381,
      "step": 2309
    },
    {
      "epoch": 0.0231,
      "grad_norm": 0.5558762008475976,
      "learning_rate": 0.003,
      "loss": 4.2182,
      "step": 2310
    },
    {
      "epoch": 0.02311,
      "grad_norm": 0.675026786943639,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 2311
    },
    {
      "epoch": 0.02312,
      "grad_norm": 0.8262505859792513,
      "learning_rate": 0.003,
      "loss": 4.2059,
      "step": 2312
    },
    {
      "epoch": 0.02313,
      "grad_norm": 0.9646885017751712,
      "learning_rate": 0.003,
      "loss": 4.2035,
      "step": 2313
    },
    {
      "epoch": 0.02314,
      "grad_norm": 0.9620181533726643,
      "learning_rate": 0.003,
      "loss": 4.2279,
      "step": 2314
    },
    {
      "epoch": 0.02315,
      "grad_norm": 0.7208688026542501,
      "learning_rate": 0.003,
      "loss": 4.2198,
      "step": 2315
    },
    {
      "epoch": 0.02316,
      "grad_norm": 0.6871143951606865,
      "learning_rate": 0.003,
      "loss": 4.233,
      "step": 2316
    },
    {
      "epoch": 0.02317,
      "grad_norm": 0.7343972203270459,
      "learning_rate": 0.003,
      "loss": 4.2178,
      "step": 2317
    },
    {
      "epoch": 0.02318,
      "grad_norm": 0.672729666884604,
      "learning_rate": 0.003,
      "loss": 4.2019,
      "step": 2318
    },
    {
      "epoch": 0.02319,
      "grad_norm": 0.5294586305376263,
      "learning_rate": 0.003,
      "loss": 4.2232,
      "step": 2319
    },
    {
      "epoch": 0.0232,
      "grad_norm": 0.45021836108537966,
      "learning_rate": 0.003,
      "loss": 4.228,
      "step": 2320
    },
    {
      "epoch": 0.02321,
      "grad_norm": 0.4332276722950085,
      "learning_rate": 0.003,
      "loss": 4.2056,
      "step": 2321
    },
    {
      "epoch": 0.02322,
      "grad_norm": 0.415331953066457,
      "learning_rate": 0.003,
      "loss": 4.2122,
      "step": 2322
    },
    {
      "epoch": 0.02323,
      "grad_norm": 0.4782940324906927,
      "learning_rate": 0.003,
      "loss": 4.188,
      "step": 2323
    },
    {
      "epoch": 0.02324,
      "grad_norm": 0.44973622532022933,
      "learning_rate": 0.003,
      "loss": 4.1851,
      "step": 2324
    },
    {
      "epoch": 0.02325,
      "grad_norm": 0.5367241864779755,
      "learning_rate": 0.003,
      "loss": 4.2104,
      "step": 2325
    },
    {
      "epoch": 0.02326,
      "grad_norm": 0.6485437733030329,
      "learning_rate": 0.003,
      "loss": 4.2262,
      "step": 2326
    },
    {
      "epoch": 0.02327,
      "grad_norm": 0.8108781550029248,
      "learning_rate": 0.003,
      "loss": 4.2167,
      "step": 2327
    },
    {
      "epoch": 0.02328,
      "grad_norm": 0.8492355692805335,
      "learning_rate": 0.003,
      "loss": 4.2165,
      "step": 2328
    },
    {
      "epoch": 0.02329,
      "grad_norm": 0.8631583290092871,
      "learning_rate": 0.003,
      "loss": 4.2143,
      "step": 2329
    },
    {
      "epoch": 0.0233,
      "grad_norm": 0.7974678745136863,
      "learning_rate": 0.003,
      "loss": 4.208,
      "step": 2330
    },
    {
      "epoch": 0.02331,
      "grad_norm": 0.7747922073327778,
      "learning_rate": 0.003,
      "loss": 4.2484,
      "step": 2331
    },
    {
      "epoch": 0.02332,
      "grad_norm": 0.7695930959807518,
      "learning_rate": 0.003,
      "loss": 4.2133,
      "step": 2332
    },
    {
      "epoch": 0.02333,
      "grad_norm": 0.8630069509222793,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 2333
    },
    {
      "epoch": 0.02334,
      "grad_norm": 0.8750358925315561,
      "learning_rate": 0.003,
      "loss": 4.1963,
      "step": 2334
    },
    {
      "epoch": 0.02335,
      "grad_norm": 0.8509103316399725,
      "learning_rate": 0.003,
      "loss": 4.2374,
      "step": 2335
    },
    {
      "epoch": 0.02336,
      "grad_norm": 0.7392709118908145,
      "learning_rate": 0.003,
      "loss": 4.2257,
      "step": 2336
    },
    {
      "epoch": 0.02337,
      "grad_norm": 0.6895369704663263,
      "learning_rate": 0.003,
      "loss": 4.2245,
      "step": 2337
    },
    {
      "epoch": 0.02338,
      "grad_norm": 0.7845765410625875,
      "learning_rate": 0.003,
      "loss": 4.2198,
      "step": 2338
    },
    {
      "epoch": 0.02339,
      "grad_norm": 0.874209978127432,
      "learning_rate": 0.003,
      "loss": 4.2152,
      "step": 2339
    },
    {
      "epoch": 0.0234,
      "grad_norm": 1.0163700561103481,
      "learning_rate": 0.003,
      "loss": 4.2423,
      "step": 2340
    },
    {
      "epoch": 0.02341,
      "grad_norm": 0.8157418854610229,
      "learning_rate": 0.003,
      "loss": 4.2078,
      "step": 2341
    },
    {
      "epoch": 0.02342,
      "grad_norm": 0.74346999717558,
      "learning_rate": 0.003,
      "loss": 4.2154,
      "step": 2342
    },
    {
      "epoch": 0.02343,
      "grad_norm": 0.7104070086359986,
      "learning_rate": 0.003,
      "loss": 4.2179,
      "step": 2343
    },
    {
      "epoch": 0.02344,
      "grad_norm": 0.6775910262494933,
      "learning_rate": 0.003,
      "loss": 4.2138,
      "step": 2344
    },
    {
      "epoch": 0.02345,
      "grad_norm": 0.7196047539014085,
      "learning_rate": 0.003,
      "loss": 4.2099,
      "step": 2345
    },
    {
      "epoch": 0.02346,
      "grad_norm": 0.8287165981467728,
      "learning_rate": 0.003,
      "loss": 4.2085,
      "step": 2346
    },
    {
      "epoch": 0.02347,
      "grad_norm": 0.9371300769994676,
      "learning_rate": 0.003,
      "loss": 4.252,
      "step": 2347
    },
    {
      "epoch": 0.02348,
      "grad_norm": 1.0680546915325266,
      "learning_rate": 0.003,
      "loss": 4.2253,
      "step": 2348
    },
    {
      "epoch": 0.02349,
      "grad_norm": 0.8280068780663966,
      "learning_rate": 0.003,
      "loss": 4.2341,
      "step": 2349
    },
    {
      "epoch": 0.0235,
      "grad_norm": 0.7133386737402891,
      "learning_rate": 0.003,
      "loss": 4.2259,
      "step": 2350
    },
    {
      "epoch": 0.02351,
      "grad_norm": 0.7596846967896084,
      "learning_rate": 0.003,
      "loss": 4.2382,
      "step": 2351
    },
    {
      "epoch": 0.02352,
      "grad_norm": 0.7235609932262353,
      "learning_rate": 0.003,
      "loss": 4.1922,
      "step": 2352
    },
    {
      "epoch": 0.02353,
      "grad_norm": 0.6625191060428061,
      "learning_rate": 0.003,
      "loss": 4.2225,
      "step": 2353
    },
    {
      "epoch": 0.02354,
      "grad_norm": 0.7210256062729306,
      "learning_rate": 0.003,
      "loss": 4.2281,
      "step": 2354
    },
    {
      "epoch": 0.02355,
      "grad_norm": 0.6784409469826875,
      "learning_rate": 0.003,
      "loss": 4.2244,
      "step": 2355
    },
    {
      "epoch": 0.02356,
      "grad_norm": 0.6340113796322493,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 2356
    },
    {
      "epoch": 0.02357,
      "grad_norm": 0.5872909081662198,
      "learning_rate": 0.003,
      "loss": 4.2037,
      "step": 2357
    },
    {
      "epoch": 0.02358,
      "grad_norm": 0.6012777742156055,
      "learning_rate": 0.003,
      "loss": 4.2251,
      "step": 2358
    },
    {
      "epoch": 0.02359,
      "grad_norm": 0.71706729373374,
      "learning_rate": 0.003,
      "loss": 4.2046,
      "step": 2359
    },
    {
      "epoch": 0.0236,
      "grad_norm": 0.815136439263558,
      "learning_rate": 0.003,
      "loss": 4.2434,
      "step": 2360
    },
    {
      "epoch": 0.02361,
      "grad_norm": 0.8003325831199607,
      "learning_rate": 0.003,
      "loss": 4.2261,
      "step": 2361
    },
    {
      "epoch": 0.02362,
      "grad_norm": 0.8097756127870204,
      "learning_rate": 0.003,
      "loss": 4.2177,
      "step": 2362
    },
    {
      "epoch": 0.02363,
      "grad_norm": 0.7914189012727164,
      "learning_rate": 0.003,
      "loss": 4.1921,
      "step": 2363
    },
    {
      "epoch": 0.02364,
      "grad_norm": 0.7565060855875264,
      "learning_rate": 0.003,
      "loss": 4.2126,
      "step": 2364
    },
    {
      "epoch": 0.02365,
      "grad_norm": 0.7332274502623093,
      "learning_rate": 0.003,
      "loss": 4.2307,
      "step": 2365
    },
    {
      "epoch": 0.02366,
      "grad_norm": 0.7247749395727022,
      "learning_rate": 0.003,
      "loss": 4.2058,
      "step": 2366
    },
    {
      "epoch": 0.02367,
      "grad_norm": 0.6754475163243551,
      "learning_rate": 0.003,
      "loss": 4.2138,
      "step": 2367
    },
    {
      "epoch": 0.02368,
      "grad_norm": 0.6381389762534017,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 2368
    },
    {
      "epoch": 0.02369,
      "grad_norm": 0.6120733905364695,
      "learning_rate": 0.003,
      "loss": 4.1978,
      "step": 2369
    },
    {
      "epoch": 0.0237,
      "grad_norm": 0.7329047764963631,
      "learning_rate": 0.003,
      "loss": 4.2283,
      "step": 2370
    },
    {
      "epoch": 0.02371,
      "grad_norm": 0.7596120866036141,
      "learning_rate": 0.003,
      "loss": 4.2137,
      "step": 2371
    },
    {
      "epoch": 0.02372,
      "grad_norm": 0.7920778056584535,
      "learning_rate": 0.003,
      "loss": 4.202,
      "step": 2372
    },
    {
      "epoch": 0.02373,
      "grad_norm": 0.6541726044177485,
      "learning_rate": 0.003,
      "loss": 4.1876,
      "step": 2373
    },
    {
      "epoch": 0.02374,
      "grad_norm": 0.4980669374964598,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2374
    },
    {
      "epoch": 0.02375,
      "grad_norm": 0.48620968233730555,
      "learning_rate": 0.003,
      "loss": 4.1923,
      "step": 2375
    },
    {
      "epoch": 0.02376,
      "grad_norm": 0.5343060557665553,
      "learning_rate": 0.003,
      "loss": 4.2157,
      "step": 2376
    },
    {
      "epoch": 0.02377,
      "grad_norm": 0.5346079035944942,
      "learning_rate": 0.003,
      "loss": 4.1829,
      "step": 2377
    },
    {
      "epoch": 0.02378,
      "grad_norm": 0.5345430148085093,
      "learning_rate": 0.003,
      "loss": 4.1926,
      "step": 2378
    },
    {
      "epoch": 0.02379,
      "grad_norm": 0.5981101437562691,
      "learning_rate": 0.003,
      "loss": 4.1826,
      "step": 2379
    },
    {
      "epoch": 0.0238,
      "grad_norm": 0.6559236976426223,
      "learning_rate": 0.003,
      "loss": 4.1575,
      "step": 2380
    },
    {
      "epoch": 0.02381,
      "grad_norm": 0.7048531875426997,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 2381
    },
    {
      "epoch": 0.02382,
      "grad_norm": 0.7200426361012294,
      "learning_rate": 0.003,
      "loss": 4.2311,
      "step": 2382
    },
    {
      "epoch": 0.02383,
      "grad_norm": 0.6892017198060378,
      "learning_rate": 0.003,
      "loss": 4.1909,
      "step": 2383
    },
    {
      "epoch": 0.02384,
      "grad_norm": 0.6858037418633958,
      "learning_rate": 0.003,
      "loss": 4.1796,
      "step": 2384
    },
    {
      "epoch": 0.02385,
      "grad_norm": 0.7487982436774401,
      "learning_rate": 0.003,
      "loss": 4.2087,
      "step": 2385
    },
    {
      "epoch": 0.02386,
      "grad_norm": 0.7730947217702177,
      "learning_rate": 0.003,
      "loss": 4.1913,
      "step": 2386
    },
    {
      "epoch": 0.02387,
      "grad_norm": 0.7349341583623172,
      "learning_rate": 0.003,
      "loss": 4.1849,
      "step": 2387
    },
    {
      "epoch": 0.02388,
      "grad_norm": 0.6224368008762636,
      "learning_rate": 0.003,
      "loss": 4.2345,
      "step": 2388
    },
    {
      "epoch": 0.02389,
      "grad_norm": 0.6226480564621836,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2389
    },
    {
      "epoch": 0.0239,
      "grad_norm": 0.6254191342622962,
      "learning_rate": 0.003,
      "loss": 4.1914,
      "step": 2390
    },
    {
      "epoch": 0.02391,
      "grad_norm": 0.6631610099694426,
      "learning_rate": 0.003,
      "loss": 4.2141,
      "step": 2391
    },
    {
      "epoch": 0.02392,
      "grad_norm": 0.8204612150837974,
      "learning_rate": 0.003,
      "loss": 4.2245,
      "step": 2392
    },
    {
      "epoch": 0.02393,
      "grad_norm": 1.0323111703597159,
      "learning_rate": 0.003,
      "loss": 4.1943,
      "step": 2393
    },
    {
      "epoch": 0.02394,
      "grad_norm": 1.0006388257189816,
      "learning_rate": 0.003,
      "loss": 4.2112,
      "step": 2394
    },
    {
      "epoch": 0.02395,
      "grad_norm": 0.7572494256888903,
      "learning_rate": 0.003,
      "loss": 4.2171,
      "step": 2395
    },
    {
      "epoch": 0.02396,
      "grad_norm": 0.5951984266508508,
      "learning_rate": 0.003,
      "loss": 4.2255,
      "step": 2396
    },
    {
      "epoch": 0.02397,
      "grad_norm": 0.6650102744746705,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 2397
    },
    {
      "epoch": 0.02398,
      "grad_norm": 0.6825039278747758,
      "learning_rate": 0.003,
      "loss": 4.2155,
      "step": 2398
    },
    {
      "epoch": 0.02399,
      "grad_norm": 0.6712187274269288,
      "learning_rate": 0.003,
      "loss": 4.1963,
      "step": 2399
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.9110222795383771,
      "learning_rate": 0.003,
      "loss": 4.2186,
      "step": 2400
    },
    {
      "epoch": 0.02401,
      "grad_norm": 1.0426771564263924,
      "learning_rate": 0.003,
      "loss": 4.2141,
      "step": 2401
    },
    {
      "epoch": 0.02402,
      "grad_norm": 0.8152831624353678,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 2402
    },
    {
      "epoch": 0.02403,
      "grad_norm": 0.8195630042569122,
      "learning_rate": 0.003,
      "loss": 4.2044,
      "step": 2403
    },
    {
      "epoch": 0.02404,
      "grad_norm": 0.8040024828088439,
      "learning_rate": 0.003,
      "loss": 4.243,
      "step": 2404
    },
    {
      "epoch": 0.02405,
      "grad_norm": 0.885388520382783,
      "learning_rate": 0.003,
      "loss": 4.2578,
      "step": 2405
    },
    {
      "epoch": 0.02406,
      "grad_norm": 1.0169970243763446,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 2406
    },
    {
      "epoch": 0.02407,
      "grad_norm": 1.1565060428680438,
      "learning_rate": 0.003,
      "loss": 4.2434,
      "step": 2407
    },
    {
      "epoch": 0.02408,
      "grad_norm": 1.1677513319022517,
      "learning_rate": 0.003,
      "loss": 4.2497,
      "step": 2408
    },
    {
      "epoch": 0.02409,
      "grad_norm": 1.151489835753338,
      "learning_rate": 0.003,
      "loss": 4.2634,
      "step": 2409
    },
    {
      "epoch": 0.0241,
      "grad_norm": 0.9730712891416885,
      "learning_rate": 0.003,
      "loss": 4.2617,
      "step": 2410
    },
    {
      "epoch": 0.02411,
      "grad_norm": 1.0934561640650202,
      "learning_rate": 0.003,
      "loss": 4.2362,
      "step": 2411
    },
    {
      "epoch": 0.02412,
      "grad_norm": 0.7564247171167698,
      "learning_rate": 0.003,
      "loss": 4.2331,
      "step": 2412
    },
    {
      "epoch": 0.02413,
      "grad_norm": 0.7364564581436038,
      "learning_rate": 0.003,
      "loss": 4.2269,
      "step": 2413
    },
    {
      "epoch": 0.02414,
      "grad_norm": 0.7499470770108977,
      "learning_rate": 0.003,
      "loss": 4.2434,
      "step": 2414
    },
    {
      "epoch": 0.02415,
      "grad_norm": 0.6688500224519887,
      "learning_rate": 0.003,
      "loss": 4.2361,
      "step": 2415
    },
    {
      "epoch": 0.02416,
      "grad_norm": 0.7749349047772426,
      "learning_rate": 0.003,
      "loss": 4.2233,
      "step": 2416
    },
    {
      "epoch": 0.02417,
      "grad_norm": 0.7661333853513042,
      "learning_rate": 0.003,
      "loss": 4.2471,
      "step": 2417
    },
    {
      "epoch": 0.02418,
      "grad_norm": 0.7328202199869238,
      "learning_rate": 0.003,
      "loss": 4.2352,
      "step": 2418
    },
    {
      "epoch": 0.02419,
      "grad_norm": 0.656398684318785,
      "learning_rate": 0.003,
      "loss": 4.2286,
      "step": 2419
    },
    {
      "epoch": 0.0242,
      "grad_norm": 0.7711466513132326,
      "learning_rate": 0.003,
      "loss": 4.2421,
      "step": 2420
    },
    {
      "epoch": 0.02421,
      "grad_norm": 0.8045742727715058,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2421
    },
    {
      "epoch": 0.02422,
      "grad_norm": 0.7983836017582577,
      "learning_rate": 0.003,
      "loss": 4.2596,
      "step": 2422
    },
    {
      "epoch": 0.02423,
      "grad_norm": 0.6735385382658433,
      "learning_rate": 0.003,
      "loss": 4.2455,
      "step": 2423
    },
    {
      "epoch": 0.02424,
      "grad_norm": 0.5460827395253804,
      "learning_rate": 0.003,
      "loss": 4.2363,
      "step": 2424
    },
    {
      "epoch": 0.02425,
      "grad_norm": 0.5835576651817266,
      "learning_rate": 0.003,
      "loss": 4.2198,
      "step": 2425
    },
    {
      "epoch": 0.02426,
      "grad_norm": 0.6629194085179074,
      "learning_rate": 0.003,
      "loss": 4.227,
      "step": 2426
    },
    {
      "epoch": 0.02427,
      "grad_norm": 0.613590623947857,
      "learning_rate": 0.003,
      "loss": 4.2496,
      "step": 2427
    },
    {
      "epoch": 0.02428,
      "grad_norm": 0.5297080386636701,
      "learning_rate": 0.003,
      "loss": 4.2173,
      "step": 2428
    },
    {
      "epoch": 0.02429,
      "grad_norm": 0.6059447656989463,
      "learning_rate": 0.003,
      "loss": 4.2063,
      "step": 2429
    },
    {
      "epoch": 0.0243,
      "grad_norm": 0.4983588726462706,
      "learning_rate": 0.003,
      "loss": 4.2252,
      "step": 2430
    },
    {
      "epoch": 0.02431,
      "grad_norm": 0.4714791093056159,
      "learning_rate": 0.003,
      "loss": 4.1992,
      "step": 2431
    },
    {
      "epoch": 0.02432,
      "grad_norm": 0.462354333964094,
      "learning_rate": 0.003,
      "loss": 4.2211,
      "step": 2432
    },
    {
      "epoch": 0.02433,
      "grad_norm": 0.5377944411614727,
      "learning_rate": 0.003,
      "loss": 4.2352,
      "step": 2433
    },
    {
      "epoch": 0.02434,
      "grad_norm": 0.72053618445036,
      "learning_rate": 0.003,
      "loss": 4.205,
      "step": 2434
    },
    {
      "epoch": 0.02435,
      "grad_norm": 0.9717077197610761,
      "learning_rate": 0.003,
      "loss": 4.2126,
      "step": 2435
    },
    {
      "epoch": 0.02436,
      "grad_norm": 1.0867436803782795,
      "learning_rate": 0.003,
      "loss": 4.2337,
      "step": 2436
    },
    {
      "epoch": 0.02437,
      "grad_norm": 0.7354406575791109,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 2437
    },
    {
      "epoch": 0.02438,
      "grad_norm": 0.5659849585693548,
      "learning_rate": 0.003,
      "loss": 4.2172,
      "step": 2438
    },
    {
      "epoch": 0.02439,
      "grad_norm": 0.7582631377117253,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2439
    },
    {
      "epoch": 0.0244,
      "grad_norm": 0.7519666599937619,
      "learning_rate": 0.003,
      "loss": 4.2388,
      "step": 2440
    },
    {
      "epoch": 0.02441,
      "grad_norm": 0.7095321843053358,
      "learning_rate": 0.003,
      "loss": 4.2023,
      "step": 2441
    },
    {
      "epoch": 0.02442,
      "grad_norm": 0.7610624757093507,
      "learning_rate": 0.003,
      "loss": 4.2377,
      "step": 2442
    },
    {
      "epoch": 0.02443,
      "grad_norm": 0.7404568199239232,
      "learning_rate": 0.003,
      "loss": 4.1741,
      "step": 2443
    },
    {
      "epoch": 0.02444,
      "grad_norm": 0.737847324812997,
      "learning_rate": 0.003,
      "loss": 4.2035,
      "step": 2444
    },
    {
      "epoch": 0.02445,
      "grad_norm": 0.8038624975628923,
      "learning_rate": 0.003,
      "loss": 4.2104,
      "step": 2445
    },
    {
      "epoch": 0.02446,
      "grad_norm": 0.9990291440971695,
      "learning_rate": 0.003,
      "loss": 4.2202,
      "step": 2446
    },
    {
      "epoch": 0.02447,
      "grad_norm": 0.9412206600088139,
      "learning_rate": 0.003,
      "loss": 4.2318,
      "step": 2447
    },
    {
      "epoch": 0.02448,
      "grad_norm": 0.8294445152569498,
      "learning_rate": 0.003,
      "loss": 4.2285,
      "step": 2448
    },
    {
      "epoch": 0.02449,
      "grad_norm": 0.689492443833379,
      "learning_rate": 0.003,
      "loss": 4.2155,
      "step": 2449
    },
    {
      "epoch": 0.0245,
      "grad_norm": 0.7414638724886635,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 2450
    },
    {
      "epoch": 0.02451,
      "grad_norm": 0.685024298981967,
      "learning_rate": 0.003,
      "loss": 4.198,
      "step": 2451
    },
    {
      "epoch": 0.02452,
      "grad_norm": 0.8346267890501767,
      "learning_rate": 0.003,
      "loss": 4.2105,
      "step": 2452
    },
    {
      "epoch": 0.02453,
      "grad_norm": 0.7934750617758409,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2453
    },
    {
      "epoch": 0.02454,
      "grad_norm": 0.751875693379249,
      "learning_rate": 0.003,
      "loss": 4.1943,
      "step": 2454
    },
    {
      "epoch": 0.02455,
      "grad_norm": 0.756218693628031,
      "learning_rate": 0.003,
      "loss": 4.2083,
      "step": 2455
    },
    {
      "epoch": 0.02456,
      "grad_norm": 0.7329329951461823,
      "learning_rate": 0.003,
      "loss": 4.1738,
      "step": 2456
    },
    {
      "epoch": 0.02457,
      "grad_norm": 0.6861603609350935,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 2457
    },
    {
      "epoch": 0.02458,
      "grad_norm": 0.7668783056267265,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 2458
    },
    {
      "epoch": 0.02459,
      "grad_norm": 0.9611463702448455,
      "learning_rate": 0.003,
      "loss": 4.2358,
      "step": 2459
    },
    {
      "epoch": 0.0246,
      "grad_norm": 1.2716359645134816,
      "learning_rate": 0.003,
      "loss": 4.2528,
      "step": 2460
    },
    {
      "epoch": 0.02461,
      "grad_norm": 0.6580810366854414,
      "learning_rate": 0.003,
      "loss": 4.2104,
      "step": 2461
    },
    {
      "epoch": 0.02462,
      "grad_norm": 0.663557756308718,
      "learning_rate": 0.003,
      "loss": 4.2392,
      "step": 2462
    },
    {
      "epoch": 0.02463,
      "grad_norm": 0.7136733681949328,
      "learning_rate": 0.003,
      "loss": 4.2379,
      "step": 2463
    },
    {
      "epoch": 0.02464,
      "grad_norm": 0.733904485720918,
      "learning_rate": 0.003,
      "loss": 4.2153,
      "step": 2464
    },
    {
      "epoch": 0.02465,
      "grad_norm": 0.7484568404460167,
      "learning_rate": 0.003,
      "loss": 4.2316,
      "step": 2465
    },
    {
      "epoch": 0.02466,
      "grad_norm": 0.7165740498978449,
      "learning_rate": 0.003,
      "loss": 4.2108,
      "step": 2466
    },
    {
      "epoch": 0.02467,
      "grad_norm": 0.7346927598557101,
      "learning_rate": 0.003,
      "loss": 4.1872,
      "step": 2467
    },
    {
      "epoch": 0.02468,
      "grad_norm": 0.7134409778557855,
      "learning_rate": 0.003,
      "loss": 4.2303,
      "step": 2468
    },
    {
      "epoch": 0.02469,
      "grad_norm": 0.7525235307550779,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 2469
    },
    {
      "epoch": 0.0247,
      "grad_norm": 0.6233720556235373,
      "learning_rate": 0.003,
      "loss": 4.2038,
      "step": 2470
    },
    {
      "epoch": 0.02471,
      "grad_norm": 0.6188790379984411,
      "learning_rate": 0.003,
      "loss": 4.1898,
      "step": 2471
    },
    {
      "epoch": 0.02472,
      "grad_norm": 0.5576000965553038,
      "learning_rate": 0.003,
      "loss": 4.1904,
      "step": 2472
    },
    {
      "epoch": 0.02473,
      "grad_norm": 0.5733874773234577,
      "learning_rate": 0.003,
      "loss": 4.1763,
      "step": 2473
    },
    {
      "epoch": 0.02474,
      "grad_norm": 0.5058689455726723,
      "learning_rate": 0.003,
      "loss": 4.2046,
      "step": 2474
    },
    {
      "epoch": 0.02475,
      "grad_norm": 0.5073987946337717,
      "learning_rate": 0.003,
      "loss": 4.1805,
      "step": 2475
    },
    {
      "epoch": 0.02476,
      "grad_norm": 0.5627394935612221,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2476
    },
    {
      "epoch": 0.02477,
      "grad_norm": 0.6236986968628518,
      "learning_rate": 0.003,
      "loss": 4.2085,
      "step": 2477
    },
    {
      "epoch": 0.02478,
      "grad_norm": 0.7245870854664911,
      "learning_rate": 0.003,
      "loss": 4.1902,
      "step": 2478
    },
    {
      "epoch": 0.02479,
      "grad_norm": 0.8546743805326398,
      "learning_rate": 0.003,
      "loss": 4.1806,
      "step": 2479
    },
    {
      "epoch": 0.0248,
      "grad_norm": 0.8522780412659844,
      "learning_rate": 0.003,
      "loss": 4.2257,
      "step": 2480
    },
    {
      "epoch": 0.02481,
      "grad_norm": 0.6535653800757177,
      "learning_rate": 0.003,
      "loss": 4.2011,
      "step": 2481
    },
    {
      "epoch": 0.02482,
      "grad_norm": 0.6344492058438189,
      "learning_rate": 0.003,
      "loss": 4.2089,
      "step": 2482
    },
    {
      "epoch": 0.02483,
      "grad_norm": 0.6268828941754008,
      "learning_rate": 0.003,
      "loss": 4.1621,
      "step": 2483
    },
    {
      "epoch": 0.02484,
      "grad_norm": 0.5957623078783284,
      "learning_rate": 0.003,
      "loss": 4.1893,
      "step": 2484
    },
    {
      "epoch": 0.02485,
      "grad_norm": 0.6986492947662325,
      "learning_rate": 0.003,
      "loss": 4.2156,
      "step": 2485
    },
    {
      "epoch": 0.02486,
      "grad_norm": 0.7414315414337274,
      "learning_rate": 0.003,
      "loss": 4.24,
      "step": 2486
    },
    {
      "epoch": 0.02487,
      "grad_norm": 0.6569802880365471,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 2487
    },
    {
      "epoch": 0.02488,
      "grad_norm": 0.5756977058711745,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 2488
    },
    {
      "epoch": 0.02489,
      "grad_norm": 0.5841485659003718,
      "learning_rate": 0.003,
      "loss": 4.1603,
      "step": 2489
    },
    {
      "epoch": 0.0249,
      "grad_norm": 0.6313109185297392,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 2490
    },
    {
      "epoch": 0.02491,
      "grad_norm": 0.587258429220503,
      "learning_rate": 0.003,
      "loss": 4.1919,
      "step": 2491
    },
    {
      "epoch": 0.02492,
      "grad_norm": 0.6796255021246645,
      "learning_rate": 0.003,
      "loss": 4.2045,
      "step": 2492
    },
    {
      "epoch": 0.02493,
      "grad_norm": 0.9386099062698943,
      "learning_rate": 0.003,
      "loss": 4.1849,
      "step": 2493
    },
    {
      "epoch": 0.02494,
      "grad_norm": 1.0325794653718277,
      "learning_rate": 0.003,
      "loss": 4.2058,
      "step": 2494
    },
    {
      "epoch": 0.02495,
      "grad_norm": 0.8034595706724524,
      "learning_rate": 0.003,
      "loss": 4.2081,
      "step": 2495
    },
    {
      "epoch": 0.02496,
      "grad_norm": 0.716975622530452,
      "learning_rate": 0.003,
      "loss": 4.1957,
      "step": 2496
    },
    {
      "epoch": 0.02497,
      "grad_norm": 0.7796292204460298,
      "learning_rate": 0.003,
      "loss": 4.2044,
      "step": 2497
    },
    {
      "epoch": 0.02498,
      "grad_norm": 0.8038852090110774,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2498
    },
    {
      "epoch": 0.02499,
      "grad_norm": 0.8923238576494511,
      "learning_rate": 0.003,
      "loss": 4.2097,
      "step": 2499
    },
    {
      "epoch": 0.025,
      "grad_norm": 0.7727598755079679,
      "learning_rate": 0.003,
      "loss": 4.1936,
      "step": 2500
    },
    {
      "epoch": 0.02501,
      "grad_norm": 0.6495309431942861,
      "learning_rate": 0.003,
      "loss": 4.2218,
      "step": 2501
    },
    {
      "epoch": 0.02502,
      "grad_norm": 0.7137648751903825,
      "learning_rate": 0.003,
      "loss": 4.2041,
      "step": 2502
    },
    {
      "epoch": 0.02503,
      "grad_norm": 0.6940762540850597,
      "learning_rate": 0.003,
      "loss": 4.2094,
      "step": 2503
    },
    {
      "epoch": 0.02504,
      "grad_norm": 0.6648229735163252,
      "learning_rate": 0.003,
      "loss": 4.1887,
      "step": 2504
    },
    {
      "epoch": 0.02505,
      "grad_norm": 0.6560245286717432,
      "learning_rate": 0.003,
      "loss": 4.1892,
      "step": 2505
    },
    {
      "epoch": 0.02506,
      "grad_norm": 0.6908954566997763,
      "learning_rate": 0.003,
      "loss": 4.2059,
      "step": 2506
    },
    {
      "epoch": 0.02507,
      "grad_norm": 0.7070750009001837,
      "learning_rate": 0.003,
      "loss": 4.1925,
      "step": 2507
    },
    {
      "epoch": 0.02508,
      "grad_norm": 0.6441719185425088,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2508
    },
    {
      "epoch": 0.02509,
      "grad_norm": 0.6590084093122774,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 2509
    },
    {
      "epoch": 0.0251,
      "grad_norm": 0.7745888760033045,
      "learning_rate": 0.003,
      "loss": 4.2104,
      "step": 2510
    },
    {
      "epoch": 0.02511,
      "grad_norm": 0.7154617490554088,
      "learning_rate": 0.003,
      "loss": 4.2169,
      "step": 2511
    },
    {
      "epoch": 0.02512,
      "grad_norm": 0.6776987606227394,
      "learning_rate": 0.003,
      "loss": 4.198,
      "step": 2512
    },
    {
      "epoch": 0.02513,
      "grad_norm": 0.766684849732399,
      "learning_rate": 0.003,
      "loss": 4.1988,
      "step": 2513
    },
    {
      "epoch": 0.02514,
      "grad_norm": 0.727274001755,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2514
    },
    {
      "epoch": 0.02515,
      "grad_norm": 0.8761441531793727,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 2515
    },
    {
      "epoch": 0.02516,
      "grad_norm": 1.0755772178541487,
      "learning_rate": 0.003,
      "loss": 4.2163,
      "step": 2516
    },
    {
      "epoch": 0.02517,
      "grad_norm": 1.1923834520518084,
      "learning_rate": 0.003,
      "loss": 4.2272,
      "step": 2517
    },
    {
      "epoch": 0.02518,
      "grad_norm": 0.9485906454874968,
      "learning_rate": 0.003,
      "loss": 4.2292,
      "step": 2518
    },
    {
      "epoch": 0.02519,
      "grad_norm": 0.7415170618596796,
      "learning_rate": 0.003,
      "loss": 4.1985,
      "step": 2519
    },
    {
      "epoch": 0.0252,
      "grad_norm": 0.6944412745696904,
      "learning_rate": 0.003,
      "loss": 4.2134,
      "step": 2520
    },
    {
      "epoch": 0.02521,
      "grad_norm": 0.7068074135536067,
      "learning_rate": 0.003,
      "loss": 4.208,
      "step": 2521
    },
    {
      "epoch": 0.02522,
      "grad_norm": 0.7700461552439566,
      "learning_rate": 0.003,
      "loss": 4.2059,
      "step": 2522
    },
    {
      "epoch": 0.02523,
      "grad_norm": 0.9732286210252518,
      "learning_rate": 0.003,
      "loss": 4.2168,
      "step": 2523
    },
    {
      "epoch": 0.02524,
      "grad_norm": 1.2306281629844058,
      "learning_rate": 0.003,
      "loss": 4.2416,
      "step": 2524
    },
    {
      "epoch": 0.02525,
      "grad_norm": 0.8190686438601459,
      "learning_rate": 0.003,
      "loss": 4.241,
      "step": 2525
    },
    {
      "epoch": 0.02526,
      "grad_norm": 0.742262591344972,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 2526
    },
    {
      "epoch": 0.02527,
      "grad_norm": 0.8569784978950316,
      "learning_rate": 0.003,
      "loss": 4.2213,
      "step": 2527
    },
    {
      "epoch": 0.02528,
      "grad_norm": 1.0273779919027248,
      "learning_rate": 0.003,
      "loss": 4.2493,
      "step": 2528
    },
    {
      "epoch": 0.02529,
      "grad_norm": 1.4013611330219795,
      "learning_rate": 0.003,
      "loss": 4.236,
      "step": 2529
    },
    {
      "epoch": 0.0253,
      "grad_norm": 0.6267544057110621,
      "learning_rate": 0.003,
      "loss": 4.234,
      "step": 2530
    },
    {
      "epoch": 0.02531,
      "grad_norm": 0.7194215796299043,
      "learning_rate": 0.003,
      "loss": 4.2346,
      "step": 2531
    },
    {
      "epoch": 0.02532,
      "grad_norm": 0.8042428452042162,
      "learning_rate": 0.003,
      "loss": 4.206,
      "step": 2532
    },
    {
      "epoch": 0.02533,
      "grad_norm": 0.7542292983077659,
      "learning_rate": 0.003,
      "loss": 4.242,
      "step": 2533
    },
    {
      "epoch": 0.02534,
      "grad_norm": 0.7181838268024839,
      "learning_rate": 0.003,
      "loss": 4.2168,
      "step": 2534
    },
    {
      "epoch": 0.02535,
      "grad_norm": 0.8381740222549681,
      "learning_rate": 0.003,
      "loss": 4.2139,
      "step": 2535
    },
    {
      "epoch": 0.02536,
      "grad_norm": 0.745200112168368,
      "learning_rate": 0.003,
      "loss": 4.2088,
      "step": 2536
    },
    {
      "epoch": 0.02537,
      "grad_norm": 0.6069281829403523,
      "learning_rate": 0.003,
      "loss": 4.1974,
      "step": 2537
    },
    {
      "epoch": 0.02538,
      "grad_norm": 0.590427378826761,
      "learning_rate": 0.003,
      "loss": 4.1708,
      "step": 2538
    },
    {
      "epoch": 0.02539,
      "grad_norm": 0.5837823282964978,
      "learning_rate": 0.003,
      "loss": 4.2201,
      "step": 2539
    },
    {
      "epoch": 0.0254,
      "grad_norm": 0.538959841561464,
      "learning_rate": 0.003,
      "loss": 4.2093,
      "step": 2540
    },
    {
      "epoch": 0.02541,
      "grad_norm": 0.42790014377314195,
      "learning_rate": 0.003,
      "loss": 4.1959,
      "step": 2541
    },
    {
      "epoch": 0.02542,
      "grad_norm": 0.3893070770616937,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 2542
    },
    {
      "epoch": 0.02543,
      "grad_norm": 0.43694196231407156,
      "learning_rate": 0.003,
      "loss": 4.2002,
      "step": 2543
    },
    {
      "epoch": 0.02544,
      "grad_norm": 0.5763133602608426,
      "learning_rate": 0.003,
      "loss": 4.1903,
      "step": 2544
    },
    {
      "epoch": 0.02545,
      "grad_norm": 0.8655900046851385,
      "learning_rate": 0.003,
      "loss": 4.179,
      "step": 2545
    },
    {
      "epoch": 0.02546,
      "grad_norm": 1.0957171422862184,
      "learning_rate": 0.003,
      "loss": 4.2013,
      "step": 2546
    },
    {
      "epoch": 0.02547,
      "grad_norm": 0.7852241671565661,
      "learning_rate": 0.003,
      "loss": 4.2326,
      "step": 2547
    },
    {
      "epoch": 0.02548,
      "grad_norm": 0.568158079879503,
      "learning_rate": 0.003,
      "loss": 4.2222,
      "step": 2548
    },
    {
      "epoch": 0.02549,
      "grad_norm": 0.6340487565296964,
      "learning_rate": 0.003,
      "loss": 4.217,
      "step": 2549
    },
    {
      "epoch": 0.0255,
      "grad_norm": 0.7406725193163024,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 2550
    },
    {
      "epoch": 0.02551,
      "grad_norm": 0.6828594076090021,
      "learning_rate": 0.003,
      "loss": 4.2087,
      "step": 2551
    },
    {
      "epoch": 0.02552,
      "grad_norm": 0.6588298930877766,
      "learning_rate": 0.003,
      "loss": 4.1785,
      "step": 2552
    },
    {
      "epoch": 0.02553,
      "grad_norm": 0.6711194686482366,
      "learning_rate": 0.003,
      "loss": 4.2417,
      "step": 2553
    },
    {
      "epoch": 0.02554,
      "grad_norm": 0.7183069611221199,
      "learning_rate": 0.003,
      "loss": 4.1948,
      "step": 2554
    },
    {
      "epoch": 0.02555,
      "grad_norm": 0.7594283334724947,
      "learning_rate": 0.003,
      "loss": 4.1875,
      "step": 2555
    },
    {
      "epoch": 0.02556,
      "grad_norm": 0.7544165526703748,
      "learning_rate": 0.003,
      "loss": 4.1963,
      "step": 2556
    },
    {
      "epoch": 0.02557,
      "grad_norm": 0.8031811764161461,
      "learning_rate": 0.003,
      "loss": 4.2063,
      "step": 2557
    },
    {
      "epoch": 0.02558,
      "grad_norm": 0.7417815254052628,
      "learning_rate": 0.003,
      "loss": 4.2195,
      "step": 2558
    },
    {
      "epoch": 0.02559,
      "grad_norm": 0.668258837116674,
      "learning_rate": 0.003,
      "loss": 4.216,
      "step": 2559
    },
    {
      "epoch": 0.0256,
      "grad_norm": 0.7338986949145504,
      "learning_rate": 0.003,
      "loss": 4.1905,
      "step": 2560
    },
    {
      "epoch": 0.02561,
      "grad_norm": 0.7321938936149771,
      "learning_rate": 0.003,
      "loss": 4.1926,
      "step": 2561
    },
    {
      "epoch": 0.02562,
      "grad_norm": 0.7675474512341959,
      "learning_rate": 0.003,
      "loss": 4.2107,
      "step": 2562
    },
    {
      "epoch": 0.02563,
      "grad_norm": 0.7554556387560584,
      "learning_rate": 0.003,
      "loss": 4.1935,
      "step": 2563
    },
    {
      "epoch": 0.02564,
      "grad_norm": 0.7775647520234856,
      "learning_rate": 0.003,
      "loss": 4.2162,
      "step": 2564
    },
    {
      "epoch": 0.02565,
      "grad_norm": 0.7534991095864281,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2565
    },
    {
      "epoch": 0.02566,
      "grad_norm": 0.7288648663551685,
      "learning_rate": 0.003,
      "loss": 4.2173,
      "step": 2566
    },
    {
      "epoch": 0.02567,
      "grad_norm": 0.7215669034727294,
      "learning_rate": 0.003,
      "loss": 4.2131,
      "step": 2567
    },
    {
      "epoch": 0.02568,
      "grad_norm": 0.8563040223027246,
      "learning_rate": 0.003,
      "loss": 4.2138,
      "step": 2568
    },
    {
      "epoch": 0.02569,
      "grad_norm": 1.1080869622355456,
      "learning_rate": 0.003,
      "loss": 4.2025,
      "step": 2569
    },
    {
      "epoch": 0.0257,
      "grad_norm": 1.1075262903516023,
      "learning_rate": 0.003,
      "loss": 4.2183,
      "step": 2570
    },
    {
      "epoch": 0.02571,
      "grad_norm": 0.729899139052775,
      "learning_rate": 0.003,
      "loss": 4.2237,
      "step": 2571
    },
    {
      "epoch": 0.02572,
      "grad_norm": 0.6904609591483578,
      "learning_rate": 0.003,
      "loss": 4.2007,
      "step": 2572
    },
    {
      "epoch": 0.02573,
      "grad_norm": 0.8131518041712269,
      "learning_rate": 0.003,
      "loss": 4.2089,
      "step": 2573
    },
    {
      "epoch": 0.02574,
      "grad_norm": 0.7847619978392997,
      "learning_rate": 0.003,
      "loss": 4.2345,
      "step": 2574
    },
    {
      "epoch": 0.02575,
      "grad_norm": 0.778225241603638,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 2575
    },
    {
      "epoch": 0.02576,
      "grad_norm": 0.8302954615357605,
      "learning_rate": 0.003,
      "loss": 4.1776,
      "step": 2576
    },
    {
      "epoch": 0.02577,
      "grad_norm": 0.8152270887192647,
      "learning_rate": 0.003,
      "loss": 4.1841,
      "step": 2577
    },
    {
      "epoch": 0.02578,
      "grad_norm": 0.7255559351586255,
      "learning_rate": 0.003,
      "loss": 4.2031,
      "step": 2578
    },
    {
      "epoch": 0.02579,
      "grad_norm": 0.7449798683561115,
      "learning_rate": 0.003,
      "loss": 4.1513,
      "step": 2579
    },
    {
      "epoch": 0.0258,
      "grad_norm": 0.6502036682045843,
      "learning_rate": 0.003,
      "loss": 4.2051,
      "step": 2580
    },
    {
      "epoch": 0.02581,
      "grad_norm": 0.6737707023470867,
      "learning_rate": 0.003,
      "loss": 4.2262,
      "step": 2581
    },
    {
      "epoch": 0.02582,
      "grad_norm": 0.764427888615585,
      "learning_rate": 0.003,
      "loss": 4.224,
      "step": 2582
    },
    {
      "epoch": 0.02583,
      "grad_norm": 0.8504470874659034,
      "learning_rate": 0.003,
      "loss": 4.2026,
      "step": 2583
    },
    {
      "epoch": 0.02584,
      "grad_norm": 1.046258332322794,
      "learning_rate": 0.003,
      "loss": 4.221,
      "step": 2584
    },
    {
      "epoch": 0.02585,
      "grad_norm": 0.9031370563550914,
      "learning_rate": 0.003,
      "loss": 4.2014,
      "step": 2585
    },
    {
      "epoch": 0.02586,
      "grad_norm": 0.689996238011,
      "learning_rate": 0.003,
      "loss": 4.217,
      "step": 2586
    },
    {
      "epoch": 0.02587,
      "grad_norm": 0.7274353541891899,
      "learning_rate": 0.003,
      "loss": 4.2051,
      "step": 2587
    },
    {
      "epoch": 0.02588,
      "grad_norm": 0.7517253783207216,
      "learning_rate": 0.003,
      "loss": 4.2032,
      "step": 2588
    },
    {
      "epoch": 0.02589,
      "grad_norm": 0.8107477123925885,
      "learning_rate": 0.003,
      "loss": 4.2123,
      "step": 2589
    },
    {
      "epoch": 0.0259,
      "grad_norm": 0.7806508541147034,
      "learning_rate": 0.003,
      "loss": 4.2204,
      "step": 2590
    },
    {
      "epoch": 0.02591,
      "grad_norm": 0.6296252175534539,
      "learning_rate": 0.003,
      "loss": 4.1497,
      "step": 2591
    },
    {
      "epoch": 0.02592,
      "grad_norm": 0.5752391696870554,
      "learning_rate": 0.003,
      "loss": 4.1918,
      "step": 2592
    },
    {
      "epoch": 0.02593,
      "grad_norm": 0.517372047833294,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 2593
    },
    {
      "epoch": 0.02594,
      "grad_norm": 0.5906922889145111,
      "learning_rate": 0.003,
      "loss": 4.2115,
      "step": 2594
    },
    {
      "epoch": 0.02595,
      "grad_norm": 0.6262440905091147,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 2595
    },
    {
      "epoch": 0.02596,
      "grad_norm": 0.5949484998914474,
      "learning_rate": 0.003,
      "loss": 4.2045,
      "step": 2596
    },
    {
      "epoch": 0.02597,
      "grad_norm": 0.6006456318859384,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 2597
    },
    {
      "epoch": 0.02598,
      "grad_norm": 0.5609952650146841,
      "learning_rate": 0.003,
      "loss": 4.2084,
      "step": 2598
    },
    {
      "epoch": 0.02599,
      "grad_norm": 0.6441455777508067,
      "learning_rate": 0.003,
      "loss": 4.2067,
      "step": 2599
    },
    {
      "epoch": 0.026,
      "grad_norm": 0.7930588451098299,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 2600
    },
    {
      "epoch": 0.02601,
      "grad_norm": 0.8782723700452447,
      "learning_rate": 0.003,
      "loss": 4.2212,
      "step": 2601
    },
    {
      "epoch": 0.02602,
      "grad_norm": 1.1759564543141179,
      "learning_rate": 0.003,
      "loss": 4.2063,
      "step": 2602
    },
    {
      "epoch": 0.02603,
      "grad_norm": 0.8180315225343916,
      "learning_rate": 0.003,
      "loss": 4.19,
      "step": 2603
    },
    {
      "epoch": 0.02604,
      "grad_norm": 0.7244608750007608,
      "learning_rate": 0.003,
      "loss": 4.1809,
      "step": 2604
    },
    {
      "epoch": 0.02605,
      "grad_norm": 0.7440911950642829,
      "learning_rate": 0.003,
      "loss": 4.1962,
      "step": 2605
    },
    {
      "epoch": 0.02606,
      "grad_norm": 0.7799278089279328,
      "learning_rate": 0.003,
      "loss": 4.2191,
      "step": 2606
    },
    {
      "epoch": 0.02607,
      "grad_norm": 0.7713270098545678,
      "learning_rate": 0.003,
      "loss": 4.2101,
      "step": 2607
    },
    {
      "epoch": 0.02608,
      "grad_norm": 0.899578361209406,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 2608
    },
    {
      "epoch": 0.02609,
      "grad_norm": 0.8737732849824265,
      "learning_rate": 0.003,
      "loss": 4.24,
      "step": 2609
    },
    {
      "epoch": 0.0261,
      "grad_norm": 0.8947617166383927,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2610
    },
    {
      "epoch": 0.02611,
      "grad_norm": 0.8562728807594239,
      "learning_rate": 0.003,
      "loss": 4.1837,
      "step": 2611
    },
    {
      "epoch": 0.02612,
      "grad_norm": 0.7163237085139472,
      "learning_rate": 0.003,
      "loss": 4.2141,
      "step": 2612
    },
    {
      "epoch": 0.02613,
      "grad_norm": 0.7068103471337065,
      "learning_rate": 0.003,
      "loss": 4.2043,
      "step": 2613
    },
    {
      "epoch": 0.02614,
      "grad_norm": 0.7050979670307006,
      "learning_rate": 0.003,
      "loss": 4.1848,
      "step": 2614
    },
    {
      "epoch": 0.02615,
      "grad_norm": 0.6688602437390169,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 2615
    },
    {
      "epoch": 0.02616,
      "grad_norm": 0.7303156076641851,
      "learning_rate": 0.003,
      "loss": 4.1875,
      "step": 2616
    },
    {
      "epoch": 0.02617,
      "grad_norm": 0.8575518403182025,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 2617
    },
    {
      "epoch": 0.02618,
      "grad_norm": 0.7963276560962136,
      "learning_rate": 0.003,
      "loss": 4.2077,
      "step": 2618
    },
    {
      "epoch": 0.02619,
      "grad_norm": 0.7084342029167996,
      "learning_rate": 0.003,
      "loss": 4.2176,
      "step": 2619
    },
    {
      "epoch": 0.0262,
      "grad_norm": 0.5889989743836745,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 2620
    },
    {
      "epoch": 0.02621,
      "grad_norm": 0.5658317825134358,
      "learning_rate": 0.003,
      "loss": 4.2007,
      "step": 2621
    },
    {
      "epoch": 0.02622,
      "grad_norm": 0.5963501897765274,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 2622
    },
    {
      "epoch": 0.02623,
      "grad_norm": 0.6088839510075245,
      "learning_rate": 0.003,
      "loss": 4.2019,
      "step": 2623
    },
    {
      "epoch": 0.02624,
      "grad_norm": 0.639758346515609,
      "learning_rate": 0.003,
      "loss": 4.2384,
      "step": 2624
    },
    {
      "epoch": 0.02625,
      "grad_norm": 0.6760695260129397,
      "learning_rate": 0.003,
      "loss": 4.2327,
      "step": 2625
    },
    {
      "epoch": 0.02626,
      "grad_norm": 0.7394049384951815,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 2626
    },
    {
      "epoch": 0.02627,
      "grad_norm": 0.771569025728823,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2627
    },
    {
      "epoch": 0.02628,
      "grad_norm": 0.9437506721801893,
      "learning_rate": 0.003,
      "loss": 4.2288,
      "step": 2628
    },
    {
      "epoch": 0.02629,
      "grad_norm": 0.9552775699761499,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 2629
    },
    {
      "epoch": 0.0263,
      "grad_norm": 0.775368472978819,
      "learning_rate": 0.003,
      "loss": 4.1937,
      "step": 2630
    },
    {
      "epoch": 0.02631,
      "grad_norm": 0.7034974913313367,
      "learning_rate": 0.003,
      "loss": 4.2143,
      "step": 2631
    },
    {
      "epoch": 0.02632,
      "grad_norm": 0.84944572579913,
      "learning_rate": 0.003,
      "loss": 4.1797,
      "step": 2632
    },
    {
      "epoch": 0.02633,
      "grad_norm": 0.9315535752380871,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 2633
    },
    {
      "epoch": 0.02634,
      "grad_norm": 0.848318245690014,
      "learning_rate": 0.003,
      "loss": 4.2175,
      "step": 2634
    },
    {
      "epoch": 0.02635,
      "grad_norm": 0.8121240624947638,
      "learning_rate": 0.003,
      "loss": 4.2139,
      "step": 2635
    },
    {
      "epoch": 0.02636,
      "grad_norm": 0.750298491855964,
      "learning_rate": 0.003,
      "loss": 4.196,
      "step": 2636
    },
    {
      "epoch": 0.02637,
      "grad_norm": 0.7715588746790815,
      "learning_rate": 0.003,
      "loss": 4.2261,
      "step": 2637
    },
    {
      "epoch": 0.02638,
      "grad_norm": 0.8941355293873781,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 2638
    },
    {
      "epoch": 0.02639,
      "grad_norm": 1.1208036732650624,
      "learning_rate": 0.003,
      "loss": 4.1946,
      "step": 2639
    },
    {
      "epoch": 0.0264,
      "grad_norm": 0.9871221010165437,
      "learning_rate": 0.003,
      "loss": 4.1957,
      "step": 2640
    },
    {
      "epoch": 0.02641,
      "grad_norm": 0.8874989090023564,
      "learning_rate": 0.003,
      "loss": 4.2028,
      "step": 2641
    },
    {
      "epoch": 0.02642,
      "grad_norm": 0.7458704018113644,
      "learning_rate": 0.003,
      "loss": 4.2079,
      "step": 2642
    },
    {
      "epoch": 0.02643,
      "grad_norm": 0.6742933550823261,
      "learning_rate": 0.003,
      "loss": 4.1912,
      "step": 2643
    },
    {
      "epoch": 0.02644,
      "grad_norm": 0.6392354944191677,
      "learning_rate": 0.003,
      "loss": 4.2038,
      "step": 2644
    },
    {
      "epoch": 0.02645,
      "grad_norm": 0.628167080207442,
      "learning_rate": 0.003,
      "loss": 4.2151,
      "step": 2645
    },
    {
      "epoch": 0.02646,
      "grad_norm": 0.7307984815804015,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2646
    },
    {
      "epoch": 0.02647,
      "grad_norm": 0.7359159985928708,
      "learning_rate": 0.003,
      "loss": 4.2047,
      "step": 2647
    },
    {
      "epoch": 0.02648,
      "grad_norm": 0.8954441506020118,
      "learning_rate": 0.003,
      "loss": 4.2303,
      "step": 2648
    },
    {
      "epoch": 0.02649,
      "grad_norm": 0.9330516996289556,
      "learning_rate": 0.003,
      "loss": 4.2172,
      "step": 2649
    },
    {
      "epoch": 0.0265,
      "grad_norm": 0.8975744896998445,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 2650
    },
    {
      "epoch": 0.02651,
      "grad_norm": 0.7356166899037149,
      "learning_rate": 0.003,
      "loss": 4.1944,
      "step": 2651
    },
    {
      "epoch": 0.02652,
      "grad_norm": 0.7477158925723587,
      "learning_rate": 0.003,
      "loss": 4.2246,
      "step": 2652
    },
    {
      "epoch": 0.02653,
      "grad_norm": 1.0339979416429652,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 2653
    },
    {
      "epoch": 0.02654,
      "grad_norm": 1.038966179449438,
      "learning_rate": 0.003,
      "loss": 4.2084,
      "step": 2654
    },
    {
      "epoch": 0.02655,
      "grad_norm": 0.9423283848127929,
      "learning_rate": 0.003,
      "loss": 4.2139,
      "step": 2655
    },
    {
      "epoch": 0.02656,
      "grad_norm": 0.8935307253871441,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 2656
    },
    {
      "epoch": 0.02657,
      "grad_norm": 0.9450781561652172,
      "learning_rate": 0.003,
      "loss": 4.2216,
      "step": 2657
    },
    {
      "epoch": 0.02658,
      "grad_norm": 0.9653780583023913,
      "learning_rate": 0.003,
      "loss": 4.2275,
      "step": 2658
    },
    {
      "epoch": 0.02659,
      "grad_norm": 0.9812881650344129,
      "learning_rate": 0.003,
      "loss": 4.2266,
      "step": 2659
    },
    {
      "epoch": 0.0266,
      "grad_norm": 0.7604935125019064,
      "learning_rate": 0.003,
      "loss": 4.199,
      "step": 2660
    },
    {
      "epoch": 0.02661,
      "grad_norm": 0.7898310738914669,
      "learning_rate": 0.003,
      "loss": 4.2224,
      "step": 2661
    },
    {
      "epoch": 0.02662,
      "grad_norm": 0.7821828341075345,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2662
    },
    {
      "epoch": 0.02663,
      "grad_norm": 0.708129537974612,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 2663
    },
    {
      "epoch": 0.02664,
      "grad_norm": 0.614176433467567,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2664
    },
    {
      "epoch": 0.02665,
      "grad_norm": 0.5871099840572066,
      "learning_rate": 0.003,
      "loss": 4.2354,
      "step": 2665
    },
    {
      "epoch": 0.02666,
      "grad_norm": 0.5413499528000574,
      "learning_rate": 0.003,
      "loss": 4.2215,
      "step": 2666
    },
    {
      "epoch": 0.02667,
      "grad_norm": 0.5004044538450345,
      "learning_rate": 0.003,
      "loss": 4.2007,
      "step": 2667
    },
    {
      "epoch": 0.02668,
      "grad_norm": 0.40369112920994166,
      "learning_rate": 0.003,
      "loss": 4.1654,
      "step": 2668
    },
    {
      "epoch": 0.02669,
      "grad_norm": 0.4696848864082333,
      "learning_rate": 0.003,
      "loss": 4.1859,
      "step": 2669
    },
    {
      "epoch": 0.0267,
      "grad_norm": 0.5279108698543552,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 2670
    },
    {
      "epoch": 0.02671,
      "grad_norm": 0.5815932345300244,
      "learning_rate": 0.003,
      "loss": 4.2011,
      "step": 2671
    },
    {
      "epoch": 0.02672,
      "grad_norm": 0.6492435660477966,
      "learning_rate": 0.003,
      "loss": 4.195,
      "step": 2672
    },
    {
      "epoch": 0.02673,
      "grad_norm": 0.7437813435791281,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 2673
    },
    {
      "epoch": 0.02674,
      "grad_norm": 0.9399474520802658,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 2674
    },
    {
      "epoch": 0.02675,
      "grad_norm": 0.9600957867204232,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 2675
    },
    {
      "epoch": 0.02676,
      "grad_norm": 0.8275264852729368,
      "learning_rate": 0.003,
      "loss": 4.1903,
      "step": 2676
    },
    {
      "epoch": 0.02677,
      "grad_norm": 0.7656126092118313,
      "learning_rate": 0.003,
      "loss": 4.2176,
      "step": 2677
    },
    {
      "epoch": 0.02678,
      "grad_norm": 0.8861030153032051,
      "learning_rate": 0.003,
      "loss": 4.2184,
      "step": 2678
    },
    {
      "epoch": 0.02679,
      "grad_norm": 0.9307488076809898,
      "learning_rate": 0.003,
      "loss": 4.1901,
      "step": 2679
    },
    {
      "epoch": 0.0268,
      "grad_norm": 0.805330899134272,
      "learning_rate": 0.003,
      "loss": 4.204,
      "step": 2680
    },
    {
      "epoch": 0.02681,
      "grad_norm": 0.6764861017583965,
      "learning_rate": 0.003,
      "loss": 4.2192,
      "step": 2681
    },
    {
      "epoch": 0.02682,
      "grad_norm": 0.6705225418393664,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 2682
    },
    {
      "epoch": 0.02683,
      "grad_norm": 0.7147920779432392,
      "learning_rate": 0.003,
      "loss": 4.2187,
      "step": 2683
    },
    {
      "epoch": 0.02684,
      "grad_norm": 0.7227393027644701,
      "learning_rate": 0.003,
      "loss": 4.1944,
      "step": 2684
    },
    {
      "epoch": 0.02685,
      "grad_norm": 0.6704279875962865,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 2685
    },
    {
      "epoch": 0.02686,
      "grad_norm": 0.715031165504719,
      "learning_rate": 0.003,
      "loss": 4.1799,
      "step": 2686
    },
    {
      "epoch": 0.02687,
      "grad_norm": 0.7065638636939903,
      "learning_rate": 0.003,
      "loss": 4.1889,
      "step": 2687
    },
    {
      "epoch": 0.02688,
      "grad_norm": 0.6230800929041215,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 2688
    },
    {
      "epoch": 0.02689,
      "grad_norm": 0.6057809629157821,
      "learning_rate": 0.003,
      "loss": 4.1944,
      "step": 2689
    },
    {
      "epoch": 0.0269,
      "grad_norm": 0.6893536336683767,
      "learning_rate": 0.003,
      "loss": 4.2016,
      "step": 2690
    },
    {
      "epoch": 0.02691,
      "grad_norm": 0.889230963330981,
      "learning_rate": 0.003,
      "loss": 4.2018,
      "step": 2691
    },
    {
      "epoch": 0.02692,
      "grad_norm": 0.9254094540705526,
      "learning_rate": 0.003,
      "loss": 4.2128,
      "step": 2692
    },
    {
      "epoch": 0.02693,
      "grad_norm": 0.7506239085511861,
      "learning_rate": 0.003,
      "loss": 4.2167,
      "step": 2693
    },
    {
      "epoch": 0.02694,
      "grad_norm": 0.8733320575384562,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 2694
    },
    {
      "epoch": 0.02695,
      "grad_norm": 1.0144492752003067,
      "learning_rate": 0.003,
      "loss": 4.2059,
      "step": 2695
    },
    {
      "epoch": 0.02696,
      "grad_norm": 1.1997862653069007,
      "learning_rate": 0.003,
      "loss": 4.223,
      "step": 2696
    },
    {
      "epoch": 0.02697,
      "grad_norm": 0.766270544323272,
      "learning_rate": 0.003,
      "loss": 4.2162,
      "step": 2697
    },
    {
      "epoch": 0.02698,
      "grad_norm": 0.8571240526593925,
      "learning_rate": 0.003,
      "loss": 4.2045,
      "step": 2698
    },
    {
      "epoch": 0.02699,
      "grad_norm": 0.8669468835973597,
      "learning_rate": 0.003,
      "loss": 4.2202,
      "step": 2699
    },
    {
      "epoch": 0.027,
      "grad_norm": 0.8651896057336759,
      "learning_rate": 0.003,
      "loss": 4.1922,
      "step": 2700
    },
    {
      "epoch": 0.02701,
      "grad_norm": 0.9521341410803329,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 2701
    },
    {
      "epoch": 0.02702,
      "grad_norm": 0.8813922958551177,
      "learning_rate": 0.003,
      "loss": 4.2351,
      "step": 2702
    },
    {
      "epoch": 0.02703,
      "grad_norm": 0.7638729271713497,
      "learning_rate": 0.003,
      "loss": 4.2319,
      "step": 2703
    },
    {
      "epoch": 0.02704,
      "grad_norm": 0.6755135996338237,
      "learning_rate": 0.003,
      "loss": 4.1703,
      "step": 2704
    },
    {
      "epoch": 0.02705,
      "grad_norm": 0.7165610100052193,
      "learning_rate": 0.003,
      "loss": 4.2247,
      "step": 2705
    },
    {
      "epoch": 0.02706,
      "grad_norm": 0.7030374550316207,
      "learning_rate": 0.003,
      "loss": 4.1863,
      "step": 2706
    },
    {
      "epoch": 0.02707,
      "grad_norm": 0.7231373056232907,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2707
    },
    {
      "epoch": 0.02708,
      "grad_norm": 0.7250863254981224,
      "learning_rate": 0.003,
      "loss": 4.2139,
      "step": 2708
    },
    {
      "epoch": 0.02709,
      "grad_norm": 0.7719809697889917,
      "learning_rate": 0.003,
      "loss": 4.1893,
      "step": 2709
    },
    {
      "epoch": 0.0271,
      "grad_norm": 0.7100576834257082,
      "learning_rate": 0.003,
      "loss": 4.1967,
      "step": 2710
    },
    {
      "epoch": 0.02711,
      "grad_norm": 0.7047852834402715,
      "learning_rate": 0.003,
      "loss": 4.1784,
      "step": 2711
    },
    {
      "epoch": 0.02712,
      "grad_norm": 0.6821379465786586,
      "learning_rate": 0.003,
      "loss": 4.19,
      "step": 2712
    },
    {
      "epoch": 0.02713,
      "grad_norm": 0.6215562238389798,
      "learning_rate": 0.003,
      "loss": 4.1809,
      "step": 2713
    },
    {
      "epoch": 0.02714,
      "grad_norm": 0.5936031532564715,
      "learning_rate": 0.003,
      "loss": 4.1774,
      "step": 2714
    },
    {
      "epoch": 0.02715,
      "grad_norm": 0.7208100744430564,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 2715
    },
    {
      "epoch": 0.02716,
      "grad_norm": 0.8524838885321797,
      "learning_rate": 0.003,
      "loss": 4.1901,
      "step": 2716
    },
    {
      "epoch": 0.02717,
      "grad_norm": 0.909492018841103,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 2717
    },
    {
      "epoch": 0.02718,
      "grad_norm": 0.7607068991983567,
      "learning_rate": 0.003,
      "loss": 4.2085,
      "step": 2718
    },
    {
      "epoch": 0.02719,
      "grad_norm": 0.680611263653079,
      "learning_rate": 0.003,
      "loss": 4.1801,
      "step": 2719
    },
    {
      "epoch": 0.0272,
      "grad_norm": 0.8513436164853936,
      "learning_rate": 0.003,
      "loss": 4.1774,
      "step": 2720
    },
    {
      "epoch": 0.02721,
      "grad_norm": 0.8639998466288582,
      "learning_rate": 0.003,
      "loss": 4.1904,
      "step": 2721
    },
    {
      "epoch": 0.02722,
      "grad_norm": 0.8772434078332314,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2722
    },
    {
      "epoch": 0.02723,
      "grad_norm": 0.7454610624653967,
      "learning_rate": 0.003,
      "loss": 4.1955,
      "step": 2723
    },
    {
      "epoch": 0.02724,
      "grad_norm": 0.6102446662829699,
      "learning_rate": 0.003,
      "loss": 4.1921,
      "step": 2724
    },
    {
      "epoch": 0.02725,
      "grad_norm": 0.7200880629570631,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 2725
    },
    {
      "epoch": 0.02726,
      "grad_norm": 0.8294654813405002,
      "learning_rate": 0.003,
      "loss": 4.1624,
      "step": 2726
    },
    {
      "epoch": 0.02727,
      "grad_norm": 0.8088332759795207,
      "learning_rate": 0.003,
      "loss": 4.1952,
      "step": 2727
    },
    {
      "epoch": 0.02728,
      "grad_norm": 0.7352078940910927,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 2728
    },
    {
      "epoch": 0.02729,
      "grad_norm": 0.6377589758910231,
      "learning_rate": 0.003,
      "loss": 4.2114,
      "step": 2729
    },
    {
      "epoch": 0.0273,
      "grad_norm": 0.5626731480237941,
      "learning_rate": 0.003,
      "loss": 4.1797,
      "step": 2730
    },
    {
      "epoch": 0.02731,
      "grad_norm": 0.601770644577552,
      "learning_rate": 0.003,
      "loss": 4.1785,
      "step": 2731
    },
    {
      "epoch": 0.02732,
      "grad_norm": 0.6533204081603035,
      "learning_rate": 0.003,
      "loss": 4.1725,
      "step": 2732
    },
    {
      "epoch": 0.02733,
      "grad_norm": 0.7772077073193634,
      "learning_rate": 0.003,
      "loss": 4.2077,
      "step": 2733
    },
    {
      "epoch": 0.02734,
      "grad_norm": 0.792467609111156,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 2734
    },
    {
      "epoch": 0.02735,
      "grad_norm": 0.6727759433070883,
      "learning_rate": 0.003,
      "loss": 4.2012,
      "step": 2735
    },
    {
      "epoch": 0.02736,
      "grad_norm": 0.58224974581077,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 2736
    },
    {
      "epoch": 0.02737,
      "grad_norm": 0.6428828815739132,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 2737
    },
    {
      "epoch": 0.02738,
      "grad_norm": 0.6634618804729325,
      "learning_rate": 0.003,
      "loss": 4.1909,
      "step": 2738
    },
    {
      "epoch": 0.02739,
      "grad_norm": 0.7033876856529395,
      "learning_rate": 0.003,
      "loss": 4.185,
      "step": 2739
    },
    {
      "epoch": 0.0274,
      "grad_norm": 0.8647329601476124,
      "learning_rate": 0.003,
      "loss": 4.181,
      "step": 2740
    },
    {
      "epoch": 0.02741,
      "grad_norm": 0.8928671349531622,
      "learning_rate": 0.003,
      "loss": 4.1979,
      "step": 2741
    },
    {
      "epoch": 0.02742,
      "grad_norm": 0.8249061089459991,
      "learning_rate": 0.003,
      "loss": 4.1871,
      "step": 2742
    },
    {
      "epoch": 0.02743,
      "grad_norm": 0.9230002686874078,
      "learning_rate": 0.003,
      "loss": 4.2149,
      "step": 2743
    },
    {
      "epoch": 0.02744,
      "grad_norm": 0.7539080769994848,
      "learning_rate": 0.003,
      "loss": 4.1915,
      "step": 2744
    },
    {
      "epoch": 0.02745,
      "grad_norm": 0.7938564140089303,
      "learning_rate": 0.003,
      "loss": 4.2078,
      "step": 2745
    },
    {
      "epoch": 0.02746,
      "grad_norm": 0.6662256659438859,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 2746
    },
    {
      "epoch": 0.02747,
      "grad_norm": 0.6274900377940358,
      "learning_rate": 0.003,
      "loss": 4.211,
      "step": 2747
    },
    {
      "epoch": 0.02748,
      "grad_norm": 0.5811994917442085,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 2748
    },
    {
      "epoch": 0.02749,
      "grad_norm": 0.5961679125130114,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 2749
    },
    {
      "epoch": 0.0275,
      "grad_norm": 0.6582517345217201,
      "learning_rate": 0.003,
      "loss": 4.1997,
      "step": 2750
    },
    {
      "epoch": 0.02751,
      "grad_norm": 0.6778193569737023,
      "learning_rate": 0.003,
      "loss": 4.2149,
      "step": 2751
    },
    {
      "epoch": 0.02752,
      "grad_norm": 0.7403973900306051,
      "learning_rate": 0.003,
      "loss": 4.2023,
      "step": 2752
    },
    {
      "epoch": 0.02753,
      "grad_norm": 0.9384163074447154,
      "learning_rate": 0.003,
      "loss": 4.1905,
      "step": 2753
    },
    {
      "epoch": 0.02754,
      "grad_norm": 1.548459064020229,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 2754
    },
    {
      "epoch": 0.02755,
      "grad_norm": 0.6860176417927059,
      "learning_rate": 0.003,
      "loss": 4.1785,
      "step": 2755
    },
    {
      "epoch": 0.02756,
      "grad_norm": 0.6932160826643162,
      "learning_rate": 0.003,
      "loss": 4.207,
      "step": 2756
    },
    {
      "epoch": 0.02757,
      "grad_norm": 0.9436043093562364,
      "learning_rate": 0.003,
      "loss": 4.2258,
      "step": 2757
    },
    {
      "epoch": 0.02758,
      "grad_norm": 0.932694610265977,
      "learning_rate": 0.003,
      "loss": 4.1741,
      "step": 2758
    },
    {
      "epoch": 0.02759,
      "grad_norm": 1.0183137769286543,
      "learning_rate": 0.003,
      "loss": 4.209,
      "step": 2759
    },
    {
      "epoch": 0.0276,
      "grad_norm": 0.9568486135614797,
      "learning_rate": 0.003,
      "loss": 4.2119,
      "step": 2760
    },
    {
      "epoch": 0.02761,
      "grad_norm": 0.9337487058436278,
      "learning_rate": 0.003,
      "loss": 4.2307,
      "step": 2761
    },
    {
      "epoch": 0.02762,
      "grad_norm": 0.9026730273084307,
      "learning_rate": 0.003,
      "loss": 4.2002,
      "step": 2762
    },
    {
      "epoch": 0.02763,
      "grad_norm": 0.8685760175250772,
      "learning_rate": 0.003,
      "loss": 4.2102,
      "step": 2763
    },
    {
      "epoch": 0.02764,
      "grad_norm": 0.7723173760613741,
      "learning_rate": 0.003,
      "loss": 4.2012,
      "step": 2764
    },
    {
      "epoch": 0.02765,
      "grad_norm": 0.6561586138290864,
      "learning_rate": 0.003,
      "loss": 4.2025,
      "step": 2765
    },
    {
      "epoch": 0.02766,
      "grad_norm": 0.6637574717745266,
      "learning_rate": 0.003,
      "loss": 4.1694,
      "step": 2766
    },
    {
      "epoch": 0.02767,
      "grad_norm": 0.7045106510208712,
      "learning_rate": 0.003,
      "loss": 4.2241,
      "step": 2767
    },
    {
      "epoch": 0.02768,
      "grad_norm": 0.7181706443439274,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 2768
    },
    {
      "epoch": 0.02769,
      "grad_norm": 0.8144040817598821,
      "learning_rate": 0.003,
      "loss": 4.2199,
      "step": 2769
    },
    {
      "epoch": 0.0277,
      "grad_norm": 0.8055251249780363,
      "learning_rate": 0.003,
      "loss": 4.1893,
      "step": 2770
    },
    {
      "epoch": 0.02771,
      "grad_norm": 0.7962246824499264,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2771
    },
    {
      "epoch": 0.02772,
      "grad_norm": 0.7090461373687846,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 2772
    },
    {
      "epoch": 0.02773,
      "grad_norm": 0.7104242382637275,
      "learning_rate": 0.003,
      "loss": 4.2,
      "step": 2773
    },
    {
      "epoch": 0.02774,
      "grad_norm": 0.7495245887650404,
      "learning_rate": 0.003,
      "loss": 4.2232,
      "step": 2774
    },
    {
      "epoch": 0.02775,
      "grad_norm": 0.7766387259726212,
      "learning_rate": 0.003,
      "loss": 4.1914,
      "step": 2775
    },
    {
      "epoch": 0.02776,
      "grad_norm": 0.8895259744115902,
      "learning_rate": 0.003,
      "loss": 4.1913,
      "step": 2776
    },
    {
      "epoch": 0.02777,
      "grad_norm": 1.0136447873992764,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 2777
    },
    {
      "epoch": 0.02778,
      "grad_norm": 0.9270708223991836,
      "learning_rate": 0.003,
      "loss": 4.2171,
      "step": 2778
    },
    {
      "epoch": 0.02779,
      "grad_norm": 0.7449945983496725,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 2779
    },
    {
      "epoch": 0.0278,
      "grad_norm": 0.6376494685394721,
      "learning_rate": 0.003,
      "loss": 4.1771,
      "step": 2780
    },
    {
      "epoch": 0.02781,
      "grad_norm": 0.6404601478719466,
      "learning_rate": 0.003,
      "loss": 4.1583,
      "step": 2781
    },
    {
      "epoch": 0.02782,
      "grad_norm": 0.6406219320739934,
      "learning_rate": 0.003,
      "loss": 4.1805,
      "step": 2782
    },
    {
      "epoch": 0.02783,
      "grad_norm": 0.6965574269000856,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 2783
    },
    {
      "epoch": 0.02784,
      "grad_norm": 0.6353952783759534,
      "learning_rate": 0.003,
      "loss": 4.18,
      "step": 2784
    },
    {
      "epoch": 0.02785,
      "grad_norm": 0.6672684060024227,
      "learning_rate": 0.003,
      "loss": 4.1968,
      "step": 2785
    },
    {
      "epoch": 0.02786,
      "grad_norm": 0.8009924222538798,
      "learning_rate": 0.003,
      "loss": 4.1936,
      "step": 2786
    },
    {
      "epoch": 0.02787,
      "grad_norm": 1.0512671252378267,
      "learning_rate": 0.003,
      "loss": 4.2056,
      "step": 2787
    },
    {
      "epoch": 0.02788,
      "grad_norm": 0.9344102358218531,
      "learning_rate": 0.003,
      "loss": 4.193,
      "step": 2788
    },
    {
      "epoch": 0.02789,
      "grad_norm": 0.6966687816191528,
      "learning_rate": 0.003,
      "loss": 4.1893,
      "step": 2789
    },
    {
      "epoch": 0.0279,
      "grad_norm": 0.5812915901595114,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 2790
    },
    {
      "epoch": 0.02791,
      "grad_norm": 0.6498216487712081,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 2791
    },
    {
      "epoch": 0.02792,
      "grad_norm": 0.7276557261785184,
      "learning_rate": 0.003,
      "loss": 4.1906,
      "step": 2792
    },
    {
      "epoch": 0.02793,
      "grad_norm": 0.8633571725468037,
      "learning_rate": 0.003,
      "loss": 4.192,
      "step": 2793
    },
    {
      "epoch": 0.02794,
      "grad_norm": 0.9525705083591802,
      "learning_rate": 0.003,
      "loss": 4.1932,
      "step": 2794
    },
    {
      "epoch": 0.02795,
      "grad_norm": 0.8234824382946722,
      "learning_rate": 0.003,
      "loss": 4.2045,
      "step": 2795
    },
    {
      "epoch": 0.02796,
      "grad_norm": 0.6758642343246372,
      "learning_rate": 0.003,
      "loss": 4.194,
      "step": 2796
    },
    {
      "epoch": 0.02797,
      "grad_norm": 0.5487569332106049,
      "learning_rate": 0.003,
      "loss": 4.2,
      "step": 2797
    },
    {
      "epoch": 0.02798,
      "grad_norm": 0.663303491252161,
      "learning_rate": 0.003,
      "loss": 4.2001,
      "step": 2798
    },
    {
      "epoch": 0.02799,
      "grad_norm": 0.718345645781191,
      "learning_rate": 0.003,
      "loss": 4.1857,
      "step": 2799
    },
    {
      "epoch": 0.028,
      "grad_norm": 0.8551146813140691,
      "learning_rate": 0.003,
      "loss": 4.2047,
      "step": 2800
    },
    {
      "epoch": 0.02801,
      "grad_norm": 0.8363235146801704,
      "learning_rate": 0.003,
      "loss": 4.183,
      "step": 2801
    },
    {
      "epoch": 0.02802,
      "grad_norm": 0.7702327941304277,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 2802
    },
    {
      "epoch": 0.02803,
      "grad_norm": 0.7820522108252399,
      "learning_rate": 0.003,
      "loss": 4.2042,
      "step": 2803
    },
    {
      "epoch": 0.02804,
      "grad_norm": 0.8315360958990581,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 2804
    },
    {
      "epoch": 0.02805,
      "grad_norm": 0.7877464535188091,
      "learning_rate": 0.003,
      "loss": 4.2115,
      "step": 2805
    },
    {
      "epoch": 0.02806,
      "grad_norm": 0.761746586266924,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 2806
    },
    {
      "epoch": 0.02807,
      "grad_norm": 0.7896019466390173,
      "learning_rate": 0.003,
      "loss": 4.1905,
      "step": 2807
    },
    {
      "epoch": 0.02808,
      "grad_norm": 0.8252866588896443,
      "learning_rate": 0.003,
      "loss": 4.1764,
      "step": 2808
    },
    {
      "epoch": 0.02809,
      "grad_norm": 0.8153674183671854,
      "learning_rate": 0.003,
      "loss": 4.2161,
      "step": 2809
    },
    {
      "epoch": 0.0281,
      "grad_norm": 0.8179736763027198,
      "learning_rate": 0.003,
      "loss": 4.1676,
      "step": 2810
    },
    {
      "epoch": 0.02811,
      "grad_norm": 0.7182358526616855,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 2811
    },
    {
      "epoch": 0.02812,
      "grad_norm": 0.7222785348647399,
      "learning_rate": 0.003,
      "loss": 4.1904,
      "step": 2812
    },
    {
      "epoch": 0.02813,
      "grad_norm": 0.7312392563417645,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 2813
    },
    {
      "epoch": 0.02814,
      "grad_norm": 0.7621373960798801,
      "learning_rate": 0.003,
      "loss": 4.1842,
      "step": 2814
    },
    {
      "epoch": 0.02815,
      "grad_norm": 0.892573891744398,
      "learning_rate": 0.003,
      "loss": 4.2237,
      "step": 2815
    },
    {
      "epoch": 0.02816,
      "grad_norm": 1.0660491465880368,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2816
    },
    {
      "epoch": 0.02817,
      "grad_norm": 1.1762397108284373,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 2817
    },
    {
      "epoch": 0.02818,
      "grad_norm": 0.8114644715240712,
      "learning_rate": 0.003,
      "loss": 4.1788,
      "step": 2818
    },
    {
      "epoch": 0.02819,
      "grad_norm": 0.7485947937835542,
      "learning_rate": 0.003,
      "loss": 4.2117,
      "step": 2819
    },
    {
      "epoch": 0.0282,
      "grad_norm": 0.7342650912183687,
      "learning_rate": 0.003,
      "loss": 4.1986,
      "step": 2820
    },
    {
      "epoch": 0.02821,
      "grad_norm": 0.908756776200653,
      "learning_rate": 0.003,
      "loss": 4.2223,
      "step": 2821
    },
    {
      "epoch": 0.02822,
      "grad_norm": 1.1149965537202893,
      "learning_rate": 0.003,
      "loss": 4.2047,
      "step": 2822
    },
    {
      "epoch": 0.02823,
      "grad_norm": 0.9035892234929067,
      "learning_rate": 0.003,
      "loss": 4.2252,
      "step": 2823
    },
    {
      "epoch": 0.02824,
      "grad_norm": 0.8715687787834273,
      "learning_rate": 0.003,
      "loss": 4.2131,
      "step": 2824
    },
    {
      "epoch": 0.02825,
      "grad_norm": 0.9190846271911702,
      "learning_rate": 0.003,
      "loss": 4.2162,
      "step": 2825
    },
    {
      "epoch": 0.02826,
      "grad_norm": 0.9656110778760747,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2826
    },
    {
      "epoch": 0.02827,
      "grad_norm": 0.7985488107577273,
      "learning_rate": 0.003,
      "loss": 4.1811,
      "step": 2827
    },
    {
      "epoch": 0.02828,
      "grad_norm": 0.6338765134372335,
      "learning_rate": 0.003,
      "loss": 4.2122,
      "step": 2828
    },
    {
      "epoch": 0.02829,
      "grad_norm": 0.5839189064796922,
      "learning_rate": 0.003,
      "loss": 4.2333,
      "step": 2829
    },
    {
      "epoch": 0.0283,
      "grad_norm": 0.5171339276770696,
      "learning_rate": 0.003,
      "loss": 4.1933,
      "step": 2830
    },
    {
      "epoch": 0.02831,
      "grad_norm": 0.4981234575873454,
      "learning_rate": 0.003,
      "loss": 4.1933,
      "step": 2831
    },
    {
      "epoch": 0.02832,
      "grad_norm": 0.48374057680202603,
      "learning_rate": 0.003,
      "loss": 4.2024,
      "step": 2832
    },
    {
      "epoch": 0.02833,
      "grad_norm": 0.49701505240619126,
      "learning_rate": 0.003,
      "loss": 4.1763,
      "step": 2833
    },
    {
      "epoch": 0.02834,
      "grad_norm": 0.4375442041063234,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 2834
    },
    {
      "epoch": 0.02835,
      "grad_norm": 0.4492758317663205,
      "learning_rate": 0.003,
      "loss": 4.1724,
      "step": 2835
    },
    {
      "epoch": 0.02836,
      "grad_norm": 0.5303942295307315,
      "learning_rate": 0.003,
      "loss": 4.1912,
      "step": 2836
    },
    {
      "epoch": 0.02837,
      "grad_norm": 0.6807714027442211,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 2837
    },
    {
      "epoch": 0.02838,
      "grad_norm": 1.054384165651217,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 2838
    },
    {
      "epoch": 0.02839,
      "grad_norm": 1.1296655727309317,
      "learning_rate": 0.003,
      "loss": 4.1551,
      "step": 2839
    },
    {
      "epoch": 0.0284,
      "grad_norm": 0.6812539648150485,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 2840
    },
    {
      "epoch": 0.02841,
      "grad_norm": 0.6214085041332542,
      "learning_rate": 0.003,
      "loss": 4.1932,
      "step": 2841
    },
    {
      "epoch": 0.02842,
      "grad_norm": 0.7921932057609014,
      "learning_rate": 0.003,
      "loss": 4.1778,
      "step": 2842
    },
    {
      "epoch": 0.02843,
      "grad_norm": 0.7844292346786299,
      "learning_rate": 0.003,
      "loss": 4.2017,
      "step": 2843
    },
    {
      "epoch": 0.02844,
      "grad_norm": 0.8202507673079593,
      "learning_rate": 0.003,
      "loss": 4.1992,
      "step": 2844
    },
    {
      "epoch": 0.02845,
      "grad_norm": 0.899571446747117,
      "learning_rate": 0.003,
      "loss": 4.1842,
      "step": 2845
    },
    {
      "epoch": 0.02846,
      "grad_norm": 0.9854433598194003,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 2846
    },
    {
      "epoch": 0.02847,
      "grad_norm": 0.9578244699339834,
      "learning_rate": 0.003,
      "loss": 4.2412,
      "step": 2847
    },
    {
      "epoch": 0.02848,
      "grad_norm": 0.8611603069570741,
      "learning_rate": 0.003,
      "loss": 4.1991,
      "step": 2848
    },
    {
      "epoch": 0.02849,
      "grad_norm": 0.8677890731777738,
      "learning_rate": 0.003,
      "loss": 4.1875,
      "step": 2849
    },
    {
      "epoch": 0.0285,
      "grad_norm": 0.9345387668269812,
      "learning_rate": 0.003,
      "loss": 4.2004,
      "step": 2850
    },
    {
      "epoch": 0.02851,
      "grad_norm": 1.022611523078909,
      "learning_rate": 0.003,
      "loss": 4.2443,
      "step": 2851
    },
    {
      "epoch": 0.02852,
      "grad_norm": 1.08179004175546,
      "learning_rate": 0.003,
      "loss": 4.2232,
      "step": 2852
    },
    {
      "epoch": 0.02853,
      "grad_norm": 0.8903248915496341,
      "learning_rate": 0.003,
      "loss": 4.1837,
      "step": 2853
    },
    {
      "epoch": 0.02854,
      "grad_norm": 0.8065798213036774,
      "learning_rate": 0.003,
      "loss": 4.222,
      "step": 2854
    },
    {
      "epoch": 0.02855,
      "grad_norm": 0.8325761138878125,
      "learning_rate": 0.003,
      "loss": 4.2217,
      "step": 2855
    },
    {
      "epoch": 0.02856,
      "grad_norm": 0.8291292758652558,
      "learning_rate": 0.003,
      "loss": 4.2197,
      "step": 2856
    },
    {
      "epoch": 0.02857,
      "grad_norm": 1.0231797870146355,
      "learning_rate": 0.003,
      "loss": 4.2081,
      "step": 2857
    },
    {
      "epoch": 0.02858,
      "grad_norm": 1.135404325827927,
      "learning_rate": 0.003,
      "loss": 4.201,
      "step": 2858
    },
    {
      "epoch": 0.02859,
      "grad_norm": 0.7053658846015558,
      "learning_rate": 0.003,
      "loss": 4.1963,
      "step": 2859
    },
    {
      "epoch": 0.0286,
      "grad_norm": 0.6736135876048694,
      "learning_rate": 0.003,
      "loss": 4.2084,
      "step": 2860
    },
    {
      "epoch": 0.02861,
      "grad_norm": 0.5759862291513759,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 2861
    },
    {
      "epoch": 0.02862,
      "grad_norm": 0.5616808260442542,
      "learning_rate": 0.003,
      "loss": 4.1894,
      "step": 2862
    },
    {
      "epoch": 0.02863,
      "grad_norm": 0.49788279563964805,
      "learning_rate": 0.003,
      "loss": 4.2111,
      "step": 2863
    },
    {
      "epoch": 0.02864,
      "grad_norm": 0.4647689038314713,
      "learning_rate": 0.003,
      "loss": 4.1819,
      "step": 2864
    },
    {
      "epoch": 0.02865,
      "grad_norm": 0.46260884486023074,
      "learning_rate": 0.003,
      "loss": 4.2095,
      "step": 2865
    },
    {
      "epoch": 0.02866,
      "grad_norm": 0.4834529405017652,
      "learning_rate": 0.003,
      "loss": 4.19,
      "step": 2866
    },
    {
      "epoch": 0.02867,
      "grad_norm": 0.5645551159123001,
      "learning_rate": 0.003,
      "loss": 4.1773,
      "step": 2867
    },
    {
      "epoch": 0.02868,
      "grad_norm": 0.660748140178769,
      "learning_rate": 0.003,
      "loss": 4.1865,
      "step": 2868
    },
    {
      "epoch": 0.02869,
      "grad_norm": 0.779662997391975,
      "learning_rate": 0.003,
      "loss": 4.1786,
      "step": 2869
    },
    {
      "epoch": 0.0287,
      "grad_norm": 0.8316388530903273,
      "learning_rate": 0.003,
      "loss": 4.2026,
      "step": 2870
    },
    {
      "epoch": 0.02871,
      "grad_norm": 0.7635742727135532,
      "learning_rate": 0.003,
      "loss": 4.1842,
      "step": 2871
    },
    {
      "epoch": 0.02872,
      "grad_norm": 0.7623759966436428,
      "learning_rate": 0.003,
      "loss": 4.2184,
      "step": 2872
    },
    {
      "epoch": 0.02873,
      "grad_norm": 0.7828397443503498,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 2873
    },
    {
      "epoch": 0.02874,
      "grad_norm": 0.7046479053883313,
      "learning_rate": 0.003,
      "loss": 4.1997,
      "step": 2874
    },
    {
      "epoch": 0.02875,
      "grad_norm": 0.6539896261531868,
      "learning_rate": 0.003,
      "loss": 4.2062,
      "step": 2875
    },
    {
      "epoch": 0.02876,
      "grad_norm": 0.5905081172799793,
      "learning_rate": 0.003,
      "loss": 4.1857,
      "step": 2876
    },
    {
      "epoch": 0.02877,
      "grad_norm": 0.5633633650090469,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 2877
    },
    {
      "epoch": 0.02878,
      "grad_norm": 0.6177245641938598,
      "learning_rate": 0.003,
      "loss": 4.1622,
      "step": 2878
    },
    {
      "epoch": 0.02879,
      "grad_norm": 0.6747485726027824,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 2879
    },
    {
      "epoch": 0.0288,
      "grad_norm": 0.9112144299725644,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 2880
    },
    {
      "epoch": 0.02881,
      "grad_norm": 1.1185243563448626,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 2881
    },
    {
      "epoch": 0.02882,
      "grad_norm": 0.7715679266943174,
      "learning_rate": 0.003,
      "loss": 4.2008,
      "step": 2882
    },
    {
      "epoch": 0.02883,
      "grad_norm": 0.8113547513229802,
      "learning_rate": 0.003,
      "loss": 4.1841,
      "step": 2883
    },
    {
      "epoch": 0.02884,
      "grad_norm": 0.9501941505139021,
      "learning_rate": 0.003,
      "loss": 4.2319,
      "step": 2884
    },
    {
      "epoch": 0.02885,
      "grad_norm": 0.8794293648016848,
      "learning_rate": 0.003,
      "loss": 4.1687,
      "step": 2885
    },
    {
      "epoch": 0.02886,
      "grad_norm": 0.6791421144716309,
      "learning_rate": 0.003,
      "loss": 4.1926,
      "step": 2886
    },
    {
      "epoch": 0.02887,
      "grad_norm": 0.7217161523796813,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 2887
    },
    {
      "epoch": 0.02888,
      "grad_norm": 0.6947494994368266,
      "learning_rate": 0.003,
      "loss": 4.1758,
      "step": 2888
    },
    {
      "epoch": 0.02889,
      "grad_norm": 0.736779142809904,
      "learning_rate": 0.003,
      "loss": 4.168,
      "step": 2889
    },
    {
      "epoch": 0.0289,
      "grad_norm": 0.7371012950268315,
      "learning_rate": 0.003,
      "loss": 4.1974,
      "step": 2890
    },
    {
      "epoch": 0.02891,
      "grad_norm": 0.7996974659760072,
      "learning_rate": 0.003,
      "loss": 4.217,
      "step": 2891
    },
    {
      "epoch": 0.02892,
      "grad_norm": 0.8033610158227403,
      "learning_rate": 0.003,
      "loss": 4.1701,
      "step": 2892
    },
    {
      "epoch": 0.02893,
      "grad_norm": 0.7086173042508274,
      "learning_rate": 0.003,
      "loss": 4.1738,
      "step": 2893
    },
    {
      "epoch": 0.02894,
      "grad_norm": 0.6418053792744189,
      "learning_rate": 0.003,
      "loss": 4.1778,
      "step": 2894
    },
    {
      "epoch": 0.02895,
      "grad_norm": 0.6478672327859655,
      "learning_rate": 0.003,
      "loss": 4.2197,
      "step": 2895
    },
    {
      "epoch": 0.02896,
      "grad_norm": 0.640640821327547,
      "learning_rate": 0.003,
      "loss": 4.1749,
      "step": 2896
    },
    {
      "epoch": 0.02897,
      "grad_norm": 0.5304859237589287,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 2897
    },
    {
      "epoch": 0.02898,
      "grad_norm": 0.5580042897470411,
      "learning_rate": 0.003,
      "loss": 4.1966,
      "step": 2898
    },
    {
      "epoch": 0.02899,
      "grad_norm": 0.7047206692629422,
      "learning_rate": 0.003,
      "loss": 4.1829,
      "step": 2899
    },
    {
      "epoch": 0.029,
      "grad_norm": 0.8635840660671906,
      "learning_rate": 0.003,
      "loss": 4.1794,
      "step": 2900
    },
    {
      "epoch": 0.02901,
      "grad_norm": 0.9189375044658029,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 2901
    },
    {
      "epoch": 0.02902,
      "grad_norm": 0.708585231495253,
      "learning_rate": 0.003,
      "loss": 4.2109,
      "step": 2902
    },
    {
      "epoch": 0.02903,
      "grad_norm": 0.6446440355575891,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 2903
    },
    {
      "epoch": 0.02904,
      "grad_norm": 0.8782661322489321,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 2904
    },
    {
      "epoch": 0.02905,
      "grad_norm": 0.9531560335950827,
      "learning_rate": 0.003,
      "loss": 4.2128,
      "step": 2905
    },
    {
      "epoch": 0.02906,
      "grad_norm": 0.855726406973352,
      "learning_rate": 0.003,
      "loss": 4.192,
      "step": 2906
    },
    {
      "epoch": 0.02907,
      "grad_norm": 0.8360790928903014,
      "learning_rate": 0.003,
      "loss": 4.168,
      "step": 2907
    },
    {
      "epoch": 0.02908,
      "grad_norm": 1.006275418407078,
      "learning_rate": 0.003,
      "loss": 4.1962,
      "step": 2908
    },
    {
      "epoch": 0.02909,
      "grad_norm": 1.069782048483947,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 2909
    },
    {
      "epoch": 0.0291,
      "grad_norm": 0.9075229413559223,
      "learning_rate": 0.003,
      "loss": 4.2047,
      "step": 2910
    },
    {
      "epoch": 0.02911,
      "grad_norm": 0.8275942965443872,
      "learning_rate": 0.003,
      "loss": 4.1994,
      "step": 2911
    },
    {
      "epoch": 0.02912,
      "grad_norm": 0.8193725896423978,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 2912
    },
    {
      "epoch": 0.02913,
      "grad_norm": 0.8507541445865096,
      "learning_rate": 0.003,
      "loss": 4.205,
      "step": 2913
    },
    {
      "epoch": 0.02914,
      "grad_norm": 0.8452395132518625,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 2914
    },
    {
      "epoch": 0.02915,
      "grad_norm": 0.9311887044384434,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 2915
    },
    {
      "epoch": 0.02916,
      "grad_norm": 0.8243141989685264,
      "learning_rate": 0.003,
      "loss": 4.1979,
      "step": 2916
    },
    {
      "epoch": 0.02917,
      "grad_norm": 0.8091183982304383,
      "learning_rate": 0.003,
      "loss": 4.2266,
      "step": 2917
    },
    {
      "epoch": 0.02918,
      "grad_norm": 0.7297783258634746,
      "learning_rate": 0.003,
      "loss": 4.1808,
      "step": 2918
    },
    {
      "epoch": 0.02919,
      "grad_norm": 0.7037586226632049,
      "learning_rate": 0.003,
      "loss": 4.2193,
      "step": 2919
    },
    {
      "epoch": 0.0292,
      "grad_norm": 0.7593009293515595,
      "learning_rate": 0.003,
      "loss": 4.1588,
      "step": 2920
    },
    {
      "epoch": 0.02921,
      "grad_norm": 0.7917149655134618,
      "learning_rate": 0.003,
      "loss": 4.2005,
      "step": 2921
    },
    {
      "epoch": 0.02922,
      "grad_norm": 0.9187945531692804,
      "learning_rate": 0.003,
      "loss": 4.2011,
      "step": 2922
    },
    {
      "epoch": 0.02923,
      "grad_norm": 1.0139939826189068,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 2923
    },
    {
      "epoch": 0.02924,
      "grad_norm": 1.2164555418920324,
      "learning_rate": 0.003,
      "loss": 4.2224,
      "step": 2924
    },
    {
      "epoch": 0.02925,
      "grad_norm": 0.7496642706044158,
      "learning_rate": 0.003,
      "loss": 4.215,
      "step": 2925
    },
    {
      "epoch": 0.02926,
      "grad_norm": 0.6478065305573352,
      "learning_rate": 0.003,
      "loss": 4.1896,
      "step": 2926
    },
    {
      "epoch": 0.02927,
      "grad_norm": 0.7254565724940762,
      "learning_rate": 0.003,
      "loss": 4.1952,
      "step": 2927
    },
    {
      "epoch": 0.02928,
      "grad_norm": 0.8685255954161141,
      "learning_rate": 0.003,
      "loss": 4.2213,
      "step": 2928
    },
    {
      "epoch": 0.02929,
      "grad_norm": 0.9237416579241535,
      "learning_rate": 0.003,
      "loss": 4.2095,
      "step": 2929
    },
    {
      "epoch": 0.0293,
      "grad_norm": 0.8372395013359059,
      "learning_rate": 0.003,
      "loss": 4.1992,
      "step": 2930
    },
    {
      "epoch": 0.02931,
      "grad_norm": 0.7932357657287773,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2931
    },
    {
      "epoch": 0.02932,
      "grad_norm": 0.7073630933550545,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 2932
    },
    {
      "epoch": 0.02933,
      "grad_norm": 0.7308618206431018,
      "learning_rate": 0.003,
      "loss": 4.2074,
      "step": 2933
    },
    {
      "epoch": 0.02934,
      "grad_norm": 0.6003080161076961,
      "learning_rate": 0.003,
      "loss": 4.1908,
      "step": 2934
    },
    {
      "epoch": 0.02935,
      "grad_norm": 0.5616005585738562,
      "learning_rate": 0.003,
      "loss": 4.1759,
      "step": 2935
    },
    {
      "epoch": 0.02936,
      "grad_norm": 0.5762089224720499,
      "learning_rate": 0.003,
      "loss": 4.193,
      "step": 2936
    },
    {
      "epoch": 0.02937,
      "grad_norm": 0.6340624962039538,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 2937
    },
    {
      "epoch": 0.02938,
      "grad_norm": 0.5985017877094587,
      "learning_rate": 0.003,
      "loss": 4.1887,
      "step": 2938
    },
    {
      "epoch": 0.02939,
      "grad_norm": 0.5822648909618956,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 2939
    },
    {
      "epoch": 0.0294,
      "grad_norm": 0.5818506972026382,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 2940
    },
    {
      "epoch": 0.02941,
      "grad_norm": 0.5990628549146402,
      "learning_rate": 0.003,
      "loss": 4.1977,
      "step": 2941
    },
    {
      "epoch": 0.02942,
      "grad_norm": 0.5191939816216448,
      "learning_rate": 0.003,
      "loss": 4.1819,
      "step": 2942
    },
    {
      "epoch": 0.02943,
      "grad_norm": 0.4807951045470019,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 2943
    },
    {
      "epoch": 0.02944,
      "grad_norm": 0.5163327805251324,
      "learning_rate": 0.003,
      "loss": 4.1874,
      "step": 2944
    },
    {
      "epoch": 0.02945,
      "grad_norm": 0.6847252582164058,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 2945
    },
    {
      "epoch": 0.02946,
      "grad_norm": 1.187036076408125,
      "learning_rate": 0.003,
      "loss": 4.1786,
      "step": 2946
    },
    {
      "epoch": 0.02947,
      "grad_norm": 1.1413035518662917,
      "learning_rate": 0.003,
      "loss": 4.2213,
      "step": 2947
    },
    {
      "epoch": 0.02948,
      "grad_norm": 0.701324069469179,
      "learning_rate": 0.003,
      "loss": 4.1882,
      "step": 2948
    },
    {
      "epoch": 0.02949,
      "grad_norm": 0.8381653938972113,
      "learning_rate": 0.003,
      "loss": 4.1676,
      "step": 2949
    },
    {
      "epoch": 0.0295,
      "grad_norm": 1.0096266796093847,
      "learning_rate": 0.003,
      "loss": 4.1958,
      "step": 2950
    },
    {
      "epoch": 0.02951,
      "grad_norm": 0.8969101683640668,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 2951
    },
    {
      "epoch": 0.02952,
      "grad_norm": 0.7052992950548165,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 2952
    },
    {
      "epoch": 0.02953,
      "grad_norm": 0.7325132030616415,
      "learning_rate": 0.003,
      "loss": 4.1805,
      "step": 2953
    },
    {
      "epoch": 0.02954,
      "grad_norm": 0.750908559373497,
      "learning_rate": 0.003,
      "loss": 4.2202,
      "step": 2954
    },
    {
      "epoch": 0.02955,
      "grad_norm": 0.8349240528523505,
      "learning_rate": 0.003,
      "loss": 4.1975,
      "step": 2955
    },
    {
      "epoch": 0.02956,
      "grad_norm": 0.8858972744774044,
      "learning_rate": 0.003,
      "loss": 4.1874,
      "step": 2956
    },
    {
      "epoch": 0.02957,
      "grad_norm": 0.7827116360148241,
      "learning_rate": 0.003,
      "loss": 4.2001,
      "step": 2957
    },
    {
      "epoch": 0.02958,
      "grad_norm": 0.5580040747767873,
      "learning_rate": 0.003,
      "loss": 4.2039,
      "step": 2958
    },
    {
      "epoch": 0.02959,
      "grad_norm": 0.6281493340504504,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 2959
    },
    {
      "epoch": 0.0296,
      "grad_norm": 0.705986539007765,
      "learning_rate": 0.003,
      "loss": 4.2031,
      "step": 2960
    },
    {
      "epoch": 0.02961,
      "grad_norm": 0.7736093332970924,
      "learning_rate": 0.003,
      "loss": 4.176,
      "step": 2961
    },
    {
      "epoch": 0.02962,
      "grad_norm": 0.8407411528623118,
      "learning_rate": 0.003,
      "loss": 4.1877,
      "step": 2962
    },
    {
      "epoch": 0.02963,
      "grad_norm": 0.8815056405402751,
      "learning_rate": 0.003,
      "loss": 4.165,
      "step": 2963
    },
    {
      "epoch": 0.02964,
      "grad_norm": 0.8322635911509938,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 2964
    },
    {
      "epoch": 0.02965,
      "grad_norm": 0.7078370731458429,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 2965
    },
    {
      "epoch": 0.02966,
      "grad_norm": 0.8092500020138753,
      "learning_rate": 0.003,
      "loss": 4.2062,
      "step": 2966
    },
    {
      "epoch": 0.02967,
      "grad_norm": 0.8427099437715561,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 2967
    },
    {
      "epoch": 0.02968,
      "grad_norm": 0.8731297599816912,
      "learning_rate": 0.003,
      "loss": 4.1942,
      "step": 2968
    },
    {
      "epoch": 0.02969,
      "grad_norm": 0.9059211009671025,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 2969
    },
    {
      "epoch": 0.0297,
      "grad_norm": 0.9050829103933709,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 2970
    },
    {
      "epoch": 0.02971,
      "grad_norm": 0.8269889845972395,
      "learning_rate": 0.003,
      "loss": 4.1982,
      "step": 2971
    },
    {
      "epoch": 0.02972,
      "grad_norm": 0.9293646500807372,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 2972
    },
    {
      "epoch": 0.02973,
      "grad_norm": 0.8613265194586818,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 2973
    },
    {
      "epoch": 0.02974,
      "grad_norm": 0.7340587403227173,
      "learning_rate": 0.003,
      "loss": 4.1916,
      "step": 2974
    },
    {
      "epoch": 0.02975,
      "grad_norm": 0.8605959770191105,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 2975
    },
    {
      "epoch": 0.02976,
      "grad_norm": 0.90470438565766,
      "learning_rate": 0.003,
      "loss": 4.1986,
      "step": 2976
    },
    {
      "epoch": 0.02977,
      "grad_norm": 0.8424713452839194,
      "learning_rate": 0.003,
      "loss": 4.1814,
      "step": 2977
    },
    {
      "epoch": 0.02978,
      "grad_norm": 0.8244805201805542,
      "learning_rate": 0.003,
      "loss": 4.2015,
      "step": 2978
    },
    {
      "epoch": 0.02979,
      "grad_norm": 0.776777955492617,
      "learning_rate": 0.003,
      "loss": 4.184,
      "step": 2979
    },
    {
      "epoch": 0.0298,
      "grad_norm": 0.8027239829996253,
      "learning_rate": 0.003,
      "loss": 4.1903,
      "step": 2980
    },
    {
      "epoch": 0.02981,
      "grad_norm": 0.8379341642233197,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 2981
    },
    {
      "epoch": 0.02982,
      "grad_norm": 0.8371771421098021,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 2982
    },
    {
      "epoch": 0.02983,
      "grad_norm": 0.963864757218071,
      "learning_rate": 0.003,
      "loss": 4.215,
      "step": 2983
    },
    {
      "epoch": 0.02984,
      "grad_norm": 0.9620939482544487,
      "learning_rate": 0.003,
      "loss": 4.1919,
      "step": 2984
    },
    {
      "epoch": 0.02985,
      "grad_norm": 0.7838854043078648,
      "learning_rate": 0.003,
      "loss": 4.2185,
      "step": 2985
    },
    {
      "epoch": 0.02986,
      "grad_norm": 0.5932011838530595,
      "learning_rate": 0.003,
      "loss": 4.1694,
      "step": 2986
    },
    {
      "epoch": 0.02987,
      "grad_norm": 0.6307482323122638,
      "learning_rate": 0.003,
      "loss": 4.1654,
      "step": 2987
    },
    {
      "epoch": 0.02988,
      "grad_norm": 0.774070001834325,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 2988
    },
    {
      "epoch": 0.02989,
      "grad_norm": 0.9409000008465417,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 2989
    },
    {
      "epoch": 0.0299,
      "grad_norm": 0.8897814762817067,
      "learning_rate": 0.003,
      "loss": 4.1573,
      "step": 2990
    },
    {
      "epoch": 0.02991,
      "grad_norm": 0.7283012653732146,
      "learning_rate": 0.003,
      "loss": 4.2071,
      "step": 2991
    },
    {
      "epoch": 0.02992,
      "grad_norm": 0.8056037736979436,
      "learning_rate": 0.003,
      "loss": 4.2066,
      "step": 2992
    },
    {
      "epoch": 0.02993,
      "grad_norm": 0.8453294693115473,
      "learning_rate": 0.003,
      "loss": 4.1756,
      "step": 2993
    },
    {
      "epoch": 0.02994,
      "grad_norm": 0.7681040966163926,
      "learning_rate": 0.003,
      "loss": 4.1818,
      "step": 2994
    },
    {
      "epoch": 0.02995,
      "grad_norm": 0.737046402942997,
      "learning_rate": 0.003,
      "loss": 4.2193,
      "step": 2995
    },
    {
      "epoch": 0.02996,
      "grad_norm": 0.8016830093512028,
      "learning_rate": 0.003,
      "loss": 4.2066,
      "step": 2996
    },
    {
      "epoch": 0.02997,
      "grad_norm": 0.9076425430381243,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 2997
    },
    {
      "epoch": 0.02998,
      "grad_norm": 0.9119888579778009,
      "learning_rate": 0.003,
      "loss": 4.1736,
      "step": 2998
    },
    {
      "epoch": 0.02999,
      "grad_norm": 0.8734639454385718,
      "learning_rate": 0.003,
      "loss": 4.1667,
      "step": 2999
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.8753881822879969,
      "learning_rate": 0.003,
      "loss": 4.2133,
      "step": 3000
    },
    {
      "epoch": 0.03001,
      "grad_norm": 0.6613925712555209,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 3001
    },
    {
      "epoch": 0.03002,
      "grad_norm": 0.6268534594644447,
      "learning_rate": 0.003,
      "loss": 4.1859,
      "step": 3002
    },
    {
      "epoch": 0.03003,
      "grad_norm": 0.7131894634569484,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 3003
    },
    {
      "epoch": 0.03004,
      "grad_norm": 0.7222298682350133,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 3004
    },
    {
      "epoch": 0.03005,
      "grad_norm": 0.7851659647153477,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 3005
    },
    {
      "epoch": 0.03006,
      "grad_norm": 0.9823948008432,
      "learning_rate": 0.003,
      "loss": 4.1985,
      "step": 3006
    },
    {
      "epoch": 0.03007,
      "grad_norm": 1.0677576033708733,
      "learning_rate": 0.003,
      "loss": 4.1791,
      "step": 3007
    },
    {
      "epoch": 0.03008,
      "grad_norm": 0.765529440508669,
      "learning_rate": 0.003,
      "loss": 4.1951,
      "step": 3008
    },
    {
      "epoch": 0.03009,
      "grad_norm": 0.7479572849164975,
      "learning_rate": 0.003,
      "loss": 4.1694,
      "step": 3009
    },
    {
      "epoch": 0.0301,
      "grad_norm": 0.7406412396521943,
      "learning_rate": 0.003,
      "loss": 4.194,
      "step": 3010
    },
    {
      "epoch": 0.03011,
      "grad_norm": 0.7262544536437181,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 3011
    },
    {
      "epoch": 0.03012,
      "grad_norm": 0.7544052154649398,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 3012
    },
    {
      "epoch": 0.03013,
      "grad_norm": 0.7162089680767648,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 3013
    },
    {
      "epoch": 0.03014,
      "grad_norm": 0.6940516993879415,
      "learning_rate": 0.003,
      "loss": 4.1682,
      "step": 3014
    },
    {
      "epoch": 0.03015,
      "grad_norm": 0.6844442507390428,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 3015
    },
    {
      "epoch": 0.03016,
      "grad_norm": 0.5615506405814443,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 3016
    },
    {
      "epoch": 0.03017,
      "grad_norm": 0.6754454864107804,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 3017
    },
    {
      "epoch": 0.03018,
      "grad_norm": 0.768819694641026,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 3018
    },
    {
      "epoch": 0.03019,
      "grad_norm": 0.7485745058334217,
      "learning_rate": 0.003,
      "loss": 4.1872,
      "step": 3019
    },
    {
      "epoch": 0.0302,
      "grad_norm": 0.7254394591812489,
      "learning_rate": 0.003,
      "loss": 4.1737,
      "step": 3020
    },
    {
      "epoch": 0.03021,
      "grad_norm": 0.8549519124776134,
      "learning_rate": 0.003,
      "loss": 4.2019,
      "step": 3021
    },
    {
      "epoch": 0.03022,
      "grad_norm": 1.0108282452619028,
      "learning_rate": 0.003,
      "loss": 4.2049,
      "step": 3022
    },
    {
      "epoch": 0.03023,
      "grad_norm": 1.1260058425339694,
      "learning_rate": 0.003,
      "loss": 4.2268,
      "step": 3023
    },
    {
      "epoch": 0.03024,
      "grad_norm": 0.8578380008226317,
      "learning_rate": 0.003,
      "loss": 4.2105,
      "step": 3024
    },
    {
      "epoch": 0.03025,
      "grad_norm": 0.8141336248728617,
      "learning_rate": 0.003,
      "loss": 4.1943,
      "step": 3025
    },
    {
      "epoch": 0.03026,
      "grad_norm": 0.8343860321958456,
      "learning_rate": 0.003,
      "loss": 4.1674,
      "step": 3026
    },
    {
      "epoch": 0.03027,
      "grad_norm": 0.7174969120361776,
      "learning_rate": 0.003,
      "loss": 4.1858,
      "step": 3027
    },
    {
      "epoch": 0.03028,
      "grad_norm": 0.7652490077013342,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 3028
    },
    {
      "epoch": 0.03029,
      "grad_norm": 0.8261933696416869,
      "learning_rate": 0.003,
      "loss": 4.1934,
      "step": 3029
    },
    {
      "epoch": 0.0303,
      "grad_norm": 0.8845522708435402,
      "learning_rate": 0.003,
      "loss": 4.1774,
      "step": 3030
    },
    {
      "epoch": 0.03031,
      "grad_norm": 0.8283071846166148,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 3031
    },
    {
      "epoch": 0.03032,
      "grad_norm": 0.7987428540661371,
      "learning_rate": 0.003,
      "loss": 4.1851,
      "step": 3032
    },
    {
      "epoch": 0.03033,
      "grad_norm": 0.792150443356606,
      "learning_rate": 0.003,
      "loss": 4.1782,
      "step": 3033
    },
    {
      "epoch": 0.03034,
      "grad_norm": 0.7134255568100923,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 3034
    },
    {
      "epoch": 0.03035,
      "grad_norm": 0.5893848032969043,
      "learning_rate": 0.003,
      "loss": 4.2068,
      "step": 3035
    },
    {
      "epoch": 0.03036,
      "grad_norm": 0.5971843518344567,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 3036
    },
    {
      "epoch": 0.03037,
      "grad_norm": 0.5650725934297753,
      "learning_rate": 0.003,
      "loss": 4.161,
      "step": 3037
    },
    {
      "epoch": 0.03038,
      "grad_norm": 0.6879606089602287,
      "learning_rate": 0.003,
      "loss": 4.1635,
      "step": 3038
    },
    {
      "epoch": 0.03039,
      "grad_norm": 0.893681525755944,
      "learning_rate": 0.003,
      "loss": 4.1741,
      "step": 3039
    },
    {
      "epoch": 0.0304,
      "grad_norm": 1.133698039918333,
      "learning_rate": 0.003,
      "loss": 4.192,
      "step": 3040
    },
    {
      "epoch": 0.03041,
      "grad_norm": 0.8486015662534218,
      "learning_rate": 0.003,
      "loss": 4.1894,
      "step": 3041
    },
    {
      "epoch": 0.03042,
      "grad_norm": 0.6236514545633368,
      "learning_rate": 0.003,
      "loss": 4.2098,
      "step": 3042
    },
    {
      "epoch": 0.03043,
      "grad_norm": 0.6770107818972176,
      "learning_rate": 0.003,
      "loss": 4.1618,
      "step": 3043
    },
    {
      "epoch": 0.03044,
      "grad_norm": 0.7747790040576381,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 3044
    },
    {
      "epoch": 0.03045,
      "grad_norm": 0.8128614331690203,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 3045
    },
    {
      "epoch": 0.03046,
      "grad_norm": 0.7607067403493086,
      "learning_rate": 0.003,
      "loss": 4.1756,
      "step": 3046
    },
    {
      "epoch": 0.03047,
      "grad_norm": 0.749167754293152,
      "learning_rate": 0.003,
      "loss": 4.1823,
      "step": 3047
    },
    {
      "epoch": 0.03048,
      "grad_norm": 0.8841149975528951,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 3048
    },
    {
      "epoch": 0.03049,
      "grad_norm": 0.902586510586676,
      "learning_rate": 0.003,
      "loss": 4.1608,
      "step": 3049
    },
    {
      "epoch": 0.0305,
      "grad_norm": 0.918737753442424,
      "learning_rate": 0.003,
      "loss": 4.1801,
      "step": 3050
    },
    {
      "epoch": 0.03051,
      "grad_norm": 0.8824746251699788,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 3051
    },
    {
      "epoch": 0.03052,
      "grad_norm": 0.8446124122566411,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 3052
    },
    {
      "epoch": 0.03053,
      "grad_norm": 0.7873211881465576,
      "learning_rate": 0.003,
      "loss": 4.1977,
      "step": 3053
    },
    {
      "epoch": 0.03054,
      "grad_norm": 0.8281204675386242,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 3054
    },
    {
      "epoch": 0.03055,
      "grad_norm": 0.8380195613474594,
      "learning_rate": 0.003,
      "loss": 4.1856,
      "step": 3055
    },
    {
      "epoch": 0.03056,
      "grad_norm": 0.8532831277535057,
      "learning_rate": 0.003,
      "loss": 4.1983,
      "step": 3056
    },
    {
      "epoch": 0.03057,
      "grad_norm": 0.8740896523349351,
      "learning_rate": 0.003,
      "loss": 4.1932,
      "step": 3057
    },
    {
      "epoch": 0.03058,
      "grad_norm": 0.9800189086277834,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 3058
    },
    {
      "epoch": 0.03059,
      "grad_norm": 0.9987130256226004,
      "learning_rate": 0.003,
      "loss": 4.183,
      "step": 3059
    },
    {
      "epoch": 0.0306,
      "grad_norm": 0.9698665311539535,
      "learning_rate": 0.003,
      "loss": 4.1715,
      "step": 3060
    },
    {
      "epoch": 0.03061,
      "grad_norm": 0.9452499042848614,
      "learning_rate": 0.003,
      "loss": 4.1983,
      "step": 3061
    },
    {
      "epoch": 0.03062,
      "grad_norm": 0.8545512015880272,
      "learning_rate": 0.003,
      "loss": 4.221,
      "step": 3062
    },
    {
      "epoch": 0.03063,
      "grad_norm": 0.8263018139013485,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 3063
    },
    {
      "epoch": 0.03064,
      "grad_norm": 0.8252780037841233,
      "learning_rate": 0.003,
      "loss": 4.196,
      "step": 3064
    },
    {
      "epoch": 0.03065,
      "grad_norm": 0.6876624447118844,
      "learning_rate": 0.003,
      "loss": 4.1593,
      "step": 3065
    },
    {
      "epoch": 0.03066,
      "grad_norm": 0.6367341430832967,
      "learning_rate": 0.003,
      "loss": 4.2021,
      "step": 3066
    },
    {
      "epoch": 0.03067,
      "grad_norm": 0.6017521480419693,
      "learning_rate": 0.003,
      "loss": 4.2046,
      "step": 3067
    },
    {
      "epoch": 0.03068,
      "grad_norm": 0.6343160042573421,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 3068
    },
    {
      "epoch": 0.03069,
      "grad_norm": 0.7404679311351042,
      "learning_rate": 0.003,
      "loss": 4.159,
      "step": 3069
    },
    {
      "epoch": 0.0307,
      "grad_norm": 0.9866650257634313,
      "learning_rate": 0.003,
      "loss": 4.1769,
      "step": 3070
    },
    {
      "epoch": 0.03071,
      "grad_norm": 1.1343017898084369,
      "learning_rate": 0.003,
      "loss": 4.1897,
      "step": 3071
    },
    {
      "epoch": 0.03072,
      "grad_norm": 0.8123202985082579,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 3072
    },
    {
      "epoch": 0.03073,
      "grad_norm": 0.7144691968854603,
      "learning_rate": 0.003,
      "loss": 4.179,
      "step": 3073
    },
    {
      "epoch": 0.03074,
      "grad_norm": 0.6377413488302888,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 3074
    },
    {
      "epoch": 0.03075,
      "grad_norm": 0.5884624483293499,
      "learning_rate": 0.003,
      "loss": 4.1988,
      "step": 3075
    },
    {
      "epoch": 0.03076,
      "grad_norm": 0.6087144704470088,
      "learning_rate": 0.003,
      "loss": 4.1903,
      "step": 3076
    },
    {
      "epoch": 0.03077,
      "grad_norm": 0.6783576790084909,
      "learning_rate": 0.003,
      "loss": 4.187,
      "step": 3077
    },
    {
      "epoch": 0.03078,
      "grad_norm": 0.788987640301813,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 3078
    },
    {
      "epoch": 0.03079,
      "grad_norm": 0.8815514064205557,
      "learning_rate": 0.003,
      "loss": 4.167,
      "step": 3079
    },
    {
      "epoch": 0.0308,
      "grad_norm": 0.9464779295877167,
      "learning_rate": 0.003,
      "loss": 4.1986,
      "step": 3080
    },
    {
      "epoch": 0.03081,
      "grad_norm": 0.8460781162865396,
      "learning_rate": 0.003,
      "loss": 4.1704,
      "step": 3081
    },
    {
      "epoch": 0.03082,
      "grad_norm": 0.661301125290537,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 3082
    },
    {
      "epoch": 0.03083,
      "grad_norm": 0.6453550230590483,
      "learning_rate": 0.003,
      "loss": 4.1674,
      "step": 3083
    },
    {
      "epoch": 0.03084,
      "grad_norm": 0.6680921770876664,
      "learning_rate": 0.003,
      "loss": 4.1802,
      "step": 3084
    },
    {
      "epoch": 0.03085,
      "grad_norm": 0.717538128453225,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 3085
    },
    {
      "epoch": 0.03086,
      "grad_norm": 0.7497027610096151,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 3086
    },
    {
      "epoch": 0.03087,
      "grad_norm": 0.7866492351798048,
      "learning_rate": 0.003,
      "loss": 4.1738,
      "step": 3087
    },
    {
      "epoch": 0.03088,
      "grad_norm": 0.8335106050070741,
      "learning_rate": 0.003,
      "loss": 4.1936,
      "step": 3088
    },
    {
      "epoch": 0.03089,
      "grad_norm": 0.8694998639476388,
      "learning_rate": 0.003,
      "loss": 4.1593,
      "step": 3089
    },
    {
      "epoch": 0.0309,
      "grad_norm": 0.7525867056979888,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 3090
    },
    {
      "epoch": 0.03091,
      "grad_norm": 0.6460307539732922,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 3091
    },
    {
      "epoch": 0.03092,
      "grad_norm": 0.7095786123893822,
      "learning_rate": 0.003,
      "loss": 4.1521,
      "step": 3092
    },
    {
      "epoch": 0.03093,
      "grad_norm": 0.7678464064388238,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 3093
    },
    {
      "epoch": 0.03094,
      "grad_norm": 0.7114573750951001,
      "learning_rate": 0.003,
      "loss": 4.1689,
      "step": 3094
    },
    {
      "epoch": 0.03095,
      "grad_norm": 0.6317515456248899,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 3095
    },
    {
      "epoch": 0.03096,
      "grad_norm": 0.6225934754004887,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 3096
    },
    {
      "epoch": 0.03097,
      "grad_norm": 0.6381632322240677,
      "learning_rate": 0.003,
      "loss": 4.1813,
      "step": 3097
    },
    {
      "epoch": 0.03098,
      "grad_norm": 0.5493766658656782,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 3098
    },
    {
      "epoch": 0.03099,
      "grad_norm": 0.5474679793693559,
      "learning_rate": 0.003,
      "loss": 4.1656,
      "step": 3099
    },
    {
      "epoch": 0.031,
      "grad_norm": 0.5637661199247416,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 3100
    },
    {
      "epoch": 0.03101,
      "grad_norm": 0.6168936290521686,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 3101
    },
    {
      "epoch": 0.03102,
      "grad_norm": 0.7030123347970183,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 3102
    },
    {
      "epoch": 0.03103,
      "grad_norm": 0.8124137266650002,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 3103
    },
    {
      "epoch": 0.03104,
      "grad_norm": 1.1527056277371386,
      "learning_rate": 0.003,
      "loss": 4.1653,
      "step": 3104
    },
    {
      "epoch": 0.03105,
      "grad_norm": 1.0637389066586085,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 3105
    },
    {
      "epoch": 0.03106,
      "grad_norm": 0.853665919221622,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 3106
    },
    {
      "epoch": 0.03107,
      "grad_norm": 0.6820330366060209,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 3107
    },
    {
      "epoch": 0.03108,
      "grad_norm": 0.7901175506821255,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 3108
    },
    {
      "epoch": 0.03109,
      "grad_norm": 0.8999453911608919,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 3109
    },
    {
      "epoch": 0.0311,
      "grad_norm": 0.9752866529372375,
      "learning_rate": 0.003,
      "loss": 4.1901,
      "step": 3110
    },
    {
      "epoch": 0.03111,
      "grad_norm": 0.9099293959689805,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 3111
    },
    {
      "epoch": 0.03112,
      "grad_norm": 0.8946740375764577,
      "learning_rate": 0.003,
      "loss": 4.1897,
      "step": 3112
    },
    {
      "epoch": 0.03113,
      "grad_norm": 0.7357356872971288,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 3113
    },
    {
      "epoch": 0.03114,
      "grad_norm": 0.7508640013687926,
      "learning_rate": 0.003,
      "loss": 4.1761,
      "step": 3114
    },
    {
      "epoch": 0.03115,
      "grad_norm": 0.7602528729276722,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 3115
    },
    {
      "epoch": 0.03116,
      "grad_norm": 0.837948579347557,
      "learning_rate": 0.003,
      "loss": 4.1661,
      "step": 3116
    },
    {
      "epoch": 0.03117,
      "grad_norm": 0.9227910597494478,
      "learning_rate": 0.003,
      "loss": 4.1791,
      "step": 3117
    },
    {
      "epoch": 0.03118,
      "grad_norm": 0.8378702011669498,
      "learning_rate": 0.003,
      "loss": 4.1779,
      "step": 3118
    },
    {
      "epoch": 0.03119,
      "grad_norm": 0.858148891035245,
      "learning_rate": 0.003,
      "loss": 4.1947,
      "step": 3119
    },
    {
      "epoch": 0.0312,
      "grad_norm": 0.8936706763604597,
      "learning_rate": 0.003,
      "loss": 4.1985,
      "step": 3120
    },
    {
      "epoch": 0.03121,
      "grad_norm": 1.0167864501298352,
      "learning_rate": 0.003,
      "loss": 4.1846,
      "step": 3121
    },
    {
      "epoch": 0.03122,
      "grad_norm": 0.906464003938868,
      "learning_rate": 0.003,
      "loss": 4.2022,
      "step": 3122
    },
    {
      "epoch": 0.03123,
      "grad_norm": 0.8273070233817,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 3123
    },
    {
      "epoch": 0.03124,
      "grad_norm": 0.8276168406814042,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3124
    },
    {
      "epoch": 0.03125,
      "grad_norm": 0.7854717842569616,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 3125
    },
    {
      "epoch": 0.03126,
      "grad_norm": 0.8477910267798683,
      "learning_rate": 0.003,
      "loss": 4.1789,
      "step": 3126
    },
    {
      "epoch": 0.03127,
      "grad_norm": 0.8759271431103222,
      "learning_rate": 0.003,
      "loss": 4.1837,
      "step": 3127
    },
    {
      "epoch": 0.03128,
      "grad_norm": 1.0163995710540448,
      "learning_rate": 0.003,
      "loss": 4.1778,
      "step": 3128
    },
    {
      "epoch": 0.03129,
      "grad_norm": 0.9119429199687523,
      "learning_rate": 0.003,
      "loss": 4.193,
      "step": 3129
    },
    {
      "epoch": 0.0313,
      "grad_norm": 0.8770385869172778,
      "learning_rate": 0.003,
      "loss": 4.1968,
      "step": 3130
    },
    {
      "epoch": 0.03131,
      "grad_norm": 0.768644055037608,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 3131
    },
    {
      "epoch": 0.03132,
      "grad_norm": 0.7588002202266282,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 3132
    },
    {
      "epoch": 0.03133,
      "grad_norm": 0.8833279390524064,
      "learning_rate": 0.003,
      "loss": 4.1902,
      "step": 3133
    },
    {
      "epoch": 0.03134,
      "grad_norm": 0.9445298443136937,
      "learning_rate": 0.003,
      "loss": 4.1915,
      "step": 3134
    },
    {
      "epoch": 0.03135,
      "grad_norm": 0.9704581871113671,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 3135
    },
    {
      "epoch": 0.03136,
      "grad_norm": 0.9377630840593111,
      "learning_rate": 0.003,
      "loss": 4.1782,
      "step": 3136
    },
    {
      "epoch": 0.03137,
      "grad_norm": 0.7590762953987991,
      "learning_rate": 0.003,
      "loss": 4.1745,
      "step": 3137
    },
    {
      "epoch": 0.03138,
      "grad_norm": 0.6615196510693153,
      "learning_rate": 0.003,
      "loss": 4.1722,
      "step": 3138
    },
    {
      "epoch": 0.03139,
      "grad_norm": 0.7417937957321744,
      "learning_rate": 0.003,
      "loss": 4.1818,
      "step": 3139
    },
    {
      "epoch": 0.0314,
      "grad_norm": 0.7539643313234111,
      "learning_rate": 0.003,
      "loss": 4.18,
      "step": 3140
    },
    {
      "epoch": 0.03141,
      "grad_norm": 0.6909968958896604,
      "learning_rate": 0.003,
      "loss": 4.2116,
      "step": 3141
    },
    {
      "epoch": 0.03142,
      "grad_norm": 0.7898474777523338,
      "learning_rate": 0.003,
      "loss": 4.1916,
      "step": 3142
    },
    {
      "epoch": 0.03143,
      "grad_norm": 0.7784656393609168,
      "learning_rate": 0.003,
      "loss": 4.1786,
      "step": 3143
    },
    {
      "epoch": 0.03144,
      "grad_norm": 0.9707258921774178,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 3144
    },
    {
      "epoch": 0.03145,
      "grad_norm": 1.0898056591342788,
      "learning_rate": 0.003,
      "loss": 4.1835,
      "step": 3145
    },
    {
      "epoch": 0.03146,
      "grad_norm": 0.8349734971045476,
      "learning_rate": 0.003,
      "loss": 4.1866,
      "step": 3146
    },
    {
      "epoch": 0.03147,
      "grad_norm": 0.7259181361450073,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 3147
    },
    {
      "epoch": 0.03148,
      "grad_norm": 0.687941001438256,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 3148
    },
    {
      "epoch": 0.03149,
      "grad_norm": 0.7440383665305609,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 3149
    },
    {
      "epoch": 0.0315,
      "grad_norm": 0.7343700663124108,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 3150
    },
    {
      "epoch": 0.03151,
      "grad_norm": 0.701384082950667,
      "learning_rate": 0.003,
      "loss": 4.1825,
      "step": 3151
    },
    {
      "epoch": 0.03152,
      "grad_norm": 0.7986992640658904,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 3152
    },
    {
      "epoch": 0.03153,
      "grad_norm": 0.8011793706646407,
      "learning_rate": 0.003,
      "loss": 4.1933,
      "step": 3153
    },
    {
      "epoch": 0.03154,
      "grad_norm": 0.7113372576063467,
      "learning_rate": 0.003,
      "loss": 4.1855,
      "step": 3154
    },
    {
      "epoch": 0.03155,
      "grad_norm": 0.5804198718855146,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 3155
    },
    {
      "epoch": 0.03156,
      "grad_norm": 0.6180565037439169,
      "learning_rate": 0.003,
      "loss": 4.1541,
      "step": 3156
    },
    {
      "epoch": 0.03157,
      "grad_norm": 0.6588053530299754,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 3157
    },
    {
      "epoch": 0.03158,
      "grad_norm": 0.7176039547135377,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 3158
    },
    {
      "epoch": 0.03159,
      "grad_norm": 0.7544412083171981,
      "learning_rate": 0.003,
      "loss": 4.167,
      "step": 3159
    },
    {
      "epoch": 0.0316,
      "grad_norm": 0.6609856171261774,
      "learning_rate": 0.003,
      "loss": 4.18,
      "step": 3160
    },
    {
      "epoch": 0.03161,
      "grad_norm": 0.601780895016729,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 3161
    },
    {
      "epoch": 0.03162,
      "grad_norm": 0.6040830597762422,
      "learning_rate": 0.003,
      "loss": 4.1883,
      "step": 3162
    },
    {
      "epoch": 0.03163,
      "grad_norm": 0.6320256997655519,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 3163
    },
    {
      "epoch": 0.03164,
      "grad_norm": 0.6389872031652813,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 3164
    },
    {
      "epoch": 0.03165,
      "grad_norm": 0.5915242364794046,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 3165
    },
    {
      "epoch": 0.03166,
      "grad_norm": 0.61505912049187,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 3166
    },
    {
      "epoch": 0.03167,
      "grad_norm": 0.6731926870227466,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 3167
    },
    {
      "epoch": 0.03168,
      "grad_norm": 0.5950823041934726,
      "learning_rate": 0.003,
      "loss": 4.1714,
      "step": 3168
    },
    {
      "epoch": 0.03169,
      "grad_norm": 0.5635445406508717,
      "learning_rate": 0.003,
      "loss": 4.179,
      "step": 3169
    },
    {
      "epoch": 0.0317,
      "grad_norm": 0.6365033301443797,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 3170
    },
    {
      "epoch": 0.03171,
      "grad_norm": 0.7998364639202467,
      "learning_rate": 0.003,
      "loss": 4.1808,
      "step": 3171
    },
    {
      "epoch": 0.03172,
      "grad_norm": 1.0979998450099218,
      "learning_rate": 0.003,
      "loss": 4.1776,
      "step": 3172
    },
    {
      "epoch": 0.03173,
      "grad_norm": 1.1291812351216926,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 3173
    },
    {
      "epoch": 0.03174,
      "grad_norm": 0.8472774425289195,
      "learning_rate": 0.003,
      "loss": 4.1683,
      "step": 3174
    },
    {
      "epoch": 0.03175,
      "grad_norm": 0.6918105971567023,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 3175
    },
    {
      "epoch": 0.03176,
      "grad_norm": 0.6745403402677648,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 3176
    },
    {
      "epoch": 0.03177,
      "grad_norm": 0.8711762889488271,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 3177
    },
    {
      "epoch": 0.03178,
      "grad_norm": 0.9618302167115096,
      "learning_rate": 0.003,
      "loss": 4.22,
      "step": 3178
    },
    {
      "epoch": 0.03179,
      "grad_norm": 0.8866746933425214,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 3179
    },
    {
      "epoch": 0.0318,
      "grad_norm": 0.8756075769559615,
      "learning_rate": 0.003,
      "loss": 4.1827,
      "step": 3180
    },
    {
      "epoch": 0.03181,
      "grad_norm": 0.8428496447166878,
      "learning_rate": 0.003,
      "loss": 4.1912,
      "step": 3181
    },
    {
      "epoch": 0.03182,
      "grad_norm": 0.7661700460210522,
      "learning_rate": 0.003,
      "loss": 4.1876,
      "step": 3182
    },
    {
      "epoch": 0.03183,
      "grad_norm": 0.8692473733272297,
      "learning_rate": 0.003,
      "loss": 4.1965,
      "step": 3183
    },
    {
      "epoch": 0.03184,
      "grad_norm": 0.8297755272881908,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 3184
    },
    {
      "epoch": 0.03185,
      "grad_norm": 0.859558340717006,
      "learning_rate": 0.003,
      "loss": 4.1915,
      "step": 3185
    },
    {
      "epoch": 0.03186,
      "grad_norm": 1.0619931666528437,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 3186
    },
    {
      "epoch": 0.03187,
      "grad_norm": 1.044280375901733,
      "learning_rate": 0.003,
      "loss": 4.1629,
      "step": 3187
    },
    {
      "epoch": 0.03188,
      "grad_norm": 1.076116225662036,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 3188
    },
    {
      "epoch": 0.03189,
      "grad_norm": 1.103493479731777,
      "learning_rate": 0.003,
      "loss": 4.1925,
      "step": 3189
    },
    {
      "epoch": 0.0319,
      "grad_norm": 1.0030010721198397,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 3190
    },
    {
      "epoch": 0.03191,
      "grad_norm": 0.8727680004016529,
      "learning_rate": 0.003,
      "loss": 4.2163,
      "step": 3191
    },
    {
      "epoch": 0.03192,
      "grad_norm": 0.7351212105515794,
      "learning_rate": 0.003,
      "loss": 4.1806,
      "step": 3192
    },
    {
      "epoch": 0.03193,
      "grad_norm": 0.7321735689318333,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 3193
    },
    {
      "epoch": 0.03194,
      "grad_norm": 0.8772456813713311,
      "learning_rate": 0.003,
      "loss": 4.2075,
      "step": 3194
    },
    {
      "epoch": 0.03195,
      "grad_norm": 1.0948458483452161,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 3195
    },
    {
      "epoch": 0.03196,
      "grad_norm": 0.8212538098418293,
      "learning_rate": 0.003,
      "loss": 4.19,
      "step": 3196
    },
    {
      "epoch": 0.03197,
      "grad_norm": 0.6285011842982621,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 3197
    },
    {
      "epoch": 0.03198,
      "grad_norm": 0.6890594394182098,
      "learning_rate": 0.003,
      "loss": 4.1835,
      "step": 3198
    },
    {
      "epoch": 0.03199,
      "grad_norm": 0.7446169713513262,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 3199
    },
    {
      "epoch": 0.032,
      "grad_norm": 0.6287973146330949,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 3200
    },
    {
      "epoch": 0.03201,
      "grad_norm": 0.574028662990781,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 3201
    },
    {
      "epoch": 0.03202,
      "grad_norm": 0.5668166289934877,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 3202
    },
    {
      "epoch": 0.03203,
      "grad_norm": 0.6420200932257698,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 3203
    },
    {
      "epoch": 0.03204,
      "grad_norm": 0.6517793013040037,
      "learning_rate": 0.003,
      "loss": 4.1736,
      "step": 3204
    },
    {
      "epoch": 0.03205,
      "grad_norm": 0.6904481493053295,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 3205
    },
    {
      "epoch": 0.03206,
      "grad_norm": 0.6921511390797982,
      "learning_rate": 0.003,
      "loss": 4.1638,
      "step": 3206
    },
    {
      "epoch": 0.03207,
      "grad_norm": 0.6483492465436955,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 3207
    },
    {
      "epoch": 0.03208,
      "grad_norm": 0.6252426422831405,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 3208
    },
    {
      "epoch": 0.03209,
      "grad_norm": 0.6090985425136507,
      "learning_rate": 0.003,
      "loss": 4.1993,
      "step": 3209
    },
    {
      "epoch": 0.0321,
      "grad_norm": 0.7890939094365457,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 3210
    },
    {
      "epoch": 0.03211,
      "grad_norm": 0.9334403001508826,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 3211
    },
    {
      "epoch": 0.03212,
      "grad_norm": 0.9752293730760951,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 3212
    },
    {
      "epoch": 0.03213,
      "grad_norm": 0.9917130039556156,
      "learning_rate": 0.003,
      "loss": 4.1791,
      "step": 3213
    },
    {
      "epoch": 0.03214,
      "grad_norm": 1.1323357347525596,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 3214
    },
    {
      "epoch": 0.03215,
      "grad_norm": 0.9029631323088454,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 3215
    },
    {
      "epoch": 0.03216,
      "grad_norm": 0.8650700547902456,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 3216
    },
    {
      "epoch": 0.03217,
      "grad_norm": 0.7818754734092035,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 3217
    },
    {
      "epoch": 0.03218,
      "grad_norm": 0.7666544069164494,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 3218
    },
    {
      "epoch": 0.03219,
      "grad_norm": 0.6769062297915203,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 3219
    },
    {
      "epoch": 0.0322,
      "grad_norm": 0.8021614507600279,
      "learning_rate": 0.003,
      "loss": 4.1784,
      "step": 3220
    },
    {
      "epoch": 0.03221,
      "grad_norm": 0.8570263886415102,
      "learning_rate": 0.003,
      "loss": 4.1856,
      "step": 3221
    },
    {
      "epoch": 0.03222,
      "grad_norm": 0.8752167497732125,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 3222
    },
    {
      "epoch": 0.03223,
      "grad_norm": 0.7864443587317127,
      "learning_rate": 0.003,
      "loss": 4.1809,
      "step": 3223
    },
    {
      "epoch": 0.03224,
      "grad_norm": 0.7212110109835455,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 3224
    },
    {
      "epoch": 0.03225,
      "grad_norm": 0.654641816291879,
      "learning_rate": 0.003,
      "loss": 4.172,
      "step": 3225
    },
    {
      "epoch": 0.03226,
      "grad_norm": 0.8355512444532979,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 3226
    },
    {
      "epoch": 0.03227,
      "grad_norm": 0.9048367283289669,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 3227
    },
    {
      "epoch": 0.03228,
      "grad_norm": 0.8603111512659146,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 3228
    },
    {
      "epoch": 0.03229,
      "grad_norm": 0.857474441474401,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 3229
    },
    {
      "epoch": 0.0323,
      "grad_norm": 0.7857692858326722,
      "learning_rate": 0.003,
      "loss": 4.1654,
      "step": 3230
    },
    {
      "epoch": 0.03231,
      "grad_norm": 0.8253717389435257,
      "learning_rate": 0.003,
      "loss": 4.1822,
      "step": 3231
    },
    {
      "epoch": 0.03232,
      "grad_norm": 0.8267544687317752,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 3232
    },
    {
      "epoch": 0.03233,
      "grad_norm": 0.9496934310450251,
      "learning_rate": 0.003,
      "loss": 4.1973,
      "step": 3233
    },
    {
      "epoch": 0.03234,
      "grad_norm": 1.0636307547806063,
      "learning_rate": 0.003,
      "loss": 4.1839,
      "step": 3234
    },
    {
      "epoch": 0.03235,
      "grad_norm": 1.1231119530164886,
      "learning_rate": 0.003,
      "loss": 4.1901,
      "step": 3235
    },
    {
      "epoch": 0.03236,
      "grad_norm": 0.8190269419203875,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 3236
    },
    {
      "epoch": 0.03237,
      "grad_norm": 0.8205044577561036,
      "learning_rate": 0.003,
      "loss": 4.1972,
      "step": 3237
    },
    {
      "epoch": 0.03238,
      "grad_norm": 0.78546472021138,
      "learning_rate": 0.003,
      "loss": 4.1993,
      "step": 3238
    },
    {
      "epoch": 0.03239,
      "grad_norm": 0.7702117279920518,
      "learning_rate": 0.003,
      "loss": 4.1784,
      "step": 3239
    },
    {
      "epoch": 0.0324,
      "grad_norm": 0.8238552543857671,
      "learning_rate": 0.003,
      "loss": 4.1954,
      "step": 3240
    },
    {
      "epoch": 0.03241,
      "grad_norm": 0.9026611610808934,
      "learning_rate": 0.003,
      "loss": 4.1935,
      "step": 3241
    },
    {
      "epoch": 0.03242,
      "grad_norm": 1.0678835814399164,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 3242
    },
    {
      "epoch": 0.03243,
      "grad_norm": 0.9396714595581975,
      "learning_rate": 0.003,
      "loss": 4.161,
      "step": 3243
    },
    {
      "epoch": 0.03244,
      "grad_norm": 0.9280118806328921,
      "learning_rate": 0.003,
      "loss": 4.1836,
      "step": 3244
    },
    {
      "epoch": 0.03245,
      "grad_norm": 0.9538979131563459,
      "learning_rate": 0.003,
      "loss": 4.1945,
      "step": 3245
    },
    {
      "epoch": 0.03246,
      "grad_norm": 0.7413512610145042,
      "learning_rate": 0.003,
      "loss": 4.1767,
      "step": 3246
    },
    {
      "epoch": 0.03247,
      "grad_norm": 0.7567991071999487,
      "learning_rate": 0.003,
      "loss": 4.1856,
      "step": 3247
    },
    {
      "epoch": 0.03248,
      "grad_norm": 0.6112013067709322,
      "learning_rate": 0.003,
      "loss": 4.1978,
      "step": 3248
    },
    {
      "epoch": 0.03249,
      "grad_norm": 0.5490973188468005,
      "learning_rate": 0.003,
      "loss": 4.16,
      "step": 3249
    },
    {
      "epoch": 0.0325,
      "grad_norm": 0.5774791971085714,
      "learning_rate": 0.003,
      "loss": 4.1924,
      "step": 3250
    },
    {
      "epoch": 0.03251,
      "grad_norm": 0.6942876437278124,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 3251
    },
    {
      "epoch": 0.03252,
      "grad_norm": 0.8016029308986741,
      "learning_rate": 0.003,
      "loss": 4.1599,
      "step": 3252
    },
    {
      "epoch": 0.03253,
      "grad_norm": 1.060781307197035,
      "learning_rate": 0.003,
      "loss": 4.1707,
      "step": 3253
    },
    {
      "epoch": 0.03254,
      "grad_norm": 0.9120723115867568,
      "learning_rate": 0.003,
      "loss": 4.1638,
      "step": 3254
    },
    {
      "epoch": 0.03255,
      "grad_norm": 0.656644846174074,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 3255
    },
    {
      "epoch": 0.03256,
      "grad_norm": 0.6737599095064468,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 3256
    },
    {
      "epoch": 0.03257,
      "grad_norm": 0.7252801429473028,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 3257
    },
    {
      "epoch": 0.03258,
      "grad_norm": 0.7056456211965811,
      "learning_rate": 0.003,
      "loss": 4.1664,
      "step": 3258
    },
    {
      "epoch": 0.03259,
      "grad_norm": 0.6702493016081907,
      "learning_rate": 0.003,
      "loss": 4.1648,
      "step": 3259
    },
    {
      "epoch": 0.0326,
      "grad_norm": 0.742639840095955,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 3260
    },
    {
      "epoch": 0.03261,
      "grad_norm": 0.7251275113655737,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 3261
    },
    {
      "epoch": 0.03262,
      "grad_norm": 0.8493290703145597,
      "learning_rate": 0.003,
      "loss": 4.1703,
      "step": 3262
    },
    {
      "epoch": 0.03263,
      "grad_norm": 0.8597025731971345,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 3263
    },
    {
      "epoch": 0.03264,
      "grad_norm": 0.8711807826300412,
      "learning_rate": 0.003,
      "loss": 4.1949,
      "step": 3264
    },
    {
      "epoch": 0.03265,
      "grad_norm": 0.866467942160948,
      "learning_rate": 0.003,
      "loss": 4.1874,
      "step": 3265
    },
    {
      "epoch": 0.03266,
      "grad_norm": 0.7907548571319843,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 3266
    },
    {
      "epoch": 0.03267,
      "grad_norm": 0.861876845682946,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 3267
    },
    {
      "epoch": 0.03268,
      "grad_norm": 0.8022006061060821,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 3268
    },
    {
      "epoch": 0.03269,
      "grad_norm": 0.7169849622143066,
      "learning_rate": 0.003,
      "loss": 4.1689,
      "step": 3269
    },
    {
      "epoch": 0.0327,
      "grad_norm": 0.7910109368111681,
      "learning_rate": 0.003,
      "loss": 4.1996,
      "step": 3270
    },
    {
      "epoch": 0.03271,
      "grad_norm": 0.8344969911176661,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 3271
    },
    {
      "epoch": 0.03272,
      "grad_norm": 0.872802512464233,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 3272
    },
    {
      "epoch": 0.03273,
      "grad_norm": 1.035019938766142,
      "learning_rate": 0.003,
      "loss": 4.1763,
      "step": 3273
    },
    {
      "epoch": 0.03274,
      "grad_norm": 1.2073229183989687,
      "learning_rate": 0.003,
      "loss": 4.2034,
      "step": 3274
    },
    {
      "epoch": 0.03275,
      "grad_norm": 0.8290793472557604,
      "learning_rate": 0.003,
      "loss": 4.1932,
      "step": 3275
    },
    {
      "epoch": 0.03276,
      "grad_norm": 0.7945684592795406,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 3276
    },
    {
      "epoch": 0.03277,
      "grad_norm": 0.7498594076499748,
      "learning_rate": 0.003,
      "loss": 4.1678,
      "step": 3277
    },
    {
      "epoch": 0.03278,
      "grad_norm": 0.7587982978511185,
      "learning_rate": 0.003,
      "loss": 4.1765,
      "step": 3278
    },
    {
      "epoch": 0.03279,
      "grad_norm": 0.6810453324428601,
      "learning_rate": 0.003,
      "loss": 4.1616,
      "step": 3279
    },
    {
      "epoch": 0.0328,
      "grad_norm": 0.7837448527302995,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3280
    },
    {
      "epoch": 0.03281,
      "grad_norm": 0.8219512369198226,
      "learning_rate": 0.003,
      "loss": 4.1987,
      "step": 3281
    },
    {
      "epoch": 0.03282,
      "grad_norm": 0.9147716603434757,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 3282
    },
    {
      "epoch": 0.03283,
      "grad_norm": 1.1897707873169607,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 3283
    },
    {
      "epoch": 0.03284,
      "grad_norm": 0.9254561336830153,
      "learning_rate": 0.003,
      "loss": 4.1995,
      "step": 3284
    },
    {
      "epoch": 0.03285,
      "grad_norm": 0.7859405478984048,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 3285
    },
    {
      "epoch": 0.03286,
      "grad_norm": 0.8112187817191882,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 3286
    },
    {
      "epoch": 0.03287,
      "grad_norm": 0.8325987650512564,
      "learning_rate": 0.003,
      "loss": 4.1892,
      "step": 3287
    },
    {
      "epoch": 0.03288,
      "grad_norm": 0.7508815679592351,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 3288
    },
    {
      "epoch": 0.03289,
      "grad_norm": 0.7067915480483684,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 3289
    },
    {
      "epoch": 0.0329,
      "grad_norm": 0.6452869815142386,
      "learning_rate": 0.003,
      "loss": 4.1967,
      "step": 3290
    },
    {
      "epoch": 0.03291,
      "grad_norm": 0.7077619390726327,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 3291
    },
    {
      "epoch": 0.03292,
      "grad_norm": 0.764342418854837,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 3292
    },
    {
      "epoch": 0.03293,
      "grad_norm": 0.7980406372998824,
      "learning_rate": 0.003,
      "loss": 4.1624,
      "step": 3293
    },
    {
      "epoch": 0.03294,
      "grad_norm": 0.9021539490595357,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 3294
    },
    {
      "epoch": 0.03295,
      "grad_norm": 1.0144913859148905,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 3295
    },
    {
      "epoch": 0.03296,
      "grad_norm": 0.9261450137381763,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 3296
    },
    {
      "epoch": 0.03297,
      "grad_norm": 0.6064461182372102,
      "learning_rate": 0.003,
      "loss": 4.1622,
      "step": 3297
    },
    {
      "epoch": 0.03298,
      "grad_norm": 0.7850213517125518,
      "learning_rate": 0.003,
      "loss": 4.1831,
      "step": 3298
    },
    {
      "epoch": 0.03299,
      "grad_norm": 0.9549041047998519,
      "learning_rate": 0.003,
      "loss": 4.1725,
      "step": 3299
    },
    {
      "epoch": 0.033,
      "grad_norm": 1.0983996256965927,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3300
    },
    {
      "epoch": 0.03301,
      "grad_norm": 0.8476481107914999,
      "learning_rate": 0.003,
      "loss": 4.2035,
      "step": 3301
    },
    {
      "epoch": 0.03302,
      "grad_norm": 0.7326575292783787,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 3302
    },
    {
      "epoch": 0.03303,
      "grad_norm": 0.7473518023486005,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 3303
    },
    {
      "epoch": 0.03304,
      "grad_norm": 0.8060023346991733,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 3304
    },
    {
      "epoch": 0.03305,
      "grad_norm": 0.7204535881570415,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 3305
    },
    {
      "epoch": 0.03306,
      "grad_norm": 0.7755543314021593,
      "learning_rate": 0.003,
      "loss": 4.1959,
      "step": 3306
    },
    {
      "epoch": 0.03307,
      "grad_norm": 0.8878094148219878,
      "learning_rate": 0.003,
      "loss": 4.1938,
      "step": 3307
    },
    {
      "epoch": 0.03308,
      "grad_norm": 0.9646699735847878,
      "learning_rate": 0.003,
      "loss": 4.1797,
      "step": 3308
    },
    {
      "epoch": 0.03309,
      "grad_norm": 1.029625480565733,
      "learning_rate": 0.003,
      "loss": 4.1708,
      "step": 3309
    },
    {
      "epoch": 0.0331,
      "grad_norm": 0.9484648928493907,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 3310
    },
    {
      "epoch": 0.03311,
      "grad_norm": 0.7392971191362379,
      "learning_rate": 0.003,
      "loss": 4.1858,
      "step": 3311
    },
    {
      "epoch": 0.03312,
      "grad_norm": 0.5862137687073449,
      "learning_rate": 0.003,
      "loss": 4.1502,
      "step": 3312
    },
    {
      "epoch": 0.03313,
      "grad_norm": 0.596488073707156,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 3313
    },
    {
      "epoch": 0.03314,
      "grad_norm": 0.627442295342148,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 3314
    },
    {
      "epoch": 0.03315,
      "grad_norm": 0.8275855239837167,
      "learning_rate": 0.003,
      "loss": 4.1766,
      "step": 3315
    },
    {
      "epoch": 0.03316,
      "grad_norm": 0.9370582268522403,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 3316
    },
    {
      "epoch": 0.03317,
      "grad_norm": 0.9555220232612601,
      "learning_rate": 0.003,
      "loss": 4.176,
      "step": 3317
    },
    {
      "epoch": 0.03318,
      "grad_norm": 0.9212435845363719,
      "learning_rate": 0.003,
      "loss": 4.1593,
      "step": 3318
    },
    {
      "epoch": 0.03319,
      "grad_norm": 0.7776775672878333,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 3319
    },
    {
      "epoch": 0.0332,
      "grad_norm": 0.6545161157490753,
      "learning_rate": 0.003,
      "loss": 4.1717,
      "step": 3320
    },
    {
      "epoch": 0.03321,
      "grad_norm": 0.7075805828392252,
      "learning_rate": 0.003,
      "loss": 4.1886,
      "step": 3321
    },
    {
      "epoch": 0.03322,
      "grad_norm": 0.5823854673371157,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 3322
    },
    {
      "epoch": 0.03323,
      "grad_norm": 0.5681997988958711,
      "learning_rate": 0.003,
      "loss": 4.1781,
      "step": 3323
    },
    {
      "epoch": 0.03324,
      "grad_norm": 0.5871116115690365,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 3324
    },
    {
      "epoch": 0.03325,
      "grad_norm": 0.6563169766957025,
      "learning_rate": 0.003,
      "loss": 4.1787,
      "step": 3325
    },
    {
      "epoch": 0.03326,
      "grad_norm": 0.6448409982175801,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 3326
    },
    {
      "epoch": 0.03327,
      "grad_norm": 0.6600692635622109,
      "learning_rate": 0.003,
      "loss": 4.165,
      "step": 3327
    },
    {
      "epoch": 0.03328,
      "grad_norm": 0.5986942519205176,
      "learning_rate": 0.003,
      "loss": 4.1826,
      "step": 3328
    },
    {
      "epoch": 0.03329,
      "grad_norm": 0.5527742555822713,
      "learning_rate": 0.003,
      "loss": 4.1401,
      "step": 3329
    },
    {
      "epoch": 0.0333,
      "grad_norm": 0.5223056461707817,
      "learning_rate": 0.003,
      "loss": 4.1418,
      "step": 3330
    },
    {
      "epoch": 0.03331,
      "grad_norm": 0.7035002650156569,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 3331
    },
    {
      "epoch": 0.03332,
      "grad_norm": 0.9852513287674988,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 3332
    },
    {
      "epoch": 0.03333,
      "grad_norm": 1.2028937317307933,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 3333
    },
    {
      "epoch": 0.03334,
      "grad_norm": 0.7217530187376259,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 3334
    },
    {
      "epoch": 0.03335,
      "grad_norm": 0.856060284710091,
      "learning_rate": 0.003,
      "loss": 4.1802,
      "step": 3335
    },
    {
      "epoch": 0.03336,
      "grad_norm": 0.9360301326604868,
      "learning_rate": 0.003,
      "loss": 4.1865,
      "step": 3336
    },
    {
      "epoch": 0.03337,
      "grad_norm": 0.9783080347996699,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 3337
    },
    {
      "epoch": 0.03338,
      "grad_norm": 0.925336540295906,
      "learning_rate": 0.003,
      "loss": 4.179,
      "step": 3338
    },
    {
      "epoch": 0.03339,
      "grad_norm": 0.8653423496229105,
      "learning_rate": 0.003,
      "loss": 4.1809,
      "step": 3339
    },
    {
      "epoch": 0.0334,
      "grad_norm": 0.8300930142413694,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 3340
    },
    {
      "epoch": 0.03341,
      "grad_norm": 0.7043960651078829,
      "learning_rate": 0.003,
      "loss": 4.1713,
      "step": 3341
    },
    {
      "epoch": 0.03342,
      "grad_norm": 0.7527535496540583,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 3342
    },
    {
      "epoch": 0.03343,
      "grad_norm": 0.7101647240608583,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 3343
    },
    {
      "epoch": 0.03344,
      "grad_norm": 0.6916264160905395,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 3344
    },
    {
      "epoch": 0.03345,
      "grad_norm": 0.6270475152140637,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 3345
    },
    {
      "epoch": 0.03346,
      "grad_norm": 0.6401626891249842,
      "learning_rate": 0.003,
      "loss": 4.1783,
      "step": 3346
    },
    {
      "epoch": 0.03347,
      "grad_norm": 0.7429872046530612,
      "learning_rate": 0.003,
      "loss": 4.1637,
      "step": 3347
    },
    {
      "epoch": 0.03348,
      "grad_norm": 0.9010177786070285,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 3348
    },
    {
      "epoch": 0.03349,
      "grad_norm": 1.2489916491427688,
      "learning_rate": 0.003,
      "loss": 4.2146,
      "step": 3349
    },
    {
      "epoch": 0.0335,
      "grad_norm": 0.7874425616197881,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3350
    },
    {
      "epoch": 0.03351,
      "grad_norm": 0.6778456165956087,
      "learning_rate": 0.003,
      "loss": 4.1637,
      "step": 3351
    },
    {
      "epoch": 0.03352,
      "grad_norm": 0.6019165379094058,
      "learning_rate": 0.003,
      "loss": 4.1722,
      "step": 3352
    },
    {
      "epoch": 0.03353,
      "grad_norm": 0.5923300369029466,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 3353
    },
    {
      "epoch": 0.03354,
      "grad_norm": 0.61539729048598,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 3354
    },
    {
      "epoch": 0.03355,
      "grad_norm": 0.735664235193292,
      "learning_rate": 0.003,
      "loss": 4.1652,
      "step": 3355
    },
    {
      "epoch": 0.03356,
      "grad_norm": 0.8128296289500704,
      "learning_rate": 0.003,
      "loss": 4.1888,
      "step": 3356
    },
    {
      "epoch": 0.03357,
      "grad_norm": 0.9527976996073543,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 3357
    },
    {
      "epoch": 0.03358,
      "grad_norm": 1.0414565142110577,
      "learning_rate": 0.003,
      "loss": 4.1895,
      "step": 3358
    },
    {
      "epoch": 0.03359,
      "grad_norm": 1.008856217533468,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 3359
    },
    {
      "epoch": 0.0336,
      "grad_norm": 0.8810935589863647,
      "learning_rate": 0.003,
      "loss": 4.19,
      "step": 3360
    },
    {
      "epoch": 0.03361,
      "grad_norm": 0.7183884568879403,
      "learning_rate": 0.003,
      "loss": 4.1854,
      "step": 3361
    },
    {
      "epoch": 0.03362,
      "grad_norm": 0.7267007378420227,
      "learning_rate": 0.003,
      "loss": 4.1815,
      "step": 3362
    },
    {
      "epoch": 0.03363,
      "grad_norm": 0.7801299772898054,
      "learning_rate": 0.003,
      "loss": 4.1306,
      "step": 3363
    },
    {
      "epoch": 0.03364,
      "grad_norm": 0.7966856524661053,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 3364
    },
    {
      "epoch": 0.03365,
      "grad_norm": 0.8847580650147542,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 3365
    },
    {
      "epoch": 0.03366,
      "grad_norm": 0.96095183947863,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 3366
    },
    {
      "epoch": 0.03367,
      "grad_norm": 1.0579464638068148,
      "learning_rate": 0.003,
      "loss": 4.1575,
      "step": 3367
    },
    {
      "epoch": 0.03368,
      "grad_norm": 0.9487908981038276,
      "learning_rate": 0.003,
      "loss": 4.2002,
      "step": 3368
    },
    {
      "epoch": 0.03369,
      "grad_norm": 0.926541854271104,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 3369
    },
    {
      "epoch": 0.0337,
      "grad_norm": 0.7454576740316814,
      "learning_rate": 0.003,
      "loss": 4.1891,
      "step": 3370
    },
    {
      "epoch": 0.03371,
      "grad_norm": 0.8137043749598539,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 3371
    },
    {
      "epoch": 0.03372,
      "grad_norm": 0.9674167173768639,
      "learning_rate": 0.003,
      "loss": 4.16,
      "step": 3372
    },
    {
      "epoch": 0.03373,
      "grad_norm": 1.2155494462899379,
      "learning_rate": 0.003,
      "loss": 4.2043,
      "step": 3373
    },
    {
      "epoch": 0.03374,
      "grad_norm": 0.990109962038484,
      "learning_rate": 0.003,
      "loss": 4.1853,
      "step": 3374
    },
    {
      "epoch": 0.03375,
      "grad_norm": 0.8775264313673968,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 3375
    },
    {
      "epoch": 0.03376,
      "grad_norm": 0.85672141967058,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 3376
    },
    {
      "epoch": 0.03377,
      "grad_norm": 0.9737179043643116,
      "learning_rate": 0.003,
      "loss": 4.1884,
      "step": 3377
    },
    {
      "epoch": 0.03378,
      "grad_norm": 1.189069284130068,
      "learning_rate": 0.003,
      "loss": 4.2031,
      "step": 3378
    },
    {
      "epoch": 0.03379,
      "grad_norm": 0.8477238820814195,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 3379
    },
    {
      "epoch": 0.0338,
      "grad_norm": 0.7852120011007475,
      "learning_rate": 0.003,
      "loss": 4.1908,
      "step": 3380
    },
    {
      "epoch": 0.03381,
      "grad_norm": 0.7292342587108829,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 3381
    },
    {
      "epoch": 0.03382,
      "grad_norm": 0.6465901203260986,
      "learning_rate": 0.003,
      "loss": 4.176,
      "step": 3382
    },
    {
      "epoch": 0.03383,
      "grad_norm": 0.6599423928010247,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 3383
    },
    {
      "epoch": 0.03384,
      "grad_norm": 0.6191152180710551,
      "learning_rate": 0.003,
      "loss": 4.1621,
      "step": 3384
    },
    {
      "epoch": 0.03385,
      "grad_norm": 0.660294498068497,
      "learning_rate": 0.003,
      "loss": 4.1677,
      "step": 3385
    },
    {
      "epoch": 0.03386,
      "grad_norm": 0.6951272922326287,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 3386
    },
    {
      "epoch": 0.03387,
      "grad_norm": 0.7549085381425057,
      "learning_rate": 0.003,
      "loss": 4.1667,
      "step": 3387
    },
    {
      "epoch": 0.03388,
      "grad_norm": 0.8210352926247219,
      "learning_rate": 0.003,
      "loss": 4.1766,
      "step": 3388
    },
    {
      "epoch": 0.03389,
      "grad_norm": 0.7574190601223717,
      "learning_rate": 0.003,
      "loss": 4.192,
      "step": 3389
    },
    {
      "epoch": 0.0339,
      "grad_norm": 0.6482930381200502,
      "learning_rate": 0.003,
      "loss": 4.176,
      "step": 3390
    },
    {
      "epoch": 0.03391,
      "grad_norm": 0.6321069687041059,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 3391
    },
    {
      "epoch": 0.03392,
      "grad_norm": 0.643651481056441,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 3392
    },
    {
      "epoch": 0.03393,
      "grad_norm": 0.701072754557211,
      "learning_rate": 0.003,
      "loss": 4.1704,
      "step": 3393
    },
    {
      "epoch": 0.03394,
      "grad_norm": 0.936207243661443,
      "learning_rate": 0.003,
      "loss": 4.1766,
      "step": 3394
    },
    {
      "epoch": 0.03395,
      "grad_norm": 1.148255848253888,
      "learning_rate": 0.003,
      "loss": 4.1898,
      "step": 3395
    },
    {
      "epoch": 0.03396,
      "grad_norm": 0.9888207982566978,
      "learning_rate": 0.003,
      "loss": 4.1665,
      "step": 3396
    },
    {
      "epoch": 0.03397,
      "grad_norm": 0.8996655993402819,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 3397
    },
    {
      "epoch": 0.03398,
      "grad_norm": 0.8176901251549881,
      "learning_rate": 0.003,
      "loss": 4.1884,
      "step": 3398
    },
    {
      "epoch": 0.03399,
      "grad_norm": 0.9597792168614336,
      "learning_rate": 0.003,
      "loss": 4.2185,
      "step": 3399
    },
    {
      "epoch": 0.034,
      "grad_norm": 0.8953838926214202,
      "learning_rate": 0.003,
      "loss": 4.1958,
      "step": 3400
    },
    {
      "epoch": 0.03401,
      "grad_norm": 0.8709033356594122,
      "learning_rate": 0.003,
      "loss": 4.1686,
      "step": 3401
    },
    {
      "epoch": 0.03402,
      "grad_norm": 0.8085455592831763,
      "learning_rate": 0.003,
      "loss": 4.1978,
      "step": 3402
    },
    {
      "epoch": 0.03403,
      "grad_norm": 0.8446940111741622,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 3403
    },
    {
      "epoch": 0.03404,
      "grad_norm": 0.8940660013118596,
      "learning_rate": 0.003,
      "loss": 4.2009,
      "step": 3404
    },
    {
      "epoch": 0.03405,
      "grad_norm": 0.8140937963556486,
      "learning_rate": 0.003,
      "loss": 4.1779,
      "step": 3405
    },
    {
      "epoch": 0.03406,
      "grad_norm": 0.7315344108215677,
      "learning_rate": 0.003,
      "loss": 4.1602,
      "step": 3406
    },
    {
      "epoch": 0.03407,
      "grad_norm": 0.7402747619279555,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 3407
    },
    {
      "epoch": 0.03408,
      "grad_norm": 0.732820367759084,
      "learning_rate": 0.003,
      "loss": 4.1555,
      "step": 3408
    },
    {
      "epoch": 0.03409,
      "grad_norm": 0.7078192424381656,
      "learning_rate": 0.003,
      "loss": 4.163,
      "step": 3409
    },
    {
      "epoch": 0.0341,
      "grad_norm": 0.6568901083029279,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 3410
    },
    {
      "epoch": 0.03411,
      "grad_norm": 0.716531548016939,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 3411
    },
    {
      "epoch": 0.03412,
      "grad_norm": 0.7513105413690504,
      "learning_rate": 0.003,
      "loss": 4.1866,
      "step": 3412
    },
    {
      "epoch": 0.03413,
      "grad_norm": 0.8394550937538491,
      "learning_rate": 0.003,
      "loss": 4.1969,
      "step": 3413
    },
    {
      "epoch": 0.03414,
      "grad_norm": 0.820347608943603,
      "learning_rate": 0.003,
      "loss": 4.1853,
      "step": 3414
    },
    {
      "epoch": 0.03415,
      "grad_norm": 0.8347788267410619,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 3415
    },
    {
      "epoch": 0.03416,
      "grad_norm": 0.7803511833847365,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 3416
    },
    {
      "epoch": 0.03417,
      "grad_norm": 0.8460239573891309,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 3417
    },
    {
      "epoch": 0.03418,
      "grad_norm": 0.833609787064844,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 3418
    },
    {
      "epoch": 0.03419,
      "grad_norm": 0.8708569538552823,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 3419
    },
    {
      "epoch": 0.0342,
      "grad_norm": 0.9586486328814895,
      "learning_rate": 0.003,
      "loss": 4.18,
      "step": 3420
    },
    {
      "epoch": 0.03421,
      "grad_norm": 0.9216001377610666,
      "learning_rate": 0.003,
      "loss": 4.1736,
      "step": 3421
    },
    {
      "epoch": 0.03422,
      "grad_norm": 1.0431113207901683,
      "learning_rate": 0.003,
      "loss": 4.1952,
      "step": 3422
    },
    {
      "epoch": 0.03423,
      "grad_norm": 0.8615543590411862,
      "learning_rate": 0.003,
      "loss": 4.1686,
      "step": 3423
    },
    {
      "epoch": 0.03424,
      "grad_norm": 0.8731178346996326,
      "learning_rate": 0.003,
      "loss": 4.1942,
      "step": 3424
    },
    {
      "epoch": 0.03425,
      "grad_norm": 1.095469547675973,
      "learning_rate": 0.003,
      "loss": 4.1632,
      "step": 3425
    },
    {
      "epoch": 0.03426,
      "grad_norm": 1.1612745485851788,
      "learning_rate": 0.003,
      "loss": 4.1928,
      "step": 3426
    },
    {
      "epoch": 0.03427,
      "grad_norm": 0.8029034983629978,
      "learning_rate": 0.003,
      "loss": 4.1538,
      "step": 3427
    },
    {
      "epoch": 0.03428,
      "grad_norm": 0.6589054159011024,
      "learning_rate": 0.003,
      "loss": 4.1471,
      "step": 3428
    },
    {
      "epoch": 0.03429,
      "grad_norm": 0.8266299146386108,
      "learning_rate": 0.003,
      "loss": 4.1677,
      "step": 3429
    },
    {
      "epoch": 0.0343,
      "grad_norm": 0.9825330051154652,
      "learning_rate": 0.003,
      "loss": 4.1665,
      "step": 3430
    },
    {
      "epoch": 0.03431,
      "grad_norm": 0.910501700935703,
      "learning_rate": 0.003,
      "loss": 4.1613,
      "step": 3431
    },
    {
      "epoch": 0.03432,
      "grad_norm": 0.9004461828637552,
      "learning_rate": 0.003,
      "loss": 4.17,
      "step": 3432
    },
    {
      "epoch": 0.03433,
      "grad_norm": 0.8528177999939314,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 3433
    },
    {
      "epoch": 0.03434,
      "grad_norm": 0.89696000337961,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 3434
    },
    {
      "epoch": 0.03435,
      "grad_norm": 0.9861991758400855,
      "learning_rate": 0.003,
      "loss": 4.1557,
      "step": 3435
    },
    {
      "epoch": 0.03436,
      "grad_norm": 1.0131768497867766,
      "learning_rate": 0.003,
      "loss": 4.1868,
      "step": 3436
    },
    {
      "epoch": 0.03437,
      "grad_norm": 0.9466446408842145,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 3437
    },
    {
      "epoch": 0.03438,
      "grad_norm": 0.9102848042032905,
      "learning_rate": 0.003,
      "loss": 4.1899,
      "step": 3438
    },
    {
      "epoch": 0.03439,
      "grad_norm": 0.8647708117802543,
      "learning_rate": 0.003,
      "loss": 4.1847,
      "step": 3439
    },
    {
      "epoch": 0.0344,
      "grad_norm": 0.7164211772779949,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 3440
    },
    {
      "epoch": 0.03441,
      "grad_norm": 0.6301651930660659,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 3441
    },
    {
      "epoch": 0.03442,
      "grad_norm": 0.6803560179535443,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 3442
    },
    {
      "epoch": 0.03443,
      "grad_norm": 0.7027557239683841,
      "learning_rate": 0.003,
      "loss": 4.1683,
      "step": 3443
    },
    {
      "epoch": 0.03444,
      "grad_norm": 0.679437494499468,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 3444
    },
    {
      "epoch": 0.03445,
      "grad_norm": 0.6317151895560527,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 3445
    },
    {
      "epoch": 0.03446,
      "grad_norm": 0.5594532664167771,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 3446
    },
    {
      "epoch": 0.03447,
      "grad_norm": 0.4608224201125752,
      "learning_rate": 0.003,
      "loss": 4.1725,
      "step": 3447
    },
    {
      "epoch": 0.03448,
      "grad_norm": 0.4578940242640693,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 3448
    },
    {
      "epoch": 0.03449,
      "grad_norm": 0.42932231306304686,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 3449
    },
    {
      "epoch": 0.0345,
      "grad_norm": 0.4988907778580992,
      "learning_rate": 0.003,
      "loss": 4.1635,
      "step": 3450
    },
    {
      "epoch": 0.03451,
      "grad_norm": 0.6396019203681813,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 3451
    },
    {
      "epoch": 0.03452,
      "grad_norm": 1.0898993664507333,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 3452
    },
    {
      "epoch": 0.03453,
      "grad_norm": 1.3432647343273045,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 3453
    },
    {
      "epoch": 0.03454,
      "grad_norm": 0.5913981853383808,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 3454
    },
    {
      "epoch": 0.03455,
      "grad_norm": 0.7494952955105579,
      "learning_rate": 0.003,
      "loss": 4.1362,
      "step": 3455
    },
    {
      "epoch": 0.03456,
      "grad_norm": 0.8718264272859545,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 3456
    },
    {
      "epoch": 0.03457,
      "grad_norm": 0.7341902897858598,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 3457
    },
    {
      "epoch": 0.03458,
      "grad_norm": 0.5928314274849413,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 3458
    },
    {
      "epoch": 0.03459,
      "grad_norm": 0.5786112106982269,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 3459
    },
    {
      "epoch": 0.0346,
      "grad_norm": 0.5686114896859036,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 3460
    },
    {
      "epoch": 0.03461,
      "grad_norm": 0.5394258978891616,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 3461
    },
    {
      "epoch": 0.03462,
      "grad_norm": 0.5283747456624726,
      "learning_rate": 0.003,
      "loss": 4.1468,
      "step": 3462
    },
    {
      "epoch": 0.03463,
      "grad_norm": 0.5548545176329134,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 3463
    },
    {
      "epoch": 0.03464,
      "grad_norm": 0.6086252098872936,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 3464
    },
    {
      "epoch": 0.03465,
      "grad_norm": 0.7233117130975524,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 3465
    },
    {
      "epoch": 0.03466,
      "grad_norm": 0.8644314342564327,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 3466
    },
    {
      "epoch": 0.03467,
      "grad_norm": 0.9734945351785561,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 3467
    },
    {
      "epoch": 0.03468,
      "grad_norm": 1.1561496198026826,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 3468
    },
    {
      "epoch": 0.03469,
      "grad_norm": 0.8501361165726434,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 3469
    },
    {
      "epoch": 0.0347,
      "grad_norm": 0.8848493282370733,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 3470
    },
    {
      "epoch": 0.03471,
      "grad_norm": 0.8464627960157501,
      "learning_rate": 0.003,
      "loss": 4.163,
      "step": 3471
    },
    {
      "epoch": 0.03472,
      "grad_norm": 0.8982470984794905,
      "learning_rate": 0.003,
      "loss": 4.1667,
      "step": 3472
    },
    {
      "epoch": 0.03473,
      "grad_norm": 0.9751232721396406,
      "learning_rate": 0.003,
      "loss": 4.1613,
      "step": 3473
    },
    {
      "epoch": 0.03474,
      "grad_norm": 0.8720398622820135,
      "learning_rate": 0.003,
      "loss": 4.1922,
      "step": 3474
    },
    {
      "epoch": 0.03475,
      "grad_norm": 1.0085199767036763,
      "learning_rate": 0.003,
      "loss": 4.1683,
      "step": 3475
    },
    {
      "epoch": 0.03476,
      "grad_norm": 1.087277414889208,
      "learning_rate": 0.003,
      "loss": 4.1878,
      "step": 3476
    },
    {
      "epoch": 0.03477,
      "grad_norm": 1.1234725023256988,
      "learning_rate": 0.003,
      "loss": 4.1862,
      "step": 3477
    },
    {
      "epoch": 0.03478,
      "grad_norm": 0.9746682007392135,
      "learning_rate": 0.003,
      "loss": 4.1844,
      "step": 3478
    },
    {
      "epoch": 0.03479,
      "grad_norm": 1.1292770316423675,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 3479
    },
    {
      "epoch": 0.0348,
      "grad_norm": 1.0966154473553906,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 3480
    },
    {
      "epoch": 0.03481,
      "grad_norm": 0.9630452734638144,
      "learning_rate": 0.003,
      "loss": 4.1815,
      "step": 3481
    },
    {
      "epoch": 0.03482,
      "grad_norm": 1.0282885618312065,
      "learning_rate": 0.003,
      "loss": 4.1913,
      "step": 3482
    },
    {
      "epoch": 0.03483,
      "grad_norm": 0.91830488319391,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 3483
    },
    {
      "epoch": 0.03484,
      "grad_norm": 0.904152019538749,
      "learning_rate": 0.003,
      "loss": 4.2088,
      "step": 3484
    },
    {
      "epoch": 0.03485,
      "grad_norm": 0.9919455357745458,
      "learning_rate": 0.003,
      "loss": 4.2056,
      "step": 3485
    },
    {
      "epoch": 0.03486,
      "grad_norm": 0.9732252412489275,
      "learning_rate": 0.003,
      "loss": 4.1777,
      "step": 3486
    },
    {
      "epoch": 0.03487,
      "grad_norm": 0.8822393408309757,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 3487
    },
    {
      "epoch": 0.03488,
      "grad_norm": 0.7964288583002509,
      "learning_rate": 0.003,
      "loss": 4.1617,
      "step": 3488
    },
    {
      "epoch": 0.03489,
      "grad_norm": 0.76074461149548,
      "learning_rate": 0.003,
      "loss": 4.1754,
      "step": 3489
    },
    {
      "epoch": 0.0349,
      "grad_norm": 0.6520650563876845,
      "learning_rate": 0.003,
      "loss": 4.1689,
      "step": 3490
    },
    {
      "epoch": 0.03491,
      "grad_norm": 0.6914947787275789,
      "learning_rate": 0.003,
      "loss": 4.1866,
      "step": 3491
    },
    {
      "epoch": 0.03492,
      "grad_norm": 0.5749087587448846,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 3492
    },
    {
      "epoch": 0.03493,
      "grad_norm": 0.542359666467121,
      "learning_rate": 0.003,
      "loss": 4.1557,
      "step": 3493
    },
    {
      "epoch": 0.03494,
      "grad_norm": 0.5345171326980744,
      "learning_rate": 0.003,
      "loss": 4.1581,
      "step": 3494
    },
    {
      "epoch": 0.03495,
      "grad_norm": 0.6065181384190205,
      "learning_rate": 0.003,
      "loss": 4.1642,
      "step": 3495
    },
    {
      "epoch": 0.03496,
      "grad_norm": 0.8772556513810549,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 3496
    },
    {
      "epoch": 0.03497,
      "grad_norm": 1.592460211390561,
      "learning_rate": 0.003,
      "loss": 4.2001,
      "step": 3497
    },
    {
      "epoch": 0.03498,
      "grad_norm": 0.6866814285580091,
      "learning_rate": 0.003,
      "loss": 4.1572,
      "step": 3498
    },
    {
      "epoch": 0.03499,
      "grad_norm": 0.6490269076299953,
      "learning_rate": 0.003,
      "loss": 4.1576,
      "step": 3499
    },
    {
      "epoch": 0.035,
      "grad_norm": 0.7769966222047817,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 3500
    },
    {
      "epoch": 0.03501,
      "grad_norm": 0.8082329511105066,
      "learning_rate": 0.003,
      "loss": 4.1697,
      "step": 3501
    },
    {
      "epoch": 0.03502,
      "grad_norm": 0.8597363324605595,
      "learning_rate": 0.003,
      "loss": 4.1708,
      "step": 3502
    },
    {
      "epoch": 0.03503,
      "grad_norm": 0.7980430509608143,
      "learning_rate": 0.003,
      "loss": 4.1826,
      "step": 3503
    },
    {
      "epoch": 0.03504,
      "grad_norm": 0.8314809190142672,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 3504
    },
    {
      "epoch": 0.03505,
      "grad_norm": 0.7125747726013733,
      "learning_rate": 0.003,
      "loss": 4.1704,
      "step": 3505
    },
    {
      "epoch": 0.03506,
      "grad_norm": 0.6174504440202525,
      "learning_rate": 0.003,
      "loss": 4.1753,
      "step": 3506
    },
    {
      "epoch": 0.03507,
      "grad_norm": 0.7474444612701099,
      "learning_rate": 0.003,
      "loss": 4.1568,
      "step": 3507
    },
    {
      "epoch": 0.03508,
      "grad_norm": 0.744368255976828,
      "learning_rate": 0.003,
      "loss": 4.1375,
      "step": 3508
    },
    {
      "epoch": 0.03509,
      "grad_norm": 0.7883919396141444,
      "learning_rate": 0.003,
      "loss": 4.1519,
      "step": 3509
    },
    {
      "epoch": 0.0351,
      "grad_norm": 0.8999072142957483,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 3510
    },
    {
      "epoch": 0.03511,
      "grad_norm": 0.8704293057371069,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 3511
    },
    {
      "epoch": 0.03512,
      "grad_norm": 0.9815842496362013,
      "learning_rate": 0.003,
      "loss": 4.1538,
      "step": 3512
    },
    {
      "epoch": 0.03513,
      "grad_norm": 1.1066583346659893,
      "learning_rate": 0.003,
      "loss": 4.163,
      "step": 3513
    },
    {
      "epoch": 0.03514,
      "grad_norm": 0.981954170426782,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 3514
    },
    {
      "epoch": 0.03515,
      "grad_norm": 0.9792417977988364,
      "learning_rate": 0.003,
      "loss": 4.1869,
      "step": 3515
    },
    {
      "epoch": 0.03516,
      "grad_norm": 0.856404411633118,
      "learning_rate": 0.003,
      "loss": 4.2085,
      "step": 3516
    },
    {
      "epoch": 0.03517,
      "grad_norm": 0.7127129042609169,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 3517
    },
    {
      "epoch": 0.03518,
      "grad_norm": 0.6898706808403468,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 3518
    },
    {
      "epoch": 0.03519,
      "grad_norm": 0.6402523946118677,
      "learning_rate": 0.003,
      "loss": 4.185,
      "step": 3519
    },
    {
      "epoch": 0.0352,
      "grad_norm": 0.6207863269082038,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 3520
    },
    {
      "epoch": 0.03521,
      "grad_norm": 0.6360401847170623,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 3521
    },
    {
      "epoch": 0.03522,
      "grad_norm": 0.7339165326988526,
      "learning_rate": 0.003,
      "loss": 4.159,
      "step": 3522
    },
    {
      "epoch": 0.03523,
      "grad_norm": 0.8973232567522073,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 3523
    },
    {
      "epoch": 0.03524,
      "grad_norm": 1.1587635718923102,
      "learning_rate": 0.003,
      "loss": 4.1833,
      "step": 3524
    },
    {
      "epoch": 0.03525,
      "grad_norm": 0.7822190217961902,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 3525
    },
    {
      "epoch": 0.03526,
      "grad_norm": 0.7291963363920031,
      "learning_rate": 0.003,
      "loss": 4.1485,
      "step": 3526
    },
    {
      "epoch": 0.03527,
      "grad_norm": 0.779725825468202,
      "learning_rate": 0.003,
      "loss": 4.1812,
      "step": 3527
    },
    {
      "epoch": 0.03528,
      "grad_norm": 0.8316224754491519,
      "learning_rate": 0.003,
      "loss": 4.1645,
      "step": 3528
    },
    {
      "epoch": 0.03529,
      "grad_norm": 0.8639080944248414,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 3529
    },
    {
      "epoch": 0.0353,
      "grad_norm": 0.8612623897066767,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 3530
    },
    {
      "epoch": 0.03531,
      "grad_norm": 0.8610797480294329,
      "learning_rate": 0.003,
      "loss": 4.1683,
      "step": 3531
    },
    {
      "epoch": 0.03532,
      "grad_norm": 0.9008566995316217,
      "learning_rate": 0.003,
      "loss": 4.1748,
      "step": 3532
    },
    {
      "epoch": 0.03533,
      "grad_norm": 0.7630493491147551,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 3533
    },
    {
      "epoch": 0.03534,
      "grad_norm": 0.7799426199854421,
      "learning_rate": 0.003,
      "loss": 4.1652,
      "step": 3534
    },
    {
      "epoch": 0.03535,
      "grad_norm": 0.7443571102643655,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 3535
    },
    {
      "epoch": 0.03536,
      "grad_norm": 0.9162718166038424,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 3536
    },
    {
      "epoch": 0.03537,
      "grad_norm": 0.9471286119751292,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 3537
    },
    {
      "epoch": 0.03538,
      "grad_norm": 0.949840005398208,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 3538
    },
    {
      "epoch": 0.03539,
      "grad_norm": 0.8941824512475612,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 3539
    },
    {
      "epoch": 0.0354,
      "grad_norm": 0.8416985140532036,
      "learning_rate": 0.003,
      "loss": 4.1851,
      "step": 3540
    },
    {
      "epoch": 0.03541,
      "grad_norm": 0.8643157714042061,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 3541
    },
    {
      "epoch": 0.03542,
      "grad_norm": 1.0638367559817616,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 3542
    },
    {
      "epoch": 0.03543,
      "grad_norm": 1.1652953416038865,
      "learning_rate": 0.003,
      "loss": 4.1783,
      "step": 3543
    },
    {
      "epoch": 0.03544,
      "grad_norm": 0.9946375845221913,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 3544
    },
    {
      "epoch": 0.03545,
      "grad_norm": 0.9626017427173171,
      "learning_rate": 0.003,
      "loss": 4.151,
      "step": 3545
    },
    {
      "epoch": 0.03546,
      "grad_norm": 0.9017009636423533,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 3546
    },
    {
      "epoch": 0.03547,
      "grad_norm": 0.8338308420587012,
      "learning_rate": 0.003,
      "loss": 4.1742,
      "step": 3547
    },
    {
      "epoch": 0.03548,
      "grad_norm": 0.795230968098308,
      "learning_rate": 0.003,
      "loss": 4.1761,
      "step": 3548
    },
    {
      "epoch": 0.03549,
      "grad_norm": 0.7285679046345136,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 3549
    },
    {
      "epoch": 0.0355,
      "grad_norm": 0.6534721210956203,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 3550
    },
    {
      "epoch": 0.03551,
      "grad_norm": 0.6108415927364046,
      "learning_rate": 0.003,
      "loss": 4.1778,
      "step": 3551
    },
    {
      "epoch": 0.03552,
      "grad_norm": 0.5975360738111026,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 3552
    },
    {
      "epoch": 0.03553,
      "grad_norm": 0.616283856055616,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 3553
    },
    {
      "epoch": 0.03554,
      "grad_norm": 0.6492012815047297,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3554
    },
    {
      "epoch": 0.03555,
      "grad_norm": 0.7788849687728241,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3555
    },
    {
      "epoch": 0.03556,
      "grad_norm": 1.0575264249976415,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3556
    },
    {
      "epoch": 0.03557,
      "grad_norm": 1.2637167881926659,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 3557
    },
    {
      "epoch": 0.03558,
      "grad_norm": 0.6413226636225998,
      "learning_rate": 0.003,
      "loss": 4.1816,
      "step": 3558
    },
    {
      "epoch": 0.03559,
      "grad_norm": 0.7092773893161065,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 3559
    },
    {
      "epoch": 0.0356,
      "grad_norm": 0.9184433367909671,
      "learning_rate": 0.003,
      "loss": 4.1754,
      "step": 3560
    },
    {
      "epoch": 0.03561,
      "grad_norm": 0.933975121612697,
      "learning_rate": 0.003,
      "loss": 4.2011,
      "step": 3561
    },
    {
      "epoch": 0.03562,
      "grad_norm": 0.9304359925283029,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 3562
    },
    {
      "epoch": 0.03563,
      "grad_norm": 0.919692600735377,
      "learning_rate": 0.003,
      "loss": 4.1948,
      "step": 3563
    },
    {
      "epoch": 0.03564,
      "grad_norm": 0.7595794330578136,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 3564
    },
    {
      "epoch": 0.03565,
      "grad_norm": 0.6630885781605157,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3565
    },
    {
      "epoch": 0.03566,
      "grad_norm": 0.6125749874747362,
      "learning_rate": 0.003,
      "loss": 4.1594,
      "step": 3566
    },
    {
      "epoch": 0.03567,
      "grad_norm": 0.7000181582049009,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 3567
    },
    {
      "epoch": 0.03568,
      "grad_norm": 0.668903695971405,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 3568
    },
    {
      "epoch": 0.03569,
      "grad_norm": 0.7198229032250428,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 3569
    },
    {
      "epoch": 0.0357,
      "grad_norm": 0.6904707021821862,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 3570
    },
    {
      "epoch": 0.03571,
      "grad_norm": 0.6259572941993182,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 3571
    },
    {
      "epoch": 0.03572,
      "grad_norm": 0.6307940196938667,
      "learning_rate": 0.003,
      "loss": 4.1717,
      "step": 3572
    },
    {
      "epoch": 0.03573,
      "grad_norm": 0.6869341362818634,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 3573
    },
    {
      "epoch": 0.03574,
      "grad_norm": 0.9390330749868537,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 3574
    },
    {
      "epoch": 0.03575,
      "grad_norm": 1.1544395774812188,
      "learning_rate": 0.003,
      "loss": 4.1625,
      "step": 3575
    },
    {
      "epoch": 0.03576,
      "grad_norm": 0.8068138585754847,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 3576
    },
    {
      "epoch": 0.03577,
      "grad_norm": 0.6631826248776513,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 3577
    },
    {
      "epoch": 0.03578,
      "grad_norm": 0.6113246693019829,
      "learning_rate": 0.003,
      "loss": 4.1541,
      "step": 3578
    },
    {
      "epoch": 0.03579,
      "grad_norm": 0.780311146927158,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 3579
    },
    {
      "epoch": 0.0358,
      "grad_norm": 0.827477686309755,
      "learning_rate": 0.003,
      "loss": 4.1335,
      "step": 3580
    },
    {
      "epoch": 0.03581,
      "grad_norm": 0.8515806061337369,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 3581
    },
    {
      "epoch": 0.03582,
      "grad_norm": 0.7993229318614161,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 3582
    },
    {
      "epoch": 0.03583,
      "grad_norm": 0.8095532689038906,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 3583
    },
    {
      "epoch": 0.03584,
      "grad_norm": 0.8205040467136088,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 3584
    },
    {
      "epoch": 0.03585,
      "grad_norm": 0.7389714832217196,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 3585
    },
    {
      "epoch": 0.03586,
      "grad_norm": 0.6688957110779817,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 3586
    },
    {
      "epoch": 0.03587,
      "grad_norm": 0.7092588679384089,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 3587
    },
    {
      "epoch": 0.03588,
      "grad_norm": 0.7650773891669993,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 3588
    },
    {
      "epoch": 0.03589,
      "grad_norm": 0.6712500265923376,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 3589
    },
    {
      "epoch": 0.0359,
      "grad_norm": 0.7094925517899797,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 3590
    },
    {
      "epoch": 0.03591,
      "grad_norm": 0.9772899161914086,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 3591
    },
    {
      "epoch": 0.03592,
      "grad_norm": 1.3584031251577058,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3592
    },
    {
      "epoch": 0.03593,
      "grad_norm": 0.6739062662570949,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 3593
    },
    {
      "epoch": 0.03594,
      "grad_norm": 0.7388127025602351,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 3594
    },
    {
      "epoch": 0.03595,
      "grad_norm": 0.7746906492491002,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 3595
    },
    {
      "epoch": 0.03596,
      "grad_norm": 0.7831810343025122,
      "learning_rate": 0.003,
      "loss": 4.1838,
      "step": 3596
    },
    {
      "epoch": 0.03597,
      "grad_norm": 0.8759604826013669,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 3597
    },
    {
      "epoch": 0.03598,
      "grad_norm": 0.9635123949092944,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 3598
    },
    {
      "epoch": 0.03599,
      "grad_norm": 0.9676585543651076,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 3599
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.7973342092648297,
      "learning_rate": 0.003,
      "loss": 4.1881,
      "step": 3600
    },
    {
      "epoch": 0.03601,
      "grad_norm": 0.8952158475064083,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 3601
    },
    {
      "epoch": 0.03602,
      "grad_norm": 0.8297361733958166,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 3602
    },
    {
      "epoch": 0.03603,
      "grad_norm": 0.8233542191650651,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 3603
    },
    {
      "epoch": 0.03604,
      "grad_norm": 0.8910587306080446,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 3604
    },
    {
      "epoch": 0.03605,
      "grad_norm": 0.8944750063674012,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 3605
    },
    {
      "epoch": 0.03606,
      "grad_norm": 0.8493957051258396,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 3606
    },
    {
      "epoch": 0.03607,
      "grad_norm": 0.8478455854512117,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 3607
    },
    {
      "epoch": 0.03608,
      "grad_norm": 0.8546181295422765,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3608
    },
    {
      "epoch": 0.03609,
      "grad_norm": 0.8039599648016552,
      "learning_rate": 0.003,
      "loss": 4.1807,
      "step": 3609
    },
    {
      "epoch": 0.0361,
      "grad_norm": 0.7257991452948148,
      "learning_rate": 0.003,
      "loss": 4.183,
      "step": 3610
    },
    {
      "epoch": 0.03611,
      "grad_norm": 0.8324843473102026,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 3611
    },
    {
      "epoch": 0.03612,
      "grad_norm": 0.824602826243289,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 3612
    },
    {
      "epoch": 0.03613,
      "grad_norm": 1.0069180266978304,
      "learning_rate": 0.003,
      "loss": 4.182,
      "step": 3613
    },
    {
      "epoch": 0.03614,
      "grad_norm": 1.2555634931954907,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 3614
    },
    {
      "epoch": 0.03615,
      "grad_norm": 0.8411513212979675,
      "learning_rate": 0.003,
      "loss": 4.1693,
      "step": 3615
    },
    {
      "epoch": 0.03616,
      "grad_norm": 0.8451688294141301,
      "learning_rate": 0.003,
      "loss": 4.1775,
      "step": 3616
    },
    {
      "epoch": 0.03617,
      "grad_norm": 0.9112344841927535,
      "learning_rate": 0.003,
      "loss": 4.177,
      "step": 3617
    },
    {
      "epoch": 0.03618,
      "grad_norm": 0.9999707710039599,
      "learning_rate": 0.003,
      "loss": 4.1694,
      "step": 3618
    },
    {
      "epoch": 0.03619,
      "grad_norm": 1.0118192200700322,
      "learning_rate": 0.003,
      "loss": 4.1724,
      "step": 3619
    },
    {
      "epoch": 0.0362,
      "grad_norm": 1.0258430778394187,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 3620
    },
    {
      "epoch": 0.03621,
      "grad_norm": 1.0952856306278091,
      "learning_rate": 0.003,
      "loss": 4.185,
      "step": 3621
    },
    {
      "epoch": 0.03622,
      "grad_norm": 1.143882989533265,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 3622
    },
    {
      "epoch": 0.03623,
      "grad_norm": 0.7874622564246553,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 3623
    },
    {
      "epoch": 0.03624,
      "grad_norm": 0.7563110063478746,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 3624
    },
    {
      "epoch": 0.03625,
      "grad_norm": 0.7988179852097972,
      "learning_rate": 0.003,
      "loss": 4.178,
      "step": 3625
    },
    {
      "epoch": 0.03626,
      "grad_norm": 0.7231578892075671,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3626
    },
    {
      "epoch": 0.03627,
      "grad_norm": 0.6892596641637148,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 3627
    },
    {
      "epoch": 0.03628,
      "grad_norm": 0.7186998166943942,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 3628
    },
    {
      "epoch": 0.03629,
      "grad_norm": 0.7752324983651075,
      "learning_rate": 0.003,
      "loss": 4.1787,
      "step": 3629
    },
    {
      "epoch": 0.0363,
      "grad_norm": 0.9413427411204313,
      "learning_rate": 0.003,
      "loss": 4.1685,
      "step": 3630
    },
    {
      "epoch": 0.03631,
      "grad_norm": 0.9777703909452877,
      "learning_rate": 0.003,
      "loss": 4.1642,
      "step": 3631
    },
    {
      "epoch": 0.03632,
      "grad_norm": 0.8848239522487785,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 3632
    },
    {
      "epoch": 0.03633,
      "grad_norm": 0.9200746928259687,
      "learning_rate": 0.003,
      "loss": 4.1929,
      "step": 3633
    },
    {
      "epoch": 0.03634,
      "grad_norm": 0.9334170004603629,
      "learning_rate": 0.003,
      "loss": 4.1669,
      "step": 3634
    },
    {
      "epoch": 0.03635,
      "grad_norm": 0.9470225102280233,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 3635
    },
    {
      "epoch": 0.03636,
      "grad_norm": 0.9241160510652092,
      "learning_rate": 0.003,
      "loss": 4.1754,
      "step": 3636
    },
    {
      "epoch": 0.03637,
      "grad_norm": 0.7430750130688752,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 3637
    },
    {
      "epoch": 0.03638,
      "grad_norm": 0.7220044156395624,
      "learning_rate": 0.003,
      "loss": 4.1645,
      "step": 3638
    },
    {
      "epoch": 0.03639,
      "grad_norm": 0.7772051390195733,
      "learning_rate": 0.003,
      "loss": 4.1882,
      "step": 3639
    },
    {
      "epoch": 0.0364,
      "grad_norm": 0.8529468509123678,
      "learning_rate": 0.003,
      "loss": 4.1847,
      "step": 3640
    },
    {
      "epoch": 0.03641,
      "grad_norm": 1.0954199441179653,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 3641
    },
    {
      "epoch": 0.03642,
      "grad_norm": 0.8994211892974673,
      "learning_rate": 0.003,
      "loss": 4.1879,
      "step": 3642
    },
    {
      "epoch": 0.03643,
      "grad_norm": 0.8296521561058982,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 3643
    },
    {
      "epoch": 0.03644,
      "grad_norm": 0.8247836328668441,
      "learning_rate": 0.003,
      "loss": 4.1762,
      "step": 3644
    },
    {
      "epoch": 0.03645,
      "grad_norm": 0.840574264399085,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 3645
    },
    {
      "epoch": 0.03646,
      "grad_norm": 0.8080938302750766,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 3646
    },
    {
      "epoch": 0.03647,
      "grad_norm": 0.9664779357124396,
      "learning_rate": 0.003,
      "loss": 4.165,
      "step": 3647
    },
    {
      "epoch": 0.03648,
      "grad_norm": 1.0928052932730679,
      "learning_rate": 0.003,
      "loss": 4.16,
      "step": 3648
    },
    {
      "epoch": 0.03649,
      "grad_norm": 0.7684467951205206,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 3649
    },
    {
      "epoch": 0.0365,
      "grad_norm": 0.5750234241245785,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 3650
    },
    {
      "epoch": 0.03651,
      "grad_norm": 0.6573564310770661,
      "learning_rate": 0.003,
      "loss": 4.1708,
      "step": 3651
    },
    {
      "epoch": 0.03652,
      "grad_norm": 0.8888456033371046,
      "learning_rate": 0.003,
      "loss": 4.1579,
      "step": 3652
    },
    {
      "epoch": 0.03653,
      "grad_norm": 1.0419754908407304,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 3653
    },
    {
      "epoch": 0.03654,
      "grad_norm": 0.7921123605584492,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 3654
    },
    {
      "epoch": 0.03655,
      "grad_norm": 0.7289861914496177,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 3655
    },
    {
      "epoch": 0.03656,
      "grad_norm": 0.7796912479293006,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 3656
    },
    {
      "epoch": 0.03657,
      "grad_norm": 0.7324175109079976,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 3657
    },
    {
      "epoch": 0.03658,
      "grad_norm": 0.5756062562769299,
      "learning_rate": 0.003,
      "loss": 4.186,
      "step": 3658
    },
    {
      "epoch": 0.03659,
      "grad_norm": 0.6807858015149528,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 3659
    },
    {
      "epoch": 0.0366,
      "grad_norm": 0.7837064702433023,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 3660
    },
    {
      "epoch": 0.03661,
      "grad_norm": 0.7830429911526481,
      "learning_rate": 0.003,
      "loss": 4.1527,
      "step": 3661
    },
    {
      "epoch": 0.03662,
      "grad_norm": 0.7895739080121226,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 3662
    },
    {
      "epoch": 0.03663,
      "grad_norm": 0.893542071406176,
      "learning_rate": 0.003,
      "loss": 4.1687,
      "step": 3663
    },
    {
      "epoch": 0.03664,
      "grad_norm": 0.8754737269812044,
      "learning_rate": 0.003,
      "loss": 4.1494,
      "step": 3664
    },
    {
      "epoch": 0.03665,
      "grad_norm": 0.8736154039000673,
      "learning_rate": 0.003,
      "loss": 4.1736,
      "step": 3665
    },
    {
      "epoch": 0.03666,
      "grad_norm": 0.8365372537243944,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 3666
    },
    {
      "epoch": 0.03667,
      "grad_norm": 0.8675888077071373,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 3667
    },
    {
      "epoch": 0.03668,
      "grad_norm": 0.8544402197521644,
      "learning_rate": 0.003,
      "loss": 4.1573,
      "step": 3668
    },
    {
      "epoch": 0.03669,
      "grad_norm": 1.051770187962234,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 3669
    },
    {
      "epoch": 0.0367,
      "grad_norm": 1.312127243653887,
      "learning_rate": 0.003,
      "loss": 4.1894,
      "step": 3670
    },
    {
      "epoch": 0.03671,
      "grad_norm": 0.7882271613030375,
      "learning_rate": 0.003,
      "loss": 4.1628,
      "step": 3671
    },
    {
      "epoch": 0.03672,
      "grad_norm": 0.6575380088749906,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 3672
    },
    {
      "epoch": 0.03673,
      "grad_norm": 0.6533332497379345,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 3673
    },
    {
      "epoch": 0.03674,
      "grad_norm": 0.6740570121635143,
      "learning_rate": 0.003,
      "loss": 4.1281,
      "step": 3674
    },
    {
      "epoch": 0.03675,
      "grad_norm": 0.7719822820972118,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 3675
    },
    {
      "epoch": 0.03676,
      "grad_norm": 0.8997102742299902,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 3676
    },
    {
      "epoch": 0.03677,
      "grad_norm": 1.1346925943966613,
      "learning_rate": 0.003,
      "loss": 4.156,
      "step": 3677
    },
    {
      "epoch": 0.03678,
      "grad_norm": 1.3062247019240303,
      "learning_rate": 0.003,
      "loss": 4.1966,
      "step": 3678
    },
    {
      "epoch": 0.03679,
      "grad_norm": 0.7073498950931896,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 3679
    },
    {
      "epoch": 0.0368,
      "grad_norm": 0.7054669066644736,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 3680
    },
    {
      "epoch": 0.03681,
      "grad_norm": 0.6959325528820864,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 3681
    },
    {
      "epoch": 0.03682,
      "grad_norm": 0.6826134908981137,
      "learning_rate": 0.003,
      "loss": 4.1753,
      "step": 3682
    },
    {
      "epoch": 0.03683,
      "grad_norm": 0.9589548694394383,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 3683
    },
    {
      "epoch": 0.03684,
      "grad_norm": 1.3837835667463712,
      "learning_rate": 0.003,
      "loss": 4.185,
      "step": 3684
    },
    {
      "epoch": 0.03685,
      "grad_norm": 0.7029580914010543,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3685
    },
    {
      "epoch": 0.03686,
      "grad_norm": 0.793184528586383,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 3686
    },
    {
      "epoch": 0.03687,
      "grad_norm": 0.8386169263682931,
      "learning_rate": 0.003,
      "loss": 4.1944,
      "step": 3687
    },
    {
      "epoch": 0.03688,
      "grad_norm": 0.8483676049215235,
      "learning_rate": 0.003,
      "loss": 4.1841,
      "step": 3688
    },
    {
      "epoch": 0.03689,
      "grad_norm": 0.950222753372726,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 3689
    },
    {
      "epoch": 0.0369,
      "grad_norm": 0.9833017766191942,
      "learning_rate": 0.003,
      "loss": 4.1873,
      "step": 3690
    },
    {
      "epoch": 0.03691,
      "grad_norm": 1.0141537747097225,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 3691
    },
    {
      "epoch": 0.03692,
      "grad_norm": 0.8888682616492705,
      "learning_rate": 0.003,
      "loss": 4.2059,
      "step": 3692
    },
    {
      "epoch": 0.03693,
      "grad_norm": 0.9207088289233919,
      "learning_rate": 0.003,
      "loss": 4.1775,
      "step": 3693
    },
    {
      "epoch": 0.03694,
      "grad_norm": 0.8638154190597898,
      "learning_rate": 0.003,
      "loss": 4.1732,
      "step": 3694
    },
    {
      "epoch": 0.03695,
      "grad_norm": 0.7865314494927197,
      "learning_rate": 0.003,
      "loss": 4.1682,
      "step": 3695
    },
    {
      "epoch": 0.03696,
      "grad_norm": 0.811239269688311,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 3696
    },
    {
      "epoch": 0.03697,
      "grad_norm": 0.7527149600540498,
      "learning_rate": 0.003,
      "loss": 4.1523,
      "step": 3697
    },
    {
      "epoch": 0.03698,
      "grad_norm": 0.8165577678117001,
      "learning_rate": 0.003,
      "loss": 4.1875,
      "step": 3698
    },
    {
      "epoch": 0.03699,
      "grad_norm": 0.8971546398815292,
      "learning_rate": 0.003,
      "loss": 4.1782,
      "step": 3699
    },
    {
      "epoch": 0.037,
      "grad_norm": 0.9983619391979774,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 3700
    },
    {
      "epoch": 0.03701,
      "grad_norm": 0.8793417785162475,
      "learning_rate": 0.003,
      "loss": 4.1718,
      "step": 3701
    },
    {
      "epoch": 0.03702,
      "grad_norm": 0.8291238684887006,
      "learning_rate": 0.003,
      "loss": 4.1572,
      "step": 3702
    },
    {
      "epoch": 0.03703,
      "grad_norm": 0.6324908569860477,
      "learning_rate": 0.003,
      "loss": 4.1638,
      "step": 3703
    },
    {
      "epoch": 0.03704,
      "grad_norm": 0.6240031858723358,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 3704
    },
    {
      "epoch": 0.03705,
      "grad_norm": 0.7312113193138566,
      "learning_rate": 0.003,
      "loss": 4.1843,
      "step": 3705
    },
    {
      "epoch": 0.03706,
      "grad_norm": 0.8042606510953001,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 3706
    },
    {
      "epoch": 0.03707,
      "grad_norm": 0.8602980641760969,
      "learning_rate": 0.003,
      "loss": 4.1507,
      "step": 3707
    },
    {
      "epoch": 0.03708,
      "grad_norm": 0.8002371092022987,
      "learning_rate": 0.003,
      "loss": 4.1687,
      "step": 3708
    },
    {
      "epoch": 0.03709,
      "grad_norm": 0.8363140997412901,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 3709
    },
    {
      "epoch": 0.0371,
      "grad_norm": 0.7381408226781321,
      "learning_rate": 0.003,
      "loss": 4.1792,
      "step": 3710
    },
    {
      "epoch": 0.03711,
      "grad_norm": 0.6818914716146712,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 3711
    },
    {
      "epoch": 0.03712,
      "grad_norm": 0.6181242736172562,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 3712
    },
    {
      "epoch": 0.03713,
      "grad_norm": 0.5316945560555888,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 3713
    },
    {
      "epoch": 0.03714,
      "grad_norm": 0.4781488240208272,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 3714
    },
    {
      "epoch": 0.03715,
      "grad_norm": 0.5464841559814179,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 3715
    },
    {
      "epoch": 0.03716,
      "grad_norm": 0.6484458989407043,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 3716
    },
    {
      "epoch": 0.03717,
      "grad_norm": 0.7476388907786324,
      "learning_rate": 0.003,
      "loss": 4.1451,
      "step": 3717
    },
    {
      "epoch": 0.03718,
      "grad_norm": 1.011509883442139,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 3718
    },
    {
      "epoch": 0.03719,
      "grad_norm": 1.2912072156164622,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 3719
    },
    {
      "epoch": 0.0372,
      "grad_norm": 0.5930338537992184,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 3720
    },
    {
      "epoch": 0.03721,
      "grad_norm": 0.830711899216099,
      "learning_rate": 0.003,
      "loss": 4.1486,
      "step": 3721
    },
    {
      "epoch": 0.03722,
      "grad_norm": 0.9431990623316837,
      "learning_rate": 0.003,
      "loss": 4.1572,
      "step": 3722
    },
    {
      "epoch": 0.03723,
      "grad_norm": 0.9432356918938554,
      "learning_rate": 0.003,
      "loss": 4.188,
      "step": 3723
    },
    {
      "epoch": 0.03724,
      "grad_norm": 0.9636776258118436,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 3724
    },
    {
      "epoch": 0.03725,
      "grad_norm": 0.9139015379611488,
      "learning_rate": 0.003,
      "loss": 4.1794,
      "step": 3725
    },
    {
      "epoch": 0.03726,
      "grad_norm": 0.9516147591612311,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 3726
    },
    {
      "epoch": 0.03727,
      "grad_norm": 0.9752852122304477,
      "learning_rate": 0.003,
      "loss": 4.1953,
      "step": 3727
    },
    {
      "epoch": 0.03728,
      "grad_norm": 0.9695953467892918,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 3728
    },
    {
      "epoch": 0.03729,
      "grad_norm": 0.8893393181343283,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 3729
    },
    {
      "epoch": 0.0373,
      "grad_norm": 0.9218487904123037,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 3730
    },
    {
      "epoch": 0.03731,
      "grad_norm": 0.9098551175322124,
      "learning_rate": 0.003,
      "loss": 4.1839,
      "step": 3731
    },
    {
      "epoch": 0.03732,
      "grad_norm": 0.9256072358050464,
      "learning_rate": 0.003,
      "loss": 4.1811,
      "step": 3732
    },
    {
      "epoch": 0.03733,
      "grad_norm": 0.9379985657503299,
      "learning_rate": 0.003,
      "loss": 4.1595,
      "step": 3733
    },
    {
      "epoch": 0.03734,
      "grad_norm": 0.8308298518052655,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 3734
    },
    {
      "epoch": 0.03735,
      "grad_norm": 0.8761219483009925,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3735
    },
    {
      "epoch": 0.03736,
      "grad_norm": 1.0090875900995142,
      "learning_rate": 0.003,
      "loss": 4.1916,
      "step": 3736
    },
    {
      "epoch": 0.03737,
      "grad_norm": 0.9618846124849504,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 3737
    },
    {
      "epoch": 0.03738,
      "grad_norm": 0.8758346239827248,
      "learning_rate": 0.003,
      "loss": 4.1926,
      "step": 3738
    },
    {
      "epoch": 0.03739,
      "grad_norm": 0.9609527508841834,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 3739
    },
    {
      "epoch": 0.0374,
      "grad_norm": 1.1169937432795556,
      "learning_rate": 0.003,
      "loss": 4.1844,
      "step": 3740
    },
    {
      "epoch": 0.03741,
      "grad_norm": 0.9067264031417346,
      "learning_rate": 0.003,
      "loss": 4.1598,
      "step": 3741
    },
    {
      "epoch": 0.03742,
      "grad_norm": 0.7898423823168381,
      "learning_rate": 0.003,
      "loss": 4.151,
      "step": 3742
    },
    {
      "epoch": 0.03743,
      "grad_norm": 0.7661101135332484,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 3743
    },
    {
      "epoch": 0.03744,
      "grad_norm": 0.6067121401591796,
      "learning_rate": 0.003,
      "loss": 4.1712,
      "step": 3744
    },
    {
      "epoch": 0.03745,
      "grad_norm": 0.6024185554294552,
      "learning_rate": 0.003,
      "loss": 4.1531,
      "step": 3745
    },
    {
      "epoch": 0.03746,
      "grad_norm": 0.5815455713712403,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 3746
    },
    {
      "epoch": 0.03747,
      "grad_norm": 0.6179967126448696,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 3747
    },
    {
      "epoch": 0.03748,
      "grad_norm": 0.5607743590244431,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 3748
    },
    {
      "epoch": 0.03749,
      "grad_norm": 0.5373283353585078,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 3749
    },
    {
      "epoch": 0.0375,
      "grad_norm": 0.6259765189525954,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 3750
    },
    {
      "epoch": 0.03751,
      "grad_norm": 0.735426915588919,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3751
    },
    {
      "epoch": 0.03752,
      "grad_norm": 1.1121203063280463,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 3752
    },
    {
      "epoch": 0.03753,
      "grad_norm": 1.1286247639907543,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 3753
    },
    {
      "epoch": 0.03754,
      "grad_norm": 0.654316101247794,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 3754
    },
    {
      "epoch": 0.03755,
      "grad_norm": 0.6144499107508359,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3755
    },
    {
      "epoch": 0.03756,
      "grad_norm": 0.8558400590466524,
      "learning_rate": 0.003,
      "loss": 4.1306,
      "step": 3756
    },
    {
      "epoch": 0.03757,
      "grad_norm": 0.8999619822176426,
      "learning_rate": 0.003,
      "loss": 4.1636,
      "step": 3757
    },
    {
      "epoch": 0.03758,
      "grad_norm": 0.7120240054121859,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 3758
    },
    {
      "epoch": 0.03759,
      "grad_norm": 0.6839824836130421,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 3759
    },
    {
      "epoch": 0.0376,
      "grad_norm": 0.7584320585496591,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 3760
    },
    {
      "epoch": 0.03761,
      "grad_norm": 0.7956025663217148,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 3761
    },
    {
      "epoch": 0.03762,
      "grad_norm": 0.7804293287119324,
      "learning_rate": 0.003,
      "loss": 4.1782,
      "step": 3762
    },
    {
      "epoch": 0.03763,
      "grad_norm": 0.7176371093477065,
      "learning_rate": 0.003,
      "loss": 4.159,
      "step": 3763
    },
    {
      "epoch": 0.03764,
      "grad_norm": 0.7703219293331768,
      "learning_rate": 0.003,
      "loss": 4.1707,
      "step": 3764
    },
    {
      "epoch": 0.03765,
      "grad_norm": 0.8016568904602233,
      "learning_rate": 0.003,
      "loss": 4.191,
      "step": 3765
    },
    {
      "epoch": 0.03766,
      "grad_norm": 0.8025560943399235,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 3766
    },
    {
      "epoch": 0.03767,
      "grad_norm": 0.891734573749917,
      "learning_rate": 0.003,
      "loss": 4.1763,
      "step": 3767
    },
    {
      "epoch": 0.03768,
      "grad_norm": 1.043115086709847,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 3768
    },
    {
      "epoch": 0.03769,
      "grad_norm": 0.9976222066035275,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 3769
    },
    {
      "epoch": 0.0377,
      "grad_norm": 0.8470649334959093,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 3770
    },
    {
      "epoch": 0.03771,
      "grad_norm": 0.8274399936486776,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 3771
    },
    {
      "epoch": 0.03772,
      "grad_norm": 0.7871019788741513,
      "learning_rate": 0.003,
      "loss": 4.1668,
      "step": 3772
    },
    {
      "epoch": 0.03773,
      "grad_norm": 0.7390202831045702,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 3773
    },
    {
      "epoch": 0.03774,
      "grad_norm": 0.7113411820252128,
      "learning_rate": 0.003,
      "loss": 4.1532,
      "step": 3774
    },
    {
      "epoch": 0.03775,
      "grad_norm": 0.7435094864466253,
      "learning_rate": 0.003,
      "loss": 4.1705,
      "step": 3775
    },
    {
      "epoch": 0.03776,
      "grad_norm": 0.8919757094791062,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 3776
    },
    {
      "epoch": 0.03777,
      "grad_norm": 1.152057291990839,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 3777
    },
    {
      "epoch": 0.03778,
      "grad_norm": 1.0115617936585513,
      "learning_rate": 0.003,
      "loss": 4.1493,
      "step": 3778
    },
    {
      "epoch": 0.03779,
      "grad_norm": 0.9872755421624871,
      "learning_rate": 0.003,
      "loss": 4.1659,
      "step": 3779
    },
    {
      "epoch": 0.0378,
      "grad_norm": 0.8946795429906949,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 3780
    },
    {
      "epoch": 0.03781,
      "grad_norm": 0.9373368557855075,
      "learning_rate": 0.003,
      "loss": 4.1956,
      "step": 3781
    },
    {
      "epoch": 0.03782,
      "grad_norm": 0.853748438513948,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 3782
    },
    {
      "epoch": 0.03783,
      "grad_norm": 0.718548585744198,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 3783
    },
    {
      "epoch": 0.03784,
      "grad_norm": 0.7444967401418248,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 3784
    },
    {
      "epoch": 0.03785,
      "grad_norm": 0.8491520079890601,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 3785
    },
    {
      "epoch": 0.03786,
      "grad_norm": 0.872023269439096,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 3786
    },
    {
      "epoch": 0.03787,
      "grad_norm": 0.8535181083648594,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 3787
    },
    {
      "epoch": 0.03788,
      "grad_norm": 0.9108993849141537,
      "learning_rate": 0.003,
      "loss": 4.1855,
      "step": 3788
    },
    {
      "epoch": 0.03789,
      "grad_norm": 0.9794959022265245,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 3789
    },
    {
      "epoch": 0.0379,
      "grad_norm": 1.048224892887674,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 3790
    },
    {
      "epoch": 0.03791,
      "grad_norm": 0.8968035808950884,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 3791
    },
    {
      "epoch": 0.03792,
      "grad_norm": 0.8368499561515244,
      "learning_rate": 0.003,
      "loss": 4.1883,
      "step": 3792
    },
    {
      "epoch": 0.03793,
      "grad_norm": 0.8410884942872239,
      "learning_rate": 0.003,
      "loss": 4.1811,
      "step": 3793
    },
    {
      "epoch": 0.03794,
      "grad_norm": 0.8526793361609423,
      "learning_rate": 0.003,
      "loss": 4.1863,
      "step": 3794
    },
    {
      "epoch": 0.03795,
      "grad_norm": 0.8096867690017788,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 3795
    },
    {
      "epoch": 0.03796,
      "grad_norm": 0.7081089049008689,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 3796
    },
    {
      "epoch": 0.03797,
      "grad_norm": 0.6256114559170813,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 3797
    },
    {
      "epoch": 0.03798,
      "grad_norm": 0.5712390179880806,
      "learning_rate": 0.003,
      "loss": 4.1444,
      "step": 3798
    },
    {
      "epoch": 0.03799,
      "grad_norm": 0.6395912776533417,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3799
    },
    {
      "epoch": 0.038,
      "grad_norm": 0.7194653527929541,
      "learning_rate": 0.003,
      "loss": 4.1628,
      "step": 3800
    },
    {
      "epoch": 0.03801,
      "grad_norm": 0.9532685446077319,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 3801
    },
    {
      "epoch": 0.03802,
      "grad_norm": 1.1563370657823024,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 3802
    },
    {
      "epoch": 0.03803,
      "grad_norm": 0.7722962818272365,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 3803
    },
    {
      "epoch": 0.03804,
      "grad_norm": 0.7050390933953039,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 3804
    },
    {
      "epoch": 0.03805,
      "grad_norm": 0.8158853348007252,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 3805
    },
    {
      "epoch": 0.03806,
      "grad_norm": 0.8472580689148763,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 3806
    },
    {
      "epoch": 0.03807,
      "grad_norm": 0.9423141657174167,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 3807
    },
    {
      "epoch": 0.03808,
      "grad_norm": 1.1215335628084824,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 3808
    },
    {
      "epoch": 0.03809,
      "grad_norm": 0.9834441734645248,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3809
    },
    {
      "epoch": 0.0381,
      "grad_norm": 0.9317910497451528,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 3810
    },
    {
      "epoch": 0.03811,
      "grad_norm": 0.8413021683381647,
      "learning_rate": 0.003,
      "loss": 4.17,
      "step": 3811
    },
    {
      "epoch": 0.03812,
      "grad_norm": 0.772209856524563,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 3812
    },
    {
      "epoch": 0.03813,
      "grad_norm": 0.7904321983957339,
      "learning_rate": 0.003,
      "loss": 4.1306,
      "step": 3813
    },
    {
      "epoch": 0.03814,
      "grad_norm": 0.7457108043591989,
      "learning_rate": 0.003,
      "loss": 4.1598,
      "step": 3814
    },
    {
      "epoch": 0.03815,
      "grad_norm": 0.7103508451290527,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 3815
    },
    {
      "epoch": 0.03816,
      "grad_norm": 0.6613518230453459,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 3816
    },
    {
      "epoch": 0.03817,
      "grad_norm": 0.6678002497479578,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 3817
    },
    {
      "epoch": 0.03818,
      "grad_norm": 0.5546400433556298,
      "learning_rate": 0.003,
      "loss": 4.1735,
      "step": 3818
    },
    {
      "epoch": 0.03819,
      "grad_norm": 0.5048588514692719,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 3819
    },
    {
      "epoch": 0.0382,
      "grad_norm": 0.5022259630766596,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 3820
    },
    {
      "epoch": 0.03821,
      "grad_norm": 0.5319841736014501,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 3821
    },
    {
      "epoch": 0.03822,
      "grad_norm": 0.611206545487655,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 3822
    },
    {
      "epoch": 0.03823,
      "grad_norm": 0.7859585505396786,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 3823
    },
    {
      "epoch": 0.03824,
      "grad_norm": 1.170117791068494,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 3824
    },
    {
      "epoch": 0.03825,
      "grad_norm": 1.1642472923233964,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 3825
    },
    {
      "epoch": 0.03826,
      "grad_norm": 0.8386882795746682,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 3826
    },
    {
      "epoch": 0.03827,
      "grad_norm": 0.7280705231607668,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3827
    },
    {
      "epoch": 0.03828,
      "grad_norm": 0.8654688174806783,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 3828
    },
    {
      "epoch": 0.03829,
      "grad_norm": 0.8475583789249039,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3829
    },
    {
      "epoch": 0.0383,
      "grad_norm": 0.7947652875636247,
      "learning_rate": 0.003,
      "loss": 4.1418,
      "step": 3830
    },
    {
      "epoch": 0.03831,
      "grad_norm": 0.7168989453848753,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 3831
    },
    {
      "epoch": 0.03832,
      "grad_norm": 0.7200550330645938,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 3832
    },
    {
      "epoch": 0.03833,
      "grad_norm": 0.7587490659810894,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 3833
    },
    {
      "epoch": 0.03834,
      "grad_norm": 0.7136259183365191,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 3834
    },
    {
      "epoch": 0.03835,
      "grad_norm": 0.7659380295468856,
      "learning_rate": 0.003,
      "loss": 4.1602,
      "step": 3835
    },
    {
      "epoch": 0.03836,
      "grad_norm": 0.783318958508309,
      "learning_rate": 0.003,
      "loss": 4.1639,
      "step": 3836
    },
    {
      "epoch": 0.03837,
      "grad_norm": 0.8832315963306094,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 3837
    },
    {
      "epoch": 0.03838,
      "grad_norm": 0.9447883209168796,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 3838
    },
    {
      "epoch": 0.03839,
      "grad_norm": 1.0145507021392315,
      "learning_rate": 0.003,
      "loss": 4.1519,
      "step": 3839
    },
    {
      "epoch": 0.0384,
      "grad_norm": 1.3460191042861327,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 3840
    },
    {
      "epoch": 0.03841,
      "grad_norm": 0.7909608584954623,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 3841
    },
    {
      "epoch": 0.03842,
      "grad_norm": 0.6421152217888612,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 3842
    },
    {
      "epoch": 0.03843,
      "grad_norm": 0.7366594214130205,
      "learning_rate": 0.003,
      "loss": 4.1468,
      "step": 3843
    },
    {
      "epoch": 0.03844,
      "grad_norm": 0.756017458900284,
      "learning_rate": 0.003,
      "loss": 4.1537,
      "step": 3844
    },
    {
      "epoch": 0.03845,
      "grad_norm": 0.7922638056100233,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 3845
    },
    {
      "epoch": 0.03846,
      "grad_norm": 0.8441235594817899,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 3846
    },
    {
      "epoch": 0.03847,
      "grad_norm": 0.8282933496250385,
      "learning_rate": 0.003,
      "loss": 4.1618,
      "step": 3847
    },
    {
      "epoch": 0.03848,
      "grad_norm": 0.9404305490862656,
      "learning_rate": 0.003,
      "loss": 4.1493,
      "step": 3848
    },
    {
      "epoch": 0.03849,
      "grad_norm": 1.3038885588549676,
      "learning_rate": 0.003,
      "loss": 4.1779,
      "step": 3849
    },
    {
      "epoch": 0.0385,
      "grad_norm": 0.910760434256003,
      "learning_rate": 0.003,
      "loss": 4.1759,
      "step": 3850
    },
    {
      "epoch": 0.03851,
      "grad_norm": 0.9851608887041251,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 3851
    },
    {
      "epoch": 0.03852,
      "grad_norm": 1.115936013932957,
      "learning_rate": 0.003,
      "loss": 4.1839,
      "step": 3852
    },
    {
      "epoch": 0.03853,
      "grad_norm": 0.8625131320120981,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 3853
    },
    {
      "epoch": 0.03854,
      "grad_norm": 0.8093220938164782,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 3854
    },
    {
      "epoch": 0.03855,
      "grad_norm": 0.726455705711537,
      "learning_rate": 0.003,
      "loss": 4.1793,
      "step": 3855
    },
    {
      "epoch": 0.03856,
      "grad_norm": 0.7638648169981871,
      "learning_rate": 0.003,
      "loss": 4.1493,
      "step": 3856
    },
    {
      "epoch": 0.03857,
      "grad_norm": 0.806255198654299,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 3857
    },
    {
      "epoch": 0.03858,
      "grad_norm": 0.831071410604779,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 3858
    },
    {
      "epoch": 0.03859,
      "grad_norm": 0.8360163780390938,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 3859
    },
    {
      "epoch": 0.0386,
      "grad_norm": 0.9270300839146026,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 3860
    },
    {
      "epoch": 0.03861,
      "grad_norm": 1.0571665911277832,
      "learning_rate": 0.003,
      "loss": 4.1787,
      "step": 3861
    },
    {
      "epoch": 0.03862,
      "grad_norm": 0.9184603613158857,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 3862
    },
    {
      "epoch": 0.03863,
      "grad_norm": 0.9497546960268847,
      "learning_rate": 0.003,
      "loss": 4.1512,
      "step": 3863
    },
    {
      "epoch": 0.03864,
      "grad_norm": 1.0608016243824416,
      "learning_rate": 0.003,
      "loss": 4.1672,
      "step": 3864
    },
    {
      "epoch": 0.03865,
      "grad_norm": 0.9123627957656194,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 3865
    },
    {
      "epoch": 0.03866,
      "grad_norm": 0.8643631111703186,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 3866
    },
    {
      "epoch": 0.03867,
      "grad_norm": 0.7131249254546375,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 3867
    },
    {
      "epoch": 0.03868,
      "grad_norm": 0.6223536314544267,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 3868
    },
    {
      "epoch": 0.03869,
      "grad_norm": 0.6196862864928006,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 3869
    },
    {
      "epoch": 0.0387,
      "grad_norm": 0.6383959402879603,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 3870
    },
    {
      "epoch": 0.03871,
      "grad_norm": 0.7302513860623053,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 3871
    },
    {
      "epoch": 0.03872,
      "grad_norm": 0.8920320690566674,
      "learning_rate": 0.003,
      "loss": 4.1361,
      "step": 3872
    },
    {
      "epoch": 0.03873,
      "grad_norm": 0.9343331762751365,
      "learning_rate": 0.003,
      "loss": 4.1802,
      "step": 3873
    },
    {
      "epoch": 0.03874,
      "grad_norm": 1.0090689056461979,
      "learning_rate": 0.003,
      "loss": 4.1531,
      "step": 3874
    },
    {
      "epoch": 0.03875,
      "grad_norm": 1.0371067027925491,
      "learning_rate": 0.003,
      "loss": 4.156,
      "step": 3875
    },
    {
      "epoch": 0.03876,
      "grad_norm": 1.0801564549744878,
      "learning_rate": 0.003,
      "loss": 4.1919,
      "step": 3876
    },
    {
      "epoch": 0.03877,
      "grad_norm": 1.0484528784031752,
      "learning_rate": 0.003,
      "loss": 4.1737,
      "step": 3877
    },
    {
      "epoch": 0.03878,
      "grad_norm": 0.9887410146395552,
      "learning_rate": 0.003,
      "loss": 4.187,
      "step": 3878
    },
    {
      "epoch": 0.03879,
      "grad_norm": 0.9041370865255791,
      "learning_rate": 0.003,
      "loss": 4.1845,
      "step": 3879
    },
    {
      "epoch": 0.0388,
      "grad_norm": 0.8365742286229014,
      "learning_rate": 0.003,
      "loss": 4.144,
      "step": 3880
    },
    {
      "epoch": 0.03881,
      "grad_norm": 0.8968458423015266,
      "learning_rate": 0.003,
      "loss": 4.1712,
      "step": 3881
    },
    {
      "epoch": 0.03882,
      "grad_norm": 1.1815995202626826,
      "learning_rate": 0.003,
      "loss": 4.1894,
      "step": 3882
    },
    {
      "epoch": 0.03883,
      "grad_norm": 1.004450379233401,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 3883
    },
    {
      "epoch": 0.03884,
      "grad_norm": 0.8667121928698747,
      "learning_rate": 0.003,
      "loss": 4.1656,
      "step": 3884
    },
    {
      "epoch": 0.03885,
      "grad_norm": 0.6868027088778039,
      "learning_rate": 0.003,
      "loss": 4.1755,
      "step": 3885
    },
    {
      "epoch": 0.03886,
      "grad_norm": 0.7182515814021971,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 3886
    },
    {
      "epoch": 0.03887,
      "grad_norm": 0.737302592292138,
      "learning_rate": 0.003,
      "loss": 4.1586,
      "step": 3887
    },
    {
      "epoch": 0.03888,
      "grad_norm": 0.6947474627935248,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 3888
    },
    {
      "epoch": 0.03889,
      "grad_norm": 0.676048384811591,
      "learning_rate": 0.003,
      "loss": 4.1523,
      "step": 3889
    },
    {
      "epoch": 0.0389,
      "grad_norm": 0.6771755441542272,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 3890
    },
    {
      "epoch": 0.03891,
      "grad_norm": 0.7246456194548712,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 3891
    },
    {
      "epoch": 0.03892,
      "grad_norm": 0.7172714775987227,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 3892
    },
    {
      "epoch": 0.03893,
      "grad_norm": 0.8341243893405708,
      "learning_rate": 0.003,
      "loss": 4.1388,
      "step": 3893
    },
    {
      "epoch": 0.03894,
      "grad_norm": 0.9586809206015507,
      "learning_rate": 0.003,
      "loss": 4.1519,
      "step": 3894
    },
    {
      "epoch": 0.03895,
      "grad_norm": 1.1044375554559078,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 3895
    },
    {
      "epoch": 0.03896,
      "grad_norm": 0.9190112275090493,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 3896
    },
    {
      "epoch": 0.03897,
      "grad_norm": 0.7602784227805208,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 3897
    },
    {
      "epoch": 0.03898,
      "grad_norm": 0.639713595234581,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 3898
    },
    {
      "epoch": 0.03899,
      "grad_norm": 0.7184087029382192,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 3899
    },
    {
      "epoch": 0.039,
      "grad_norm": 0.7814816186886983,
      "learning_rate": 0.003,
      "loss": 4.1486,
      "step": 3900
    },
    {
      "epoch": 0.03901,
      "grad_norm": 0.7162921844556793,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 3901
    },
    {
      "epoch": 0.03902,
      "grad_norm": 0.8155759206891086,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 3902
    },
    {
      "epoch": 0.03903,
      "grad_norm": 1.0636290055979989,
      "learning_rate": 0.003,
      "loss": 4.1778,
      "step": 3903
    },
    {
      "epoch": 0.03904,
      "grad_norm": 1.0177125932370377,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 3904
    },
    {
      "epoch": 0.03905,
      "grad_norm": 0.9442621578301694,
      "learning_rate": 0.003,
      "loss": 4.1753,
      "step": 3905
    },
    {
      "epoch": 0.03906,
      "grad_norm": 0.790601653351365,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 3906
    },
    {
      "epoch": 0.03907,
      "grad_norm": 0.7794484704379305,
      "learning_rate": 0.003,
      "loss": 4.156,
      "step": 3907
    },
    {
      "epoch": 0.03908,
      "grad_norm": 0.7560452632943567,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 3908
    },
    {
      "epoch": 0.03909,
      "grad_norm": 0.7946846197040874,
      "learning_rate": 0.003,
      "loss": 4.1459,
      "step": 3909
    },
    {
      "epoch": 0.0391,
      "grad_norm": 0.8613810775384986,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 3910
    },
    {
      "epoch": 0.03911,
      "grad_norm": 0.8386874382765742,
      "learning_rate": 0.003,
      "loss": 4.1609,
      "step": 3911
    },
    {
      "epoch": 0.03912,
      "grad_norm": 0.9865330307604043,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 3912
    },
    {
      "epoch": 0.03913,
      "grad_norm": 1.1331373891184677,
      "learning_rate": 0.003,
      "loss": 4.151,
      "step": 3913
    },
    {
      "epoch": 0.03914,
      "grad_norm": 0.7046370423377648,
      "learning_rate": 0.003,
      "loss": 4.1706,
      "step": 3914
    },
    {
      "epoch": 0.03915,
      "grad_norm": 0.7467899755928374,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 3915
    },
    {
      "epoch": 0.03916,
      "grad_norm": 0.892872585324721,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 3916
    },
    {
      "epoch": 0.03917,
      "grad_norm": 1.228738480994026,
      "learning_rate": 0.003,
      "loss": 4.1732,
      "step": 3917
    },
    {
      "epoch": 0.03918,
      "grad_norm": 0.9438157037038833,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 3918
    },
    {
      "epoch": 0.03919,
      "grad_norm": 0.6952503471849076,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 3919
    },
    {
      "epoch": 0.0392,
      "grad_norm": 0.7048182141212009,
      "learning_rate": 0.003,
      "loss": 4.1429,
      "step": 3920
    },
    {
      "epoch": 0.03921,
      "grad_norm": 0.879657398847784,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 3921
    },
    {
      "epoch": 0.03922,
      "grad_norm": 1.1793211640732564,
      "learning_rate": 0.003,
      "loss": 4.1943,
      "step": 3922
    },
    {
      "epoch": 0.03923,
      "grad_norm": 0.9549026205906491,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 3923
    },
    {
      "epoch": 0.03924,
      "grad_norm": 0.7775760685887316,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 3924
    },
    {
      "epoch": 0.03925,
      "grad_norm": 0.7598517272028414,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 3925
    },
    {
      "epoch": 0.03926,
      "grad_norm": 0.7119046881729456,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 3926
    },
    {
      "epoch": 0.03927,
      "grad_norm": 0.8115139166245634,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 3927
    },
    {
      "epoch": 0.03928,
      "grad_norm": 0.8496702812010677,
      "learning_rate": 0.003,
      "loss": 4.172,
      "step": 3928
    },
    {
      "epoch": 0.03929,
      "grad_norm": 0.914389232519974,
      "learning_rate": 0.003,
      "loss": 4.1836,
      "step": 3929
    },
    {
      "epoch": 0.0393,
      "grad_norm": 0.934142877257439,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 3930
    },
    {
      "epoch": 0.03931,
      "grad_norm": 0.9169035963389445,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 3931
    },
    {
      "epoch": 0.03932,
      "grad_norm": 0.9586032495283386,
      "learning_rate": 0.003,
      "loss": 4.1767,
      "step": 3932
    },
    {
      "epoch": 0.03933,
      "grad_norm": 0.8943397447772738,
      "learning_rate": 0.003,
      "loss": 4.1604,
      "step": 3933
    },
    {
      "epoch": 0.03934,
      "grad_norm": 1.1458130542379994,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 3934
    },
    {
      "epoch": 0.03935,
      "grad_norm": 1.1119954153292497,
      "learning_rate": 0.003,
      "loss": 4.1564,
      "step": 3935
    },
    {
      "epoch": 0.03936,
      "grad_norm": 0.9651500208576397,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 3936
    },
    {
      "epoch": 0.03937,
      "grad_norm": 0.9009017242821815,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 3937
    },
    {
      "epoch": 0.03938,
      "grad_norm": 0.682722210666222,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 3938
    },
    {
      "epoch": 0.03939,
      "grad_norm": 0.6257361874612599,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 3939
    },
    {
      "epoch": 0.0394,
      "grad_norm": 0.6910228445809036,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 3940
    },
    {
      "epoch": 0.03941,
      "grad_norm": 0.8752420058270881,
      "learning_rate": 0.003,
      "loss": 4.1523,
      "step": 3941
    },
    {
      "epoch": 0.03942,
      "grad_norm": 1.0090937202006367,
      "learning_rate": 0.003,
      "loss": 4.1701,
      "step": 3942
    },
    {
      "epoch": 0.03943,
      "grad_norm": 0.8713788492851877,
      "learning_rate": 0.003,
      "loss": 4.1594,
      "step": 3943
    },
    {
      "epoch": 0.03944,
      "grad_norm": 0.6395719699631305,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 3944
    },
    {
      "epoch": 0.03945,
      "grad_norm": 0.6237828376004393,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 3945
    },
    {
      "epoch": 0.03946,
      "grad_norm": 0.7996865672671727,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 3946
    },
    {
      "epoch": 0.03947,
      "grad_norm": 0.8027734166618509,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 3947
    },
    {
      "epoch": 0.03948,
      "grad_norm": 0.7480672212389291,
      "learning_rate": 0.003,
      "loss": 4.185,
      "step": 3948
    },
    {
      "epoch": 0.03949,
      "grad_norm": 0.7986884544671021,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 3949
    },
    {
      "epoch": 0.0395,
      "grad_norm": 0.7882737042471627,
      "learning_rate": 0.003,
      "loss": 4.1581,
      "step": 3950
    },
    {
      "epoch": 0.03951,
      "grad_norm": 0.8513453245753498,
      "learning_rate": 0.003,
      "loss": 4.1908,
      "step": 3951
    },
    {
      "epoch": 0.03952,
      "grad_norm": 0.9604447646143368,
      "learning_rate": 0.003,
      "loss": 4.1733,
      "step": 3952
    },
    {
      "epoch": 0.03953,
      "grad_norm": 0.9585905884824354,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 3953
    },
    {
      "epoch": 0.03954,
      "grad_norm": 0.9656678821151307,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 3954
    },
    {
      "epoch": 0.03955,
      "grad_norm": 0.9222198486019069,
      "learning_rate": 0.003,
      "loss": 4.1565,
      "step": 3955
    },
    {
      "epoch": 0.03956,
      "grad_norm": 1.0225324631256894,
      "learning_rate": 0.003,
      "loss": 4.1615,
      "step": 3956
    },
    {
      "epoch": 0.03957,
      "grad_norm": 1.0431715292566202,
      "learning_rate": 0.003,
      "loss": 4.1665,
      "step": 3957
    },
    {
      "epoch": 0.03958,
      "grad_norm": 1.0749240135902176,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 3958
    },
    {
      "epoch": 0.03959,
      "grad_norm": 0.8880519487734211,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 3959
    },
    {
      "epoch": 0.0396,
      "grad_norm": 0.770078201609415,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 3960
    },
    {
      "epoch": 0.03961,
      "grad_norm": 0.7312192163132855,
      "learning_rate": 0.003,
      "loss": 4.1531,
      "step": 3961
    },
    {
      "epoch": 0.03962,
      "grad_norm": 0.8077388602218855,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 3962
    },
    {
      "epoch": 0.03963,
      "grad_norm": 0.9497511656553155,
      "learning_rate": 0.003,
      "loss": 4.1518,
      "step": 3963
    },
    {
      "epoch": 0.03964,
      "grad_norm": 1.09506865726512,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 3964
    },
    {
      "epoch": 0.03965,
      "grad_norm": 0.9655209694273282,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 3965
    },
    {
      "epoch": 0.03966,
      "grad_norm": 0.9060903529494769,
      "learning_rate": 0.003,
      "loss": 4.197,
      "step": 3966
    },
    {
      "epoch": 0.03967,
      "grad_norm": 0.9168805934226382,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 3967
    },
    {
      "epoch": 0.03968,
      "grad_norm": 0.8082674185213146,
      "learning_rate": 0.003,
      "loss": 4.1795,
      "step": 3968
    },
    {
      "epoch": 0.03969,
      "grad_norm": 0.8323391059785374,
      "learning_rate": 0.003,
      "loss": 4.1564,
      "step": 3969
    },
    {
      "epoch": 0.0397,
      "grad_norm": 0.9524651490900072,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 3970
    },
    {
      "epoch": 0.03971,
      "grad_norm": 0.9567901053636726,
      "learning_rate": 0.003,
      "loss": 4.1454,
      "step": 3971
    },
    {
      "epoch": 0.03972,
      "grad_norm": 0.9696093602166972,
      "learning_rate": 0.003,
      "loss": 4.1654,
      "step": 3972
    },
    {
      "epoch": 0.03973,
      "grad_norm": 0.9370133833063669,
      "learning_rate": 0.003,
      "loss": 4.1687,
      "step": 3973
    },
    {
      "epoch": 0.03974,
      "grad_norm": 0.8608341541886206,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 3974
    },
    {
      "epoch": 0.03975,
      "grad_norm": 0.9108884430543455,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 3975
    },
    {
      "epoch": 0.03976,
      "grad_norm": 0.9241700018742819,
      "learning_rate": 0.003,
      "loss": 4.1895,
      "step": 3976
    },
    {
      "epoch": 0.03977,
      "grad_norm": 0.8946194243663588,
      "learning_rate": 0.003,
      "loss": 4.1801,
      "step": 3977
    },
    {
      "epoch": 0.03978,
      "grad_norm": 0.8947344596350721,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 3978
    },
    {
      "epoch": 0.03979,
      "grad_norm": 0.9770682330119697,
      "learning_rate": 0.003,
      "loss": 4.166,
      "step": 3979
    },
    {
      "epoch": 0.0398,
      "grad_norm": 0.938338044149382,
      "learning_rate": 0.003,
      "loss": 4.1591,
      "step": 3980
    },
    {
      "epoch": 0.03981,
      "grad_norm": 1.0362927866956353,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 3981
    },
    {
      "epoch": 0.03982,
      "grad_norm": 1.1980787664669188,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 3982
    },
    {
      "epoch": 0.03983,
      "grad_norm": 0.8306482940322903,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 3983
    },
    {
      "epoch": 0.03984,
      "grad_norm": 0.7984572534714871,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 3984
    },
    {
      "epoch": 0.03985,
      "grad_norm": 0.7618742502189577,
      "learning_rate": 0.003,
      "loss": 4.1568,
      "step": 3985
    },
    {
      "epoch": 0.03986,
      "grad_norm": 0.7612926191359269,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 3986
    },
    {
      "epoch": 0.03987,
      "grad_norm": 0.8001366102763903,
      "learning_rate": 0.003,
      "loss": 4.1589,
      "step": 3987
    },
    {
      "epoch": 0.03988,
      "grad_norm": 1.0875345535497536,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 3988
    },
    {
      "epoch": 0.03989,
      "grad_norm": 0.9012674416468869,
      "learning_rate": 0.003,
      "loss": 4.156,
      "step": 3989
    },
    {
      "epoch": 0.0399,
      "grad_norm": 0.7809977747028035,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 3990
    },
    {
      "epoch": 0.03991,
      "grad_norm": 0.7695628801274876,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 3991
    },
    {
      "epoch": 0.03992,
      "grad_norm": 0.6898705145692119,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 3992
    },
    {
      "epoch": 0.03993,
      "grad_norm": 0.7439323328212757,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 3993
    },
    {
      "epoch": 0.03994,
      "grad_norm": 0.8108536082334781,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 3994
    },
    {
      "epoch": 0.03995,
      "grad_norm": 0.8023110096378178,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 3995
    },
    {
      "epoch": 0.03996,
      "grad_norm": 0.7568430508180715,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 3996
    },
    {
      "epoch": 0.03997,
      "grad_norm": 0.8365583946977457,
      "learning_rate": 0.003,
      "loss": 4.1688,
      "step": 3997
    },
    {
      "epoch": 0.03998,
      "grad_norm": 0.9142234466239328,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 3998
    },
    {
      "epoch": 0.03999,
      "grad_norm": 1.038283900357651,
      "learning_rate": 0.003,
      "loss": 4.1795,
      "step": 3999
    },
    {
      "epoch": 0.04,
      "grad_norm": 1.1201832946054642,
      "learning_rate": 0.003,
      "loss": 4.1848,
      "step": 4000
    },
    {
      "epoch": 0.04001,
      "grad_norm": 0.808173413358703,
      "learning_rate": 0.003,
      "loss": 4.1725,
      "step": 4001
    },
    {
      "epoch": 0.04002,
      "grad_norm": 0.6629014428039164,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 4002
    },
    {
      "epoch": 0.04003,
      "grad_norm": 0.7358065961733515,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 4003
    },
    {
      "epoch": 0.04004,
      "grad_norm": 0.8629016209004771,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 4004
    },
    {
      "epoch": 0.04005,
      "grad_norm": 0.8785262035574583,
      "learning_rate": 0.003,
      "loss": 4.1835,
      "step": 4005
    },
    {
      "epoch": 0.04006,
      "grad_norm": 0.8675854999926088,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 4006
    },
    {
      "epoch": 0.04007,
      "grad_norm": 0.7186371802576791,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 4007
    },
    {
      "epoch": 0.04008,
      "grad_norm": 0.6367987674963547,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 4008
    },
    {
      "epoch": 0.04009,
      "grad_norm": 0.6717991325425072,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 4009
    },
    {
      "epoch": 0.0401,
      "grad_norm": 1.0170810285743253,
      "learning_rate": 0.003,
      "loss": 4.1593,
      "step": 4010
    },
    {
      "epoch": 0.04011,
      "grad_norm": 1.3056483319688719,
      "learning_rate": 0.003,
      "loss": 4.1768,
      "step": 4011
    },
    {
      "epoch": 0.04012,
      "grad_norm": 0.760243524633557,
      "learning_rate": 0.003,
      "loss": 4.1595,
      "step": 4012
    },
    {
      "epoch": 0.04013,
      "grad_norm": 0.7397871657108581,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 4013
    },
    {
      "epoch": 0.04014,
      "grad_norm": 0.7499847916552277,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 4014
    },
    {
      "epoch": 0.04015,
      "grad_norm": 0.8867737502420797,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 4015
    },
    {
      "epoch": 0.04016,
      "grad_norm": 0.9755243040378807,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 4016
    },
    {
      "epoch": 0.04017,
      "grad_norm": 1.0307798897636882,
      "learning_rate": 0.003,
      "loss": 4.1884,
      "step": 4017
    },
    {
      "epoch": 0.04018,
      "grad_norm": 0.9218658209542674,
      "learning_rate": 0.003,
      "loss": 4.1539,
      "step": 4018
    },
    {
      "epoch": 0.04019,
      "grad_norm": 0.7251607327676213,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 4019
    },
    {
      "epoch": 0.0402,
      "grad_norm": 0.6906853299354309,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 4020
    },
    {
      "epoch": 0.04021,
      "grad_norm": 0.7032884976043096,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4021
    },
    {
      "epoch": 0.04022,
      "grad_norm": 0.8205807169248119,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 4022
    },
    {
      "epoch": 0.04023,
      "grad_norm": 0.8414169267763164,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 4023
    },
    {
      "epoch": 0.04024,
      "grad_norm": 0.867716750706141,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 4024
    },
    {
      "epoch": 0.04025,
      "grad_norm": 0.8383796894954268,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 4025
    },
    {
      "epoch": 0.04026,
      "grad_norm": 0.8750670478361139,
      "learning_rate": 0.003,
      "loss": 4.1616,
      "step": 4026
    },
    {
      "epoch": 0.04027,
      "grad_norm": 0.8493399946851298,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 4027
    },
    {
      "epoch": 0.04028,
      "grad_norm": 0.8679376610707137,
      "learning_rate": 0.003,
      "loss": 4.1478,
      "step": 4028
    },
    {
      "epoch": 0.04029,
      "grad_norm": 0.7870403687200003,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 4029
    },
    {
      "epoch": 0.0403,
      "grad_norm": 0.7657237380129638,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 4030
    },
    {
      "epoch": 0.04031,
      "grad_norm": 0.8834770309040775,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 4031
    },
    {
      "epoch": 0.04032,
      "grad_norm": 0.971327728269083,
      "learning_rate": 0.003,
      "loss": 4.1729,
      "step": 4032
    },
    {
      "epoch": 0.04033,
      "grad_norm": 0.9938430497115239,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 4033
    },
    {
      "epoch": 0.04034,
      "grad_norm": 0.9507001357433588,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 4034
    },
    {
      "epoch": 0.04035,
      "grad_norm": 0.7647174546606386,
      "learning_rate": 0.003,
      "loss": 4.1582,
      "step": 4035
    },
    {
      "epoch": 0.04036,
      "grad_norm": 0.7526257143196984,
      "learning_rate": 0.003,
      "loss": 4.1713,
      "step": 4036
    },
    {
      "epoch": 0.04037,
      "grad_norm": 0.8211100317431537,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 4037
    },
    {
      "epoch": 0.04038,
      "grad_norm": 0.9809609729682468,
      "learning_rate": 0.003,
      "loss": 4.1513,
      "step": 4038
    },
    {
      "epoch": 0.04039,
      "grad_norm": 1.2927831672520989,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 4039
    },
    {
      "epoch": 0.0404,
      "grad_norm": 0.8695253025713267,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 4040
    },
    {
      "epoch": 0.04041,
      "grad_norm": 0.7950885742981303,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 4041
    },
    {
      "epoch": 0.04042,
      "grad_norm": 0.8571261424916939,
      "learning_rate": 0.003,
      "loss": 4.1815,
      "step": 4042
    },
    {
      "epoch": 0.04043,
      "grad_norm": 0.9275780079359417,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 4043
    },
    {
      "epoch": 0.04044,
      "grad_norm": 0.9484978049597467,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 4044
    },
    {
      "epoch": 0.04045,
      "grad_norm": 0.9256569027586441,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 4045
    },
    {
      "epoch": 0.04046,
      "grad_norm": 0.862135206370447,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 4046
    },
    {
      "epoch": 0.04047,
      "grad_norm": 0.8957879714648798,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 4047
    },
    {
      "epoch": 0.04048,
      "grad_norm": 0.915367027532899,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 4048
    },
    {
      "epoch": 0.04049,
      "grad_norm": 0.849479270104104,
      "learning_rate": 0.003,
      "loss": 4.1819,
      "step": 4049
    },
    {
      "epoch": 0.0405,
      "grad_norm": 0.802623437841427,
      "learning_rate": 0.003,
      "loss": 4.1744,
      "step": 4050
    },
    {
      "epoch": 0.04051,
      "grad_norm": 0.8361334119276083,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 4051
    },
    {
      "epoch": 0.04052,
      "grad_norm": 0.8502895680512856,
      "learning_rate": 0.003,
      "loss": 4.1628,
      "step": 4052
    },
    {
      "epoch": 0.04053,
      "grad_norm": 0.9495553848705864,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 4053
    },
    {
      "epoch": 0.04054,
      "grad_norm": 1.1074705319718914,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 4054
    },
    {
      "epoch": 0.04055,
      "grad_norm": 0.786290186382969,
      "learning_rate": 0.003,
      "loss": 4.1281,
      "step": 4055
    },
    {
      "epoch": 0.04056,
      "grad_norm": 0.6575896415910156,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 4056
    },
    {
      "epoch": 0.04057,
      "grad_norm": 0.7670781614470484,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 4057
    },
    {
      "epoch": 0.04058,
      "grad_norm": 0.833843513666775,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 4058
    },
    {
      "epoch": 0.04059,
      "grad_norm": 0.8114168552814227,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 4059
    },
    {
      "epoch": 0.0406,
      "grad_norm": 0.7692714330479883,
      "learning_rate": 0.003,
      "loss": 4.1663,
      "step": 4060
    },
    {
      "epoch": 0.04061,
      "grad_norm": 0.9330465410070367,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 4061
    },
    {
      "epoch": 0.04062,
      "grad_norm": 0.9117817660909525,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 4062
    },
    {
      "epoch": 0.04063,
      "grad_norm": 0.9445008569369826,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 4063
    },
    {
      "epoch": 0.04064,
      "grad_norm": 1.087764691560672,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 4064
    },
    {
      "epoch": 0.04065,
      "grad_norm": 1.019578221772767,
      "learning_rate": 0.003,
      "loss": 4.1964,
      "step": 4065
    },
    {
      "epoch": 0.04066,
      "grad_norm": 0.8751481231375284,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 4066
    },
    {
      "epoch": 0.04067,
      "grad_norm": 0.8359962586744085,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 4067
    },
    {
      "epoch": 0.04068,
      "grad_norm": 0.8804677832654572,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 4068
    },
    {
      "epoch": 0.04069,
      "grad_norm": 0.8702907169533645,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 4069
    },
    {
      "epoch": 0.0407,
      "grad_norm": 0.8649761703309935,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 4070
    },
    {
      "epoch": 0.04071,
      "grad_norm": 0.8396276365063743,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 4071
    },
    {
      "epoch": 0.04072,
      "grad_norm": 0.8818553417907129,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 4072
    },
    {
      "epoch": 0.04073,
      "grad_norm": 0.9077326173731153,
      "learning_rate": 0.003,
      "loss": 4.1385,
      "step": 4073
    },
    {
      "epoch": 0.04074,
      "grad_norm": 1.0214476432409059,
      "learning_rate": 0.003,
      "loss": 4.1689,
      "step": 4074
    },
    {
      "epoch": 0.04075,
      "grad_norm": 0.9920974174141465,
      "learning_rate": 0.003,
      "loss": 4.168,
      "step": 4075
    },
    {
      "epoch": 0.04076,
      "grad_norm": 0.8907761503803999,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 4076
    },
    {
      "epoch": 0.04077,
      "grad_norm": 0.8997616564510362,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 4077
    },
    {
      "epoch": 0.04078,
      "grad_norm": 0.8451943457352864,
      "learning_rate": 0.003,
      "loss": 4.1445,
      "step": 4078
    },
    {
      "epoch": 0.04079,
      "grad_norm": 0.797710300379145,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 4079
    },
    {
      "epoch": 0.0408,
      "grad_norm": 0.7483016121536471,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 4080
    },
    {
      "epoch": 0.04081,
      "grad_norm": 0.7023171834702521,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 4081
    },
    {
      "epoch": 0.04082,
      "grad_norm": 0.7970696730978741,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 4082
    },
    {
      "epoch": 0.04083,
      "grad_norm": 0.8338986570903769,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4083
    },
    {
      "epoch": 0.04084,
      "grad_norm": 0.8012483337932724,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 4084
    },
    {
      "epoch": 0.04085,
      "grad_norm": 0.8926567018581679,
      "learning_rate": 0.003,
      "loss": 4.1539,
      "step": 4085
    },
    {
      "epoch": 0.04086,
      "grad_norm": 0.9581160933844942,
      "learning_rate": 0.003,
      "loss": 4.1789,
      "step": 4086
    },
    {
      "epoch": 0.04087,
      "grad_norm": 1.0136930052545263,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 4087
    },
    {
      "epoch": 0.04088,
      "grad_norm": 0.8125239420336555,
      "learning_rate": 0.003,
      "loss": 4.1368,
      "step": 4088
    },
    {
      "epoch": 0.04089,
      "grad_norm": 0.6317696576646198,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4089
    },
    {
      "epoch": 0.0409,
      "grad_norm": 0.7082265254467968,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 4090
    },
    {
      "epoch": 0.04091,
      "grad_norm": 0.7341122654124385,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 4091
    },
    {
      "epoch": 0.04092,
      "grad_norm": 0.8808208810851171,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 4092
    },
    {
      "epoch": 0.04093,
      "grad_norm": 1.0270137223618354,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4093
    },
    {
      "epoch": 0.04094,
      "grad_norm": 0.9812040125714347,
      "learning_rate": 0.003,
      "loss": 4.144,
      "step": 4094
    },
    {
      "epoch": 0.04095,
      "grad_norm": 0.7496518915755831,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 4095
    },
    {
      "epoch": 0.04096,
      "grad_norm": 0.6920676687623359,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 4096
    },
    {
      "epoch": 0.04097,
      "grad_norm": 0.7659926369906308,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 4097
    },
    {
      "epoch": 0.04098,
      "grad_norm": 0.7973861989081666,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 4098
    },
    {
      "epoch": 0.04099,
      "grad_norm": 0.9266888279285345,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 4099
    },
    {
      "epoch": 0.041,
      "grad_norm": 0.8520041299509284,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 4100
    },
    {
      "epoch": 0.04101,
      "grad_norm": 0.83237018706289,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 4101
    },
    {
      "epoch": 0.04102,
      "grad_norm": 0.9965520477059928,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 4102
    },
    {
      "epoch": 0.04103,
      "grad_norm": 1.0818833033424187,
      "learning_rate": 0.003,
      "loss": 4.1836,
      "step": 4103
    },
    {
      "epoch": 0.04104,
      "grad_norm": 0.7652239209069888,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 4104
    },
    {
      "epoch": 0.04105,
      "grad_norm": 0.8496173505486513,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 4105
    },
    {
      "epoch": 0.04106,
      "grad_norm": 0.9926762870076,
      "learning_rate": 0.003,
      "loss": 4.1633,
      "step": 4106
    },
    {
      "epoch": 0.04107,
      "grad_norm": 1.1688815385892068,
      "learning_rate": 0.003,
      "loss": 4.1684,
      "step": 4107
    },
    {
      "epoch": 0.04108,
      "grad_norm": 0.9904790188927448,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 4108
    },
    {
      "epoch": 0.04109,
      "grad_norm": 1.0036626582575459,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 4109
    },
    {
      "epoch": 0.0411,
      "grad_norm": 0.9269220216164543,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4110
    },
    {
      "epoch": 0.04111,
      "grad_norm": 0.9283892200368855,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 4111
    },
    {
      "epoch": 0.04112,
      "grad_norm": 1.1060288948934516,
      "learning_rate": 0.003,
      "loss": 4.1588,
      "step": 4112
    },
    {
      "epoch": 0.04113,
      "grad_norm": 1.0211912461653543,
      "learning_rate": 0.003,
      "loss": 4.1992,
      "step": 4113
    },
    {
      "epoch": 0.04114,
      "grad_norm": 0.917660491574899,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 4114
    },
    {
      "epoch": 0.04115,
      "grad_norm": 0.7897516328452319,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 4115
    },
    {
      "epoch": 0.04116,
      "grad_norm": 0.8456029460362186,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 4116
    },
    {
      "epoch": 0.04117,
      "grad_norm": 0.8944705808882064,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 4117
    },
    {
      "epoch": 0.04118,
      "grad_norm": 0.8832980566787416,
      "learning_rate": 0.003,
      "loss": 4.1861,
      "step": 4118
    },
    {
      "epoch": 0.04119,
      "grad_norm": 0.8067714590301097,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 4119
    },
    {
      "epoch": 0.0412,
      "grad_norm": 0.7737228855286454,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 4120
    },
    {
      "epoch": 0.04121,
      "grad_norm": 0.8389511273786252,
      "learning_rate": 0.003,
      "loss": 4.175,
      "step": 4121
    },
    {
      "epoch": 0.04122,
      "grad_norm": 0.8398694848503073,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 4122
    },
    {
      "epoch": 0.04123,
      "grad_norm": 0.9090933780386284,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4123
    },
    {
      "epoch": 0.04124,
      "grad_norm": 0.9872711905443635,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 4124
    },
    {
      "epoch": 0.04125,
      "grad_norm": 1.085044262616643,
      "learning_rate": 0.003,
      "loss": 4.1685,
      "step": 4125
    },
    {
      "epoch": 0.04126,
      "grad_norm": 0.8814957273506651,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 4126
    },
    {
      "epoch": 0.04127,
      "grad_norm": 0.8604955271585082,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 4127
    },
    {
      "epoch": 0.04128,
      "grad_norm": 0.9621571347239927,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 4128
    },
    {
      "epoch": 0.04129,
      "grad_norm": 0.8835160766900358,
      "learning_rate": 0.003,
      "loss": 4.171,
      "step": 4129
    },
    {
      "epoch": 0.0413,
      "grad_norm": 0.8289903012402862,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 4130
    },
    {
      "epoch": 0.04131,
      "grad_norm": 0.8175052036927931,
      "learning_rate": 0.003,
      "loss": 4.1632,
      "step": 4131
    },
    {
      "epoch": 0.04132,
      "grad_norm": 0.8802427709327816,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 4132
    },
    {
      "epoch": 0.04133,
      "grad_norm": 0.9035446720390723,
      "learning_rate": 0.003,
      "loss": 4.1697,
      "step": 4133
    },
    {
      "epoch": 0.04134,
      "grad_norm": 0.7899715760468391,
      "learning_rate": 0.003,
      "loss": 4.1581,
      "step": 4134
    },
    {
      "epoch": 0.04135,
      "grad_norm": 0.7442781516869081,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 4135
    },
    {
      "epoch": 0.04136,
      "grad_norm": 0.7238773685007321,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 4136
    },
    {
      "epoch": 0.04137,
      "grad_norm": 0.7132883212877328,
      "learning_rate": 0.003,
      "loss": 4.173,
      "step": 4137
    },
    {
      "epoch": 0.04138,
      "grad_norm": 0.8062240784991308,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4138
    },
    {
      "epoch": 0.04139,
      "grad_norm": 1.0359489486607962,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 4139
    },
    {
      "epoch": 0.0414,
      "grad_norm": 1.1920638942276114,
      "learning_rate": 0.003,
      "loss": 4.1675,
      "step": 4140
    },
    {
      "epoch": 0.04141,
      "grad_norm": 0.7605076804575376,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 4141
    },
    {
      "epoch": 0.04142,
      "grad_norm": 0.6390080347747942,
      "learning_rate": 0.003,
      "loss": 4.1576,
      "step": 4142
    },
    {
      "epoch": 0.04143,
      "grad_norm": 0.716027932583964,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 4143
    },
    {
      "epoch": 0.04144,
      "grad_norm": 0.8343696675557656,
      "learning_rate": 0.003,
      "loss": 4.1459,
      "step": 4144
    },
    {
      "epoch": 0.04145,
      "grad_norm": 1.0821429598570727,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 4145
    },
    {
      "epoch": 0.04146,
      "grad_norm": 1.0810361946276594,
      "learning_rate": 0.003,
      "loss": 4.1595,
      "step": 4146
    },
    {
      "epoch": 0.04147,
      "grad_norm": 0.963764020070276,
      "learning_rate": 0.003,
      "loss": 4.1681,
      "step": 4147
    },
    {
      "epoch": 0.04148,
      "grad_norm": 0.8320178719420672,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 4148
    },
    {
      "epoch": 0.04149,
      "grad_norm": 0.7785864004070887,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 4149
    },
    {
      "epoch": 0.0415,
      "grad_norm": 0.8437857943811232,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 4150
    },
    {
      "epoch": 0.04151,
      "grad_norm": 0.9338894402887595,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 4151
    },
    {
      "epoch": 0.04152,
      "grad_norm": 0.9207228379208315,
      "learning_rate": 0.003,
      "loss": 4.1608,
      "step": 4152
    },
    {
      "epoch": 0.04153,
      "grad_norm": 0.8528458519081906,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 4153
    },
    {
      "epoch": 0.04154,
      "grad_norm": 0.7391574811644701,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 4154
    },
    {
      "epoch": 0.04155,
      "grad_norm": 0.75760856734852,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4155
    },
    {
      "epoch": 0.04156,
      "grad_norm": 0.8731952166338035,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 4156
    },
    {
      "epoch": 0.04157,
      "grad_norm": 0.8741618403734526,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 4157
    },
    {
      "epoch": 0.04158,
      "grad_norm": 0.9164866788533468,
      "learning_rate": 0.003,
      "loss": 4.1742,
      "step": 4158
    },
    {
      "epoch": 0.04159,
      "grad_norm": 0.845613595302616,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 4159
    },
    {
      "epoch": 0.0416,
      "grad_norm": 0.8195288287923924,
      "learning_rate": 0.003,
      "loss": 4.1518,
      "step": 4160
    },
    {
      "epoch": 0.04161,
      "grad_norm": 0.718824361447048,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 4161
    },
    {
      "epoch": 0.04162,
      "grad_norm": 0.7519169259607166,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 4162
    },
    {
      "epoch": 0.04163,
      "grad_norm": 0.7491094262311686,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 4163
    },
    {
      "epoch": 0.04164,
      "grad_norm": 0.7993205047667109,
      "learning_rate": 0.003,
      "loss": 4.1624,
      "step": 4164
    },
    {
      "epoch": 0.04165,
      "grad_norm": 0.940572881338154,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 4165
    },
    {
      "epoch": 0.04166,
      "grad_norm": 1.0074718156659002,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 4166
    },
    {
      "epoch": 0.04167,
      "grad_norm": 0.9011941427002811,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 4167
    },
    {
      "epoch": 0.04168,
      "grad_norm": 0.7574197524201725,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 4168
    },
    {
      "epoch": 0.04169,
      "grad_norm": 0.6882002633674081,
      "learning_rate": 0.003,
      "loss": 4.1597,
      "step": 4169
    },
    {
      "epoch": 0.0417,
      "grad_norm": 0.6764254459950164,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 4170
    },
    {
      "epoch": 0.04171,
      "grad_norm": 0.7445116628843363,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 4171
    },
    {
      "epoch": 0.04172,
      "grad_norm": 0.7455792792401567,
      "learning_rate": 0.003,
      "loss": 4.1617,
      "step": 4172
    },
    {
      "epoch": 0.04173,
      "grad_norm": 0.6531022610209052,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 4173
    },
    {
      "epoch": 0.04174,
      "grad_norm": 0.5937692964868775,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 4174
    },
    {
      "epoch": 0.04175,
      "grad_norm": 0.7459694950099481,
      "learning_rate": 0.003,
      "loss": 4.172,
      "step": 4175
    },
    {
      "epoch": 0.04176,
      "grad_norm": 0.8453202156461872,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 4176
    },
    {
      "epoch": 0.04177,
      "grad_norm": 0.9540346884999437,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 4177
    },
    {
      "epoch": 0.04178,
      "grad_norm": 1.04708047235552,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 4178
    },
    {
      "epoch": 0.04179,
      "grad_norm": 0.9852878776313095,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4179
    },
    {
      "epoch": 0.0418,
      "grad_norm": 0.8786478389355888,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 4180
    },
    {
      "epoch": 0.04181,
      "grad_norm": 0.8639731089635772,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 4181
    },
    {
      "epoch": 0.04182,
      "grad_norm": 0.8851770442904429,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 4182
    },
    {
      "epoch": 0.04183,
      "grad_norm": 0.9649954563926619,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 4183
    },
    {
      "epoch": 0.04184,
      "grad_norm": 1.028830613529158,
      "learning_rate": 0.003,
      "loss": 4.1954,
      "step": 4184
    },
    {
      "epoch": 0.04185,
      "grad_norm": 1.1044711550852657,
      "learning_rate": 0.003,
      "loss": 4.1803,
      "step": 4185
    },
    {
      "epoch": 0.04186,
      "grad_norm": 0.7784600027105134,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 4186
    },
    {
      "epoch": 0.04187,
      "grad_norm": 0.9056948295325973,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 4187
    },
    {
      "epoch": 0.04188,
      "grad_norm": 1.1599766816413652,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 4188
    },
    {
      "epoch": 0.04189,
      "grad_norm": 1.1201002937284052,
      "learning_rate": 0.003,
      "loss": 4.1607,
      "step": 4189
    },
    {
      "epoch": 0.0419,
      "grad_norm": 0.7376394234035888,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4190
    },
    {
      "epoch": 0.04191,
      "grad_norm": 0.7634120599363466,
      "learning_rate": 0.003,
      "loss": 4.151,
      "step": 4191
    },
    {
      "epoch": 0.04192,
      "grad_norm": 0.763696759562722,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 4192
    },
    {
      "epoch": 0.04193,
      "grad_norm": 0.737464203974721,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 4193
    },
    {
      "epoch": 0.04194,
      "grad_norm": 0.8729631339185211,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 4194
    },
    {
      "epoch": 0.04195,
      "grad_norm": 1.0324703043706847,
      "learning_rate": 0.003,
      "loss": 4.1702,
      "step": 4195
    },
    {
      "epoch": 0.04196,
      "grad_norm": 1.1422522770241212,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 4196
    },
    {
      "epoch": 0.04197,
      "grad_norm": 0.8189618711192417,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 4197
    },
    {
      "epoch": 0.04198,
      "grad_norm": 0.8230164876125401,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 4198
    },
    {
      "epoch": 0.04199,
      "grad_norm": 0.9071510187811559,
      "learning_rate": 0.003,
      "loss": 4.1715,
      "step": 4199
    },
    {
      "epoch": 0.042,
      "grad_norm": 0.9418882514551147,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 4200
    },
    {
      "epoch": 0.04201,
      "grad_norm": 1.000866294908965,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 4201
    },
    {
      "epoch": 0.04202,
      "grad_norm": 1.0447837092759422,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 4202
    },
    {
      "epoch": 0.04203,
      "grad_norm": 0.9183915096913366,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 4203
    },
    {
      "epoch": 0.04204,
      "grad_norm": 0.8811060274489334,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 4204
    },
    {
      "epoch": 0.04205,
      "grad_norm": 0.8896816083602174,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 4205
    },
    {
      "epoch": 0.04206,
      "grad_norm": 1.0780443908354043,
      "learning_rate": 0.003,
      "loss": 4.1613,
      "step": 4206
    },
    {
      "epoch": 0.04207,
      "grad_norm": 1.2797808490177898,
      "learning_rate": 0.003,
      "loss": 4.1573,
      "step": 4207
    },
    {
      "epoch": 0.04208,
      "grad_norm": 0.9179007982217813,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 4208
    },
    {
      "epoch": 0.04209,
      "grad_norm": 0.8977511024729588,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 4209
    },
    {
      "epoch": 0.0421,
      "grad_norm": 0.9454990443320976,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 4210
    },
    {
      "epoch": 0.04211,
      "grad_norm": 1.0959447073496884,
      "learning_rate": 0.003,
      "loss": 4.1634,
      "step": 4211
    },
    {
      "epoch": 0.04212,
      "grad_norm": 0.9065788803964404,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 4212
    },
    {
      "epoch": 0.04213,
      "grad_norm": 0.8052786745518665,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 4213
    },
    {
      "epoch": 0.04214,
      "grad_norm": 0.8570874487237381,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 4214
    },
    {
      "epoch": 0.04215,
      "grad_norm": 0.8380491615517233,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 4215
    },
    {
      "epoch": 0.04216,
      "grad_norm": 0.8424695888257845,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 4216
    },
    {
      "epoch": 0.04217,
      "grad_norm": 0.7943923888745155,
      "learning_rate": 0.003,
      "loss": 4.1403,
      "step": 4217
    },
    {
      "epoch": 0.04218,
      "grad_norm": 0.7677254830488702,
      "learning_rate": 0.003,
      "loss": 4.17,
      "step": 4218
    },
    {
      "epoch": 0.04219,
      "grad_norm": 0.6901018800578395,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 4219
    },
    {
      "epoch": 0.0422,
      "grad_norm": 0.6822117165527345,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 4220
    },
    {
      "epoch": 0.04221,
      "grad_norm": 0.6908541457405948,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 4221
    },
    {
      "epoch": 0.04222,
      "grad_norm": 0.6494859209716153,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 4222
    },
    {
      "epoch": 0.04223,
      "grad_norm": 0.6502340833989994,
      "learning_rate": 0.003,
      "loss": 4.1567,
      "step": 4223
    },
    {
      "epoch": 0.04224,
      "grad_norm": 0.6452092264025292,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 4224
    },
    {
      "epoch": 0.04225,
      "grad_norm": 0.6665791958458259,
      "learning_rate": 0.003,
      "loss": 4.1461,
      "step": 4225
    },
    {
      "epoch": 0.04226,
      "grad_norm": 0.6985305884300859,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4226
    },
    {
      "epoch": 0.04227,
      "grad_norm": 0.7856841999519473,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 4227
    },
    {
      "epoch": 0.04228,
      "grad_norm": 0.8773648423553363,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 4228
    },
    {
      "epoch": 0.04229,
      "grad_norm": 1.1631329607599121,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 4229
    },
    {
      "epoch": 0.0423,
      "grad_norm": 0.8856168266507708,
      "learning_rate": 0.003,
      "loss": 4.1368,
      "step": 4230
    },
    {
      "epoch": 0.04231,
      "grad_norm": 0.7750650850872894,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4231
    },
    {
      "epoch": 0.04232,
      "grad_norm": 0.7131605323369269,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 4232
    },
    {
      "epoch": 0.04233,
      "grad_norm": 0.6623267284695459,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 4233
    },
    {
      "epoch": 0.04234,
      "grad_norm": 0.7686135410977546,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 4234
    },
    {
      "epoch": 0.04235,
      "grad_norm": 0.8548823502536986,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 4235
    },
    {
      "epoch": 0.04236,
      "grad_norm": 0.7555623186654448,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 4236
    },
    {
      "epoch": 0.04237,
      "grad_norm": 0.7193078765588848,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 4237
    },
    {
      "epoch": 0.04238,
      "grad_norm": 0.7633904865120098,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 4238
    },
    {
      "epoch": 0.04239,
      "grad_norm": 0.7393743565898704,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 4239
    },
    {
      "epoch": 0.0424,
      "grad_norm": 0.6305401293342845,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 4240
    },
    {
      "epoch": 0.04241,
      "grad_norm": 0.6754271205716451,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 4241
    },
    {
      "epoch": 0.04242,
      "grad_norm": 0.5501650821009278,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 4242
    },
    {
      "epoch": 0.04243,
      "grad_norm": 0.5903485874209297,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 4243
    },
    {
      "epoch": 0.04244,
      "grad_norm": 0.5264249302326871,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 4244
    },
    {
      "epoch": 0.04245,
      "grad_norm": 0.6494172750972387,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 4245
    },
    {
      "epoch": 0.04246,
      "grad_norm": 1.1075046969225877,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 4246
    },
    {
      "epoch": 0.04247,
      "grad_norm": 1.4603989952406695,
      "learning_rate": 0.003,
      "loss": 4.1454,
      "step": 4247
    },
    {
      "epoch": 0.04248,
      "grad_norm": 0.6973373631740518,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 4248
    },
    {
      "epoch": 0.04249,
      "grad_norm": 0.8960151802399898,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 4249
    },
    {
      "epoch": 0.0425,
      "grad_norm": 1.0148476039293284,
      "learning_rate": 0.003,
      "loss": 4.1494,
      "step": 4250
    },
    {
      "epoch": 0.04251,
      "grad_norm": 1.091029088953841,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 4251
    },
    {
      "epoch": 0.04252,
      "grad_norm": 0.9828804779949347,
      "learning_rate": 0.003,
      "loss": 4.1604,
      "step": 4252
    },
    {
      "epoch": 0.04253,
      "grad_norm": 1.0407830888915848,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 4253
    },
    {
      "epoch": 0.04254,
      "grad_norm": 0.9969078243511121,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 4254
    },
    {
      "epoch": 0.04255,
      "grad_norm": 0.8726287911896201,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4255
    },
    {
      "epoch": 0.04256,
      "grad_norm": 0.9734032369713972,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 4256
    },
    {
      "epoch": 0.04257,
      "grad_norm": 0.9839814555396913,
      "learning_rate": 0.003,
      "loss": 4.166,
      "step": 4257
    },
    {
      "epoch": 0.04258,
      "grad_norm": 0.9700101811814381,
      "learning_rate": 0.003,
      "loss": 4.1643,
      "step": 4258
    },
    {
      "epoch": 0.04259,
      "grad_norm": 1.1106152004837204,
      "learning_rate": 0.003,
      "loss": 4.1361,
      "step": 4259
    },
    {
      "epoch": 0.0426,
      "grad_norm": 1.0102602526109068,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 4260
    },
    {
      "epoch": 0.04261,
      "grad_norm": 1.0306790729362876,
      "learning_rate": 0.003,
      "loss": 4.1637,
      "step": 4261
    },
    {
      "epoch": 0.04262,
      "grad_norm": 1.2862557928903777,
      "learning_rate": 0.003,
      "loss": 4.1909,
      "step": 4262
    },
    {
      "epoch": 0.04263,
      "grad_norm": 1.0325757049146929,
      "learning_rate": 0.003,
      "loss": 4.1721,
      "step": 4263
    },
    {
      "epoch": 0.04264,
      "grad_norm": 1.0162869618759527,
      "learning_rate": 0.003,
      "loss": 4.1764,
      "step": 4264
    },
    {
      "epoch": 0.04265,
      "grad_norm": 1.0950967203268815,
      "learning_rate": 0.003,
      "loss": 4.1786,
      "step": 4265
    },
    {
      "epoch": 0.04266,
      "grad_norm": 0.948160311309965,
      "learning_rate": 0.003,
      "loss": 4.1532,
      "step": 4266
    },
    {
      "epoch": 0.04267,
      "grad_norm": 1.2348979147487773,
      "learning_rate": 0.003,
      "loss": 4.1375,
      "step": 4267
    },
    {
      "epoch": 0.04268,
      "grad_norm": 0.8778615179492638,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 4268
    },
    {
      "epoch": 0.04269,
      "grad_norm": 0.8286683782955315,
      "learning_rate": 0.003,
      "loss": 4.1311,
      "step": 4269
    },
    {
      "epoch": 0.0427,
      "grad_norm": 0.9176546079150403,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 4270
    },
    {
      "epoch": 0.04271,
      "grad_norm": 0.8811867110687683,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 4271
    },
    {
      "epoch": 0.04272,
      "grad_norm": 0.8863150873209689,
      "learning_rate": 0.003,
      "loss": 4.1537,
      "step": 4272
    },
    {
      "epoch": 0.04273,
      "grad_norm": 0.8585270675572245,
      "learning_rate": 0.003,
      "loss": 4.1338,
      "step": 4273
    },
    {
      "epoch": 0.04274,
      "grad_norm": 0.7731759429393139,
      "learning_rate": 0.003,
      "loss": 4.1545,
      "step": 4274
    },
    {
      "epoch": 0.04275,
      "grad_norm": 0.6354552017070271,
      "learning_rate": 0.003,
      "loss": 4.1528,
      "step": 4275
    },
    {
      "epoch": 0.04276,
      "grad_norm": 0.7646129405940127,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 4276
    },
    {
      "epoch": 0.04277,
      "grad_norm": 0.8927972665604179,
      "learning_rate": 0.003,
      "loss": 4.16,
      "step": 4277
    },
    {
      "epoch": 0.04278,
      "grad_norm": 1.0322138758213815,
      "learning_rate": 0.003,
      "loss": 4.1677,
      "step": 4278
    },
    {
      "epoch": 0.04279,
      "grad_norm": 0.960550547346212,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 4279
    },
    {
      "epoch": 0.0428,
      "grad_norm": 1.0071112981046495,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 4280
    },
    {
      "epoch": 0.04281,
      "grad_norm": 0.8672605322618707,
      "learning_rate": 0.003,
      "loss": 4.1751,
      "step": 4281
    },
    {
      "epoch": 0.04282,
      "grad_norm": 0.836732025387026,
      "learning_rate": 0.003,
      "loss": 4.1499,
      "step": 4282
    },
    {
      "epoch": 0.04283,
      "grad_norm": 0.8927876116515261,
      "learning_rate": 0.003,
      "loss": 4.1845,
      "step": 4283
    },
    {
      "epoch": 0.04284,
      "grad_norm": 0.8645218811194867,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 4284
    },
    {
      "epoch": 0.04285,
      "grad_norm": 0.842404435262628,
      "learning_rate": 0.003,
      "loss": 4.1507,
      "step": 4285
    },
    {
      "epoch": 0.04286,
      "grad_norm": 0.8812355936864655,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 4286
    },
    {
      "epoch": 0.04287,
      "grad_norm": 0.8580448230390728,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4287
    },
    {
      "epoch": 0.04288,
      "grad_norm": 0.797143513389131,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 4288
    },
    {
      "epoch": 0.04289,
      "grad_norm": 0.772184705171998,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4289
    },
    {
      "epoch": 0.0429,
      "grad_norm": 0.7832547421398193,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 4290
    },
    {
      "epoch": 0.04291,
      "grad_norm": 0.9556558247199429,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 4291
    },
    {
      "epoch": 0.04292,
      "grad_norm": 1.1338816182381097,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 4292
    },
    {
      "epoch": 0.04293,
      "grad_norm": 0.8021810217572,
      "learning_rate": 0.003,
      "loss": 4.1679,
      "step": 4293
    },
    {
      "epoch": 0.04294,
      "grad_norm": 0.6541787631431899,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 4294
    },
    {
      "epoch": 0.04295,
      "grad_norm": 0.7140599630716042,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 4295
    },
    {
      "epoch": 0.04296,
      "grad_norm": 0.7055439180689369,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 4296
    },
    {
      "epoch": 0.04297,
      "grad_norm": 0.7279360773174349,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 4297
    },
    {
      "epoch": 0.04298,
      "grad_norm": 0.8602725232980242,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 4298
    },
    {
      "epoch": 0.04299,
      "grad_norm": 1.1960364199120477,
      "learning_rate": 0.003,
      "loss": 4.1483,
      "step": 4299
    },
    {
      "epoch": 0.043,
      "grad_norm": 0.887735194757027,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 4300
    },
    {
      "epoch": 0.04301,
      "grad_norm": 0.8671235111098321,
      "learning_rate": 0.003,
      "loss": 4.1691,
      "step": 4301
    },
    {
      "epoch": 0.04302,
      "grad_norm": 0.8219777153602685,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 4302
    },
    {
      "epoch": 0.04303,
      "grad_norm": 0.7365454368217494,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 4303
    },
    {
      "epoch": 0.04304,
      "grad_norm": 0.7332880503208107,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 4304
    },
    {
      "epoch": 0.04305,
      "grad_norm": 0.6049518431707501,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 4305
    },
    {
      "epoch": 0.04306,
      "grad_norm": 0.552458178896163,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 4306
    },
    {
      "epoch": 0.04307,
      "grad_norm": 0.6072550137534989,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 4307
    },
    {
      "epoch": 0.04308,
      "grad_norm": 0.8491425972676354,
      "learning_rate": 0.003,
      "loss": 4.1348,
      "step": 4308
    },
    {
      "epoch": 0.04309,
      "grad_norm": 1.1697300613438677,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 4309
    },
    {
      "epoch": 0.0431,
      "grad_norm": 0.9663103268632941,
      "learning_rate": 0.003,
      "loss": 4.1521,
      "step": 4310
    },
    {
      "epoch": 0.04311,
      "grad_norm": 0.8246252258530357,
      "learning_rate": 0.003,
      "loss": 4.1463,
      "step": 4311
    },
    {
      "epoch": 0.04312,
      "grad_norm": 0.8926510401981749,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 4312
    },
    {
      "epoch": 0.04313,
      "grad_norm": 0.9825409959593736,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 4313
    },
    {
      "epoch": 0.04314,
      "grad_norm": 1.135321041866459,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 4314
    },
    {
      "epoch": 0.04315,
      "grad_norm": 1.1351817598396858,
      "learning_rate": 0.003,
      "loss": 4.1674,
      "step": 4315
    },
    {
      "epoch": 0.04316,
      "grad_norm": 0.8853146589527483,
      "learning_rate": 0.003,
      "loss": 4.156,
      "step": 4316
    },
    {
      "epoch": 0.04317,
      "grad_norm": 0.8300765192397569,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 4317
    },
    {
      "epoch": 0.04318,
      "grad_norm": 0.8608939310347146,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 4318
    },
    {
      "epoch": 0.04319,
      "grad_norm": 0.846452814864276,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 4319
    },
    {
      "epoch": 0.0432,
      "grad_norm": 0.8565259497390109,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 4320
    },
    {
      "epoch": 0.04321,
      "grad_norm": 1.003163747360344,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 4321
    },
    {
      "epoch": 0.04322,
      "grad_norm": 1.3687808098292402,
      "learning_rate": 0.003,
      "loss": 4.1846,
      "step": 4322
    },
    {
      "epoch": 0.04323,
      "grad_norm": 0.7765534239012071,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 4323
    },
    {
      "epoch": 0.04324,
      "grad_norm": 0.7409828216455313,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 4324
    },
    {
      "epoch": 0.04325,
      "grad_norm": 0.6593731828315239,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 4325
    },
    {
      "epoch": 0.04326,
      "grad_norm": 0.7626886495825427,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 4326
    },
    {
      "epoch": 0.04327,
      "grad_norm": 0.9753282134260721,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 4327
    },
    {
      "epoch": 0.04328,
      "grad_norm": 1.1122267963910306,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 4328
    },
    {
      "epoch": 0.04329,
      "grad_norm": 0.8812692829330543,
      "learning_rate": 0.003,
      "loss": 4.1609,
      "step": 4329
    },
    {
      "epoch": 0.0433,
      "grad_norm": 0.9117992082897464,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4330
    },
    {
      "epoch": 0.04331,
      "grad_norm": 1.008387824573847,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 4331
    },
    {
      "epoch": 0.04332,
      "grad_norm": 0.9701656948256054,
      "learning_rate": 0.003,
      "loss": 4.1817,
      "step": 4332
    },
    {
      "epoch": 0.04333,
      "grad_norm": 0.7550874154301345,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 4333
    },
    {
      "epoch": 0.04334,
      "grad_norm": 0.9440172266611684,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 4334
    },
    {
      "epoch": 0.04335,
      "grad_norm": 1.0714395710867621,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 4335
    },
    {
      "epoch": 0.04336,
      "grad_norm": 1.274134153372981,
      "learning_rate": 0.003,
      "loss": 4.1731,
      "step": 4336
    },
    {
      "epoch": 0.04337,
      "grad_norm": 0.919379094932585,
      "learning_rate": 0.003,
      "loss": 4.1531,
      "step": 4337
    },
    {
      "epoch": 0.04338,
      "grad_norm": 0.9113554686958292,
      "learning_rate": 0.003,
      "loss": 4.1517,
      "step": 4338
    },
    {
      "epoch": 0.04339,
      "grad_norm": 1.0175097294988102,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 4339
    },
    {
      "epoch": 0.0434,
      "grad_norm": 1.1823172721493573,
      "learning_rate": 0.003,
      "loss": 4.1439,
      "step": 4340
    },
    {
      "epoch": 0.04341,
      "grad_norm": 1.0738265997223806,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4341
    },
    {
      "epoch": 0.04342,
      "grad_norm": 0.9859857822713426,
      "learning_rate": 0.003,
      "loss": 4.1403,
      "step": 4342
    },
    {
      "epoch": 0.04343,
      "grad_norm": 1.0178429154018418,
      "learning_rate": 0.003,
      "loss": 4.1818,
      "step": 4343
    },
    {
      "epoch": 0.04344,
      "grad_norm": 0.8898698596336355,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 4344
    },
    {
      "epoch": 0.04345,
      "grad_norm": 0.8224305636273126,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 4345
    },
    {
      "epoch": 0.04346,
      "grad_norm": 0.8036467865466507,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 4346
    },
    {
      "epoch": 0.04347,
      "grad_norm": 0.8647944237371374,
      "learning_rate": 0.003,
      "loss": 4.1577,
      "step": 4347
    },
    {
      "epoch": 0.04348,
      "grad_norm": 0.8408116621591446,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 4348
    },
    {
      "epoch": 0.04349,
      "grad_norm": 0.7541460584282086,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 4349
    },
    {
      "epoch": 0.0435,
      "grad_norm": 0.7511170217669931,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 4350
    },
    {
      "epoch": 0.04351,
      "grad_norm": 0.7203893697283558,
      "learning_rate": 0.003,
      "loss": 4.1468,
      "step": 4351
    },
    {
      "epoch": 0.04352,
      "grad_norm": 0.889935460967273,
      "learning_rate": 0.003,
      "loss": 4.1657,
      "step": 4352
    },
    {
      "epoch": 0.04353,
      "grad_norm": 1.0367752140513655,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 4353
    },
    {
      "epoch": 0.04354,
      "grad_norm": 1.0594985151220453,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 4354
    },
    {
      "epoch": 0.04355,
      "grad_norm": 1.0642370180766931,
      "learning_rate": 0.003,
      "loss": 4.157,
      "step": 4355
    },
    {
      "epoch": 0.04356,
      "grad_norm": 0.7520263014358131,
      "learning_rate": 0.003,
      "loss": 4.1511,
      "step": 4356
    },
    {
      "epoch": 0.04357,
      "grad_norm": 0.5909325929201686,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 4357
    },
    {
      "epoch": 0.04358,
      "grad_norm": 0.6503221006497333,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 4358
    },
    {
      "epoch": 0.04359,
      "grad_norm": 0.6977281953888783,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 4359
    },
    {
      "epoch": 0.0436,
      "grad_norm": 0.705707170893373,
      "learning_rate": 0.003,
      "loss": 4.1459,
      "step": 4360
    },
    {
      "epoch": 0.04361,
      "grad_norm": 0.6978704097912947,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 4361
    },
    {
      "epoch": 0.04362,
      "grad_norm": 0.6552490872419106,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 4362
    },
    {
      "epoch": 0.04363,
      "grad_norm": 0.6258270583226841,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 4363
    },
    {
      "epoch": 0.04364,
      "grad_norm": 0.6273578442057955,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 4364
    },
    {
      "epoch": 0.04365,
      "grad_norm": 0.692759351590066,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 4365
    },
    {
      "epoch": 0.04366,
      "grad_norm": 0.7514191452479654,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 4366
    },
    {
      "epoch": 0.04367,
      "grad_norm": 0.8818255856194102,
      "learning_rate": 0.003,
      "loss": 4.1476,
      "step": 4367
    },
    {
      "epoch": 0.04368,
      "grad_norm": 1.262257310600421,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 4368
    },
    {
      "epoch": 0.04369,
      "grad_norm": 1.0072650700377674,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4369
    },
    {
      "epoch": 0.0437,
      "grad_norm": 0.8796323488234715,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 4370
    },
    {
      "epoch": 0.04371,
      "grad_norm": 0.8385748458074573,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 4371
    },
    {
      "epoch": 0.04372,
      "grad_norm": 0.9689208942990393,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 4372
    },
    {
      "epoch": 0.04373,
      "grad_norm": 1.0255215476071573,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 4373
    },
    {
      "epoch": 0.04374,
      "grad_norm": 1.0377452403485916,
      "learning_rate": 0.003,
      "loss": 4.1966,
      "step": 4374
    },
    {
      "epoch": 0.04375,
      "grad_norm": 1.225365163750092,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 4375
    },
    {
      "epoch": 0.04376,
      "grad_norm": 0.9746772561653527,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 4376
    },
    {
      "epoch": 0.04377,
      "grad_norm": 0.9884150415730085,
      "learning_rate": 0.003,
      "loss": 4.1817,
      "step": 4377
    },
    {
      "epoch": 0.04378,
      "grad_norm": 1.0487083703280902,
      "learning_rate": 0.003,
      "loss": 4.1728,
      "step": 4378
    },
    {
      "epoch": 0.04379,
      "grad_norm": 1.1241089785074756,
      "learning_rate": 0.003,
      "loss": 4.186,
      "step": 4379
    },
    {
      "epoch": 0.0438,
      "grad_norm": 1.0220079899190067,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 4380
    },
    {
      "epoch": 0.04381,
      "grad_norm": 0.9536314073088888,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4381
    },
    {
      "epoch": 0.04382,
      "grad_norm": 0.9677782727134393,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 4382
    },
    {
      "epoch": 0.04383,
      "grad_norm": 0.9472672654930465,
      "learning_rate": 0.003,
      "loss": 4.1335,
      "step": 4383
    },
    {
      "epoch": 0.04384,
      "grad_norm": 0.8670033507963211,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4384
    },
    {
      "epoch": 0.04385,
      "grad_norm": 0.7964325834207587,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 4385
    },
    {
      "epoch": 0.04386,
      "grad_norm": 0.841894742549459,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 4386
    },
    {
      "epoch": 0.04387,
      "grad_norm": 0.7784847249978989,
      "learning_rate": 0.003,
      "loss": 4.1715,
      "step": 4387
    },
    {
      "epoch": 0.04388,
      "grad_norm": 0.8761401164059524,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 4388
    },
    {
      "epoch": 0.04389,
      "grad_norm": 1.0513363320918883,
      "learning_rate": 0.003,
      "loss": 4.1696,
      "step": 4389
    },
    {
      "epoch": 0.0439,
      "grad_norm": 1.018366577116525,
      "learning_rate": 0.003,
      "loss": 4.1665,
      "step": 4390
    },
    {
      "epoch": 0.04391,
      "grad_norm": 0.8408058225537212,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 4391
    },
    {
      "epoch": 0.04392,
      "grad_norm": 0.7944853381325269,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 4392
    },
    {
      "epoch": 0.04393,
      "grad_norm": 0.7864110377608735,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 4393
    },
    {
      "epoch": 0.04394,
      "grad_norm": 0.8003481063323544,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 4394
    },
    {
      "epoch": 0.04395,
      "grad_norm": 0.82434538845085,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 4395
    },
    {
      "epoch": 0.04396,
      "grad_norm": 0.8782690940643569,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 4396
    },
    {
      "epoch": 0.04397,
      "grad_norm": 0.9209528050546905,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 4397
    },
    {
      "epoch": 0.04398,
      "grad_norm": 0.9867795261868282,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 4398
    },
    {
      "epoch": 0.04399,
      "grad_norm": 1.001127729963885,
      "learning_rate": 0.003,
      "loss": 4.1499,
      "step": 4399
    },
    {
      "epoch": 0.044,
      "grad_norm": 0.9840084183939074,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 4400
    },
    {
      "epoch": 0.04401,
      "grad_norm": 0.9259452446977503,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 4401
    },
    {
      "epoch": 0.04402,
      "grad_norm": 0.941717579650296,
      "learning_rate": 0.003,
      "loss": 4.1617,
      "step": 4402
    },
    {
      "epoch": 0.04403,
      "grad_norm": 0.9973820182998915,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 4403
    },
    {
      "epoch": 0.04404,
      "grad_norm": 0.9391290732287068,
      "learning_rate": 0.003,
      "loss": 4.1624,
      "step": 4404
    },
    {
      "epoch": 0.04405,
      "grad_norm": 0.9771616987677118,
      "learning_rate": 0.003,
      "loss": 4.1538,
      "step": 4405
    },
    {
      "epoch": 0.04406,
      "grad_norm": 0.8645709876169484,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 4406
    },
    {
      "epoch": 0.04407,
      "grad_norm": 0.8057298668086074,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 4407
    },
    {
      "epoch": 0.04408,
      "grad_norm": 0.8826642860291924,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 4408
    },
    {
      "epoch": 0.04409,
      "grad_norm": 0.942883777044805,
      "learning_rate": 0.003,
      "loss": 4.1621,
      "step": 4409
    },
    {
      "epoch": 0.0441,
      "grad_norm": 1.1342610134630753,
      "learning_rate": 0.003,
      "loss": 4.1509,
      "step": 4410
    },
    {
      "epoch": 0.04411,
      "grad_norm": 0.8444711866462494,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 4411
    },
    {
      "epoch": 0.04412,
      "grad_norm": 0.618795096815875,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 4412
    },
    {
      "epoch": 0.04413,
      "grad_norm": 0.604846824661422,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 4413
    },
    {
      "epoch": 0.04414,
      "grad_norm": 0.784437278432988,
      "learning_rate": 0.003,
      "loss": 4.1719,
      "step": 4414
    },
    {
      "epoch": 0.04415,
      "grad_norm": 1.1541784220535383,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 4415
    },
    {
      "epoch": 0.04416,
      "grad_norm": 0.9891248909004052,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 4416
    },
    {
      "epoch": 0.04417,
      "grad_norm": 0.9814607771592448,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 4417
    },
    {
      "epoch": 0.04418,
      "grad_norm": 1.0297483619337797,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 4418
    },
    {
      "epoch": 0.04419,
      "grad_norm": 0.9547057067964952,
      "learning_rate": 0.003,
      "loss": 4.1695,
      "step": 4419
    },
    {
      "epoch": 0.0442,
      "grad_norm": 0.8002338037740572,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 4420
    },
    {
      "epoch": 0.04421,
      "grad_norm": 0.8265892990962934,
      "learning_rate": 0.003,
      "loss": 4.1666,
      "step": 4421
    },
    {
      "epoch": 0.04422,
      "grad_norm": 1.01186758550101,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 4422
    },
    {
      "epoch": 0.04423,
      "grad_norm": 0.8991548755615442,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 4423
    },
    {
      "epoch": 0.04424,
      "grad_norm": 0.9249376765836381,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 4424
    },
    {
      "epoch": 0.04425,
      "grad_norm": 0.787895727766037,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 4425
    },
    {
      "epoch": 0.04426,
      "grad_norm": 0.8384547452759158,
      "learning_rate": 0.003,
      "loss": 4.1518,
      "step": 4426
    },
    {
      "epoch": 0.04427,
      "grad_norm": 0.8084673109174956,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4427
    },
    {
      "epoch": 0.04428,
      "grad_norm": 0.7355210397111905,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4428
    },
    {
      "epoch": 0.04429,
      "grad_norm": 0.6972354751664229,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 4429
    },
    {
      "epoch": 0.0443,
      "grad_norm": 0.7045383271873761,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 4430
    },
    {
      "epoch": 0.04431,
      "grad_norm": 0.7845800246059359,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 4431
    },
    {
      "epoch": 0.04432,
      "grad_norm": 0.9145736262867346,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 4432
    },
    {
      "epoch": 0.04433,
      "grad_norm": 0.9779689050072177,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 4433
    },
    {
      "epoch": 0.04434,
      "grad_norm": 1.0759117748955118,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4434
    },
    {
      "epoch": 0.04435,
      "grad_norm": 1.0791198295017557,
      "learning_rate": 0.003,
      "loss": 4.1576,
      "step": 4435
    },
    {
      "epoch": 0.04436,
      "grad_norm": 0.9820518152505495,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 4436
    },
    {
      "epoch": 0.04437,
      "grad_norm": 0.8709315291003812,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 4437
    },
    {
      "epoch": 0.04438,
      "grad_norm": 0.7342179784041142,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 4438
    },
    {
      "epoch": 0.04439,
      "grad_norm": 0.8877734923678575,
      "learning_rate": 0.003,
      "loss": 4.155,
      "step": 4439
    },
    {
      "epoch": 0.0444,
      "grad_norm": 0.82101397614282,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 4440
    },
    {
      "epoch": 0.04441,
      "grad_norm": 0.8094046311622209,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 4441
    },
    {
      "epoch": 0.04442,
      "grad_norm": 0.8261845339551874,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 4442
    },
    {
      "epoch": 0.04443,
      "grad_norm": 0.8838895022282738,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 4443
    },
    {
      "epoch": 0.04444,
      "grad_norm": 0.8720075839018505,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 4444
    },
    {
      "epoch": 0.04445,
      "grad_norm": 0.9491882233225604,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 4445
    },
    {
      "epoch": 0.04446,
      "grad_norm": 0.9902050613570654,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 4446
    },
    {
      "epoch": 0.04447,
      "grad_norm": 0.9639238211565191,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 4447
    },
    {
      "epoch": 0.04448,
      "grad_norm": 1.0022912532571144,
      "learning_rate": 0.003,
      "loss": 4.1226,
      "step": 4448
    },
    {
      "epoch": 0.04449,
      "grad_norm": 1.0491136390522242,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 4449
    },
    {
      "epoch": 0.0445,
      "grad_norm": 1.0713493139767634,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 4450
    },
    {
      "epoch": 0.04451,
      "grad_norm": 0.9194026244209427,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 4451
    },
    {
      "epoch": 0.04452,
      "grad_norm": 0.9298997780980283,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 4452
    },
    {
      "epoch": 0.04453,
      "grad_norm": 0.8266906084450703,
      "learning_rate": 0.003,
      "loss": 4.167,
      "step": 4453
    },
    {
      "epoch": 0.04454,
      "grad_norm": 0.8781132654558221,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 4454
    },
    {
      "epoch": 0.04455,
      "grad_norm": 0.949277954775384,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 4455
    },
    {
      "epoch": 0.04456,
      "grad_norm": 0.9259049491169088,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 4456
    },
    {
      "epoch": 0.04457,
      "grad_norm": 1.1275778174317006,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 4457
    },
    {
      "epoch": 0.04458,
      "grad_norm": 0.9410680370701086,
      "learning_rate": 0.003,
      "loss": 4.1646,
      "step": 4458
    },
    {
      "epoch": 0.04459,
      "grad_norm": 0.9376161276388697,
      "learning_rate": 0.003,
      "loss": 4.1723,
      "step": 4459
    },
    {
      "epoch": 0.0446,
      "grad_norm": 1.04997158261314,
      "learning_rate": 0.003,
      "loss": 4.1739,
      "step": 4460
    },
    {
      "epoch": 0.04461,
      "grad_norm": 1.112982852233256,
      "learning_rate": 0.003,
      "loss": 4.174,
      "step": 4461
    },
    {
      "epoch": 0.04462,
      "grad_norm": 1.1238997045435504,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 4462
    },
    {
      "epoch": 0.04463,
      "grad_norm": 1.1095760340846703,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 4463
    },
    {
      "epoch": 0.04464,
      "grad_norm": 0.8395501354061055,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 4464
    },
    {
      "epoch": 0.04465,
      "grad_norm": 0.70825602546013,
      "learning_rate": 0.003,
      "loss": 4.1403,
      "step": 4465
    },
    {
      "epoch": 0.04466,
      "grad_norm": 0.6551509755636126,
      "learning_rate": 0.003,
      "loss": 4.1486,
      "step": 4466
    },
    {
      "epoch": 0.04467,
      "grad_norm": 0.6536469534418317,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 4467
    },
    {
      "epoch": 0.04468,
      "grad_norm": 0.7859836361638165,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 4468
    },
    {
      "epoch": 0.04469,
      "grad_norm": 0.9147137560720803,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 4469
    },
    {
      "epoch": 0.0447,
      "grad_norm": 1.0209143473160414,
      "learning_rate": 0.003,
      "loss": 4.1582,
      "step": 4470
    },
    {
      "epoch": 0.04471,
      "grad_norm": 0.8684398085030052,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 4471
    },
    {
      "epoch": 0.04472,
      "grad_norm": 0.7039517677091388,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 4472
    },
    {
      "epoch": 0.04473,
      "grad_norm": 0.7016130752036785,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 4473
    },
    {
      "epoch": 0.04474,
      "grad_norm": 0.819372242317214,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 4474
    },
    {
      "epoch": 0.04475,
      "grad_norm": 0.9581916584575547,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 4475
    },
    {
      "epoch": 0.04476,
      "grad_norm": 1.0128184224439518,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4476
    },
    {
      "epoch": 0.04477,
      "grad_norm": 0.9335743970032304,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 4477
    },
    {
      "epoch": 0.04478,
      "grad_norm": 0.9839256263771743,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 4478
    },
    {
      "epoch": 0.04479,
      "grad_norm": 1.0121152195223608,
      "learning_rate": 0.003,
      "loss": 4.1335,
      "step": 4479
    },
    {
      "epoch": 0.0448,
      "grad_norm": 1.1007103156748486,
      "learning_rate": 0.003,
      "loss": 4.1461,
      "step": 4480
    },
    {
      "epoch": 0.04481,
      "grad_norm": 0.7582518945106804,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 4481
    },
    {
      "epoch": 0.04482,
      "grad_norm": 0.6947887020501614,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 4482
    },
    {
      "epoch": 0.04483,
      "grad_norm": 0.6849977721811616,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 4483
    },
    {
      "epoch": 0.04484,
      "grad_norm": 0.6987096200585385,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 4484
    },
    {
      "epoch": 0.04485,
      "grad_norm": 0.6929640245126742,
      "learning_rate": 0.003,
      "loss": 4.1581,
      "step": 4485
    },
    {
      "epoch": 0.04486,
      "grad_norm": 0.6288138860854519,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 4486
    },
    {
      "epoch": 0.04487,
      "grad_norm": 0.6828177453458262,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 4487
    },
    {
      "epoch": 0.04488,
      "grad_norm": 0.7640042454477429,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 4488
    },
    {
      "epoch": 0.04489,
      "grad_norm": 0.9955063503331009,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 4489
    },
    {
      "epoch": 0.0449,
      "grad_norm": 1.190431242593713,
      "learning_rate": 0.003,
      "loss": 4.1566,
      "step": 4490
    },
    {
      "epoch": 0.04491,
      "grad_norm": 0.729909947923579,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 4491
    },
    {
      "epoch": 0.04492,
      "grad_norm": 0.6337337418684384,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 4492
    },
    {
      "epoch": 0.04493,
      "grad_norm": 0.6901332741523903,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 4493
    },
    {
      "epoch": 0.04494,
      "grad_norm": 0.7753077021765635,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 4494
    },
    {
      "epoch": 0.04495,
      "grad_norm": 0.7970714712777819,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4495
    },
    {
      "epoch": 0.04496,
      "grad_norm": 0.7494590761073396,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 4496
    },
    {
      "epoch": 0.04497,
      "grad_norm": 0.7663642245891397,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 4497
    },
    {
      "epoch": 0.04498,
      "grad_norm": 0.8315020221220991,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 4498
    },
    {
      "epoch": 0.04499,
      "grad_norm": 0.8744656956645718,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 4499
    },
    {
      "epoch": 0.045,
      "grad_norm": 0.8267996207966193,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 4500
    },
    {
      "epoch": 0.04501,
      "grad_norm": 0.8946790372935638,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 4501
    },
    {
      "epoch": 0.04502,
      "grad_norm": 0.9463619249258222,
      "learning_rate": 0.003,
      "loss": 4.1656,
      "step": 4502
    },
    {
      "epoch": 0.04503,
      "grad_norm": 1.0222323520895809,
      "learning_rate": 0.003,
      "loss": 4.1678,
      "step": 4503
    },
    {
      "epoch": 0.04504,
      "grad_norm": 1.0547640109337635,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 4504
    },
    {
      "epoch": 0.04505,
      "grad_norm": 0.9864660536132722,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 4505
    },
    {
      "epoch": 0.04506,
      "grad_norm": 1.0360024418113565,
      "learning_rate": 0.003,
      "loss": 4.146,
      "step": 4506
    },
    {
      "epoch": 0.04507,
      "grad_norm": 1.0278269548286743,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 4507
    },
    {
      "epoch": 0.04508,
      "grad_norm": 0.9639240103094384,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 4508
    },
    {
      "epoch": 0.04509,
      "grad_norm": 0.9853142135454404,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 4509
    },
    {
      "epoch": 0.0451,
      "grad_norm": 1.0935429480575658,
      "learning_rate": 0.003,
      "loss": 4.1727,
      "step": 4510
    },
    {
      "epoch": 0.04511,
      "grad_norm": 1.1102303545176306,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 4511
    },
    {
      "epoch": 0.04512,
      "grad_norm": 1.115670666411986,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 4512
    },
    {
      "epoch": 0.04513,
      "grad_norm": 0.8607532188769688,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 4513
    },
    {
      "epoch": 0.04514,
      "grad_norm": 0.7573822293737257,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 4514
    },
    {
      "epoch": 0.04515,
      "grad_norm": 0.679162344485831,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 4515
    },
    {
      "epoch": 0.04516,
      "grad_norm": 0.7152570377504436,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 4516
    },
    {
      "epoch": 0.04517,
      "grad_norm": 0.7954703911013454,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 4517
    },
    {
      "epoch": 0.04518,
      "grad_norm": 1.0634551087943087,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4518
    },
    {
      "epoch": 0.04519,
      "grad_norm": 1.2285232634943983,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 4519
    },
    {
      "epoch": 0.0452,
      "grad_norm": 0.8007618323918877,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4520
    },
    {
      "epoch": 0.04521,
      "grad_norm": 0.6592457815722775,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4521
    },
    {
      "epoch": 0.04522,
      "grad_norm": 0.8403672242278453,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 4522
    },
    {
      "epoch": 0.04523,
      "grad_norm": 1.1877972422707144,
      "learning_rate": 0.003,
      "loss": 4.1368,
      "step": 4523
    },
    {
      "epoch": 0.04524,
      "grad_norm": 0.8947697003509342,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 4524
    },
    {
      "epoch": 0.04525,
      "grad_norm": 0.7848986351194489,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 4525
    },
    {
      "epoch": 0.04526,
      "grad_norm": 0.7651712977009272,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 4526
    },
    {
      "epoch": 0.04527,
      "grad_norm": 0.6750467543871055,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 4527
    },
    {
      "epoch": 0.04528,
      "grad_norm": 0.7273916921816331,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 4528
    },
    {
      "epoch": 0.04529,
      "grad_norm": 0.8479138902943643,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 4529
    },
    {
      "epoch": 0.0453,
      "grad_norm": 0.9684628880640245,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4530
    },
    {
      "epoch": 0.04531,
      "grad_norm": 1.1134698284207358,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 4531
    },
    {
      "epoch": 0.04532,
      "grad_norm": 0.876191461867141,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 4532
    },
    {
      "epoch": 0.04533,
      "grad_norm": 0.8864267526479417,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 4533
    },
    {
      "epoch": 0.04534,
      "grad_norm": 0.8208227713611345,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 4534
    },
    {
      "epoch": 0.04535,
      "grad_norm": 0.848319739416912,
      "learning_rate": 0.003,
      "loss": 4.167,
      "step": 4535
    },
    {
      "epoch": 0.04536,
      "grad_norm": 0.6979960468142136,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4536
    },
    {
      "epoch": 0.04537,
      "grad_norm": 0.7258752037017571,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 4537
    },
    {
      "epoch": 0.04538,
      "grad_norm": 0.7367885904517688,
      "learning_rate": 0.003,
      "loss": 4.1366,
      "step": 4538
    },
    {
      "epoch": 0.04539,
      "grad_norm": 0.7909398070219044,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 4539
    },
    {
      "epoch": 0.0454,
      "grad_norm": 0.9542272258252085,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 4540
    },
    {
      "epoch": 0.04541,
      "grad_norm": 1.342847961348652,
      "learning_rate": 0.003,
      "loss": 4.1716,
      "step": 4541
    },
    {
      "epoch": 0.04542,
      "grad_norm": 0.7341173411686917,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 4542
    },
    {
      "epoch": 0.04543,
      "grad_norm": 0.8199205913967764,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 4543
    },
    {
      "epoch": 0.04544,
      "grad_norm": 0.9991230666952743,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 4544
    },
    {
      "epoch": 0.04545,
      "grad_norm": 1.154240542999549,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 4545
    },
    {
      "epoch": 0.04546,
      "grad_norm": 1.0006050135666062,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 4546
    },
    {
      "epoch": 0.04547,
      "grad_norm": 0.9848572236171086,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 4547
    },
    {
      "epoch": 0.04548,
      "grad_norm": 0.950231501902435,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 4548
    },
    {
      "epoch": 0.04549,
      "grad_norm": 1.0155067779223592,
      "learning_rate": 0.003,
      "loss": 4.1401,
      "step": 4549
    },
    {
      "epoch": 0.0455,
      "grad_norm": 1.0449688473715737,
      "learning_rate": 0.003,
      "loss": 4.1757,
      "step": 4550
    },
    {
      "epoch": 0.04551,
      "grad_norm": 0.901666260155581,
      "learning_rate": 0.003,
      "loss": 4.1821,
      "step": 4551
    },
    {
      "epoch": 0.04552,
      "grad_norm": 0.9049695661423774,
      "learning_rate": 0.003,
      "loss": 4.162,
      "step": 4552
    },
    {
      "epoch": 0.04553,
      "grad_norm": 0.9336311136993248,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4553
    },
    {
      "epoch": 0.04554,
      "grad_norm": 0.8640303831252293,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 4554
    },
    {
      "epoch": 0.04555,
      "grad_norm": 0.9397425841906786,
      "learning_rate": 0.003,
      "loss": 4.1474,
      "step": 4555
    },
    {
      "epoch": 0.04556,
      "grad_norm": 0.9618493597139517,
      "learning_rate": 0.003,
      "loss": 4.152,
      "step": 4556
    },
    {
      "epoch": 0.04557,
      "grad_norm": 0.884753188680489,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4557
    },
    {
      "epoch": 0.04558,
      "grad_norm": 0.7671272683143043,
      "learning_rate": 0.003,
      "loss": 4.1584,
      "step": 4558
    },
    {
      "epoch": 0.04559,
      "grad_norm": 0.6905499070998972,
      "learning_rate": 0.003,
      "loss": 4.1701,
      "step": 4559
    },
    {
      "epoch": 0.0456,
      "grad_norm": 0.6501916829076367,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 4560
    },
    {
      "epoch": 0.04561,
      "grad_norm": 0.7854319499541154,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 4561
    },
    {
      "epoch": 0.04562,
      "grad_norm": 0.9267219643007959,
      "learning_rate": 0.003,
      "loss": 4.1642,
      "step": 4562
    },
    {
      "epoch": 0.04563,
      "grad_norm": 1.09883882872006,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 4563
    },
    {
      "epoch": 0.04564,
      "grad_norm": 0.8737747079220421,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 4564
    },
    {
      "epoch": 0.04565,
      "grad_norm": 0.8193816189119624,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 4565
    },
    {
      "epoch": 0.04566,
      "grad_norm": 0.870765536242901,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 4566
    },
    {
      "epoch": 0.04567,
      "grad_norm": 1.006590995989121,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 4567
    },
    {
      "epoch": 0.04568,
      "grad_norm": 1.042592813338758,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 4568
    },
    {
      "epoch": 0.04569,
      "grad_norm": 0.9179039021155998,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 4569
    },
    {
      "epoch": 0.0457,
      "grad_norm": 0.9469764446785002,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 4570
    },
    {
      "epoch": 0.04571,
      "grad_norm": 0.9383380289023364,
      "learning_rate": 0.003,
      "loss": 4.1673,
      "step": 4571
    },
    {
      "epoch": 0.04572,
      "grad_norm": 1.141150830462319,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 4572
    },
    {
      "epoch": 0.04573,
      "grad_norm": 0.8749894272125335,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 4573
    },
    {
      "epoch": 0.04574,
      "grad_norm": 1.071173257933056,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 4574
    },
    {
      "epoch": 0.04575,
      "grad_norm": 1.1588296020864692,
      "learning_rate": 0.003,
      "loss": 4.1607,
      "step": 4575
    },
    {
      "epoch": 0.04576,
      "grad_norm": 1.0001116928921225,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 4576
    },
    {
      "epoch": 0.04577,
      "grad_norm": 1.1162255859546153,
      "learning_rate": 0.003,
      "loss": 4.1711,
      "step": 4577
    },
    {
      "epoch": 0.04578,
      "grad_norm": 0.8912231810669947,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 4578
    },
    {
      "epoch": 0.04579,
      "grad_norm": 0.7751452813751742,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 4579
    },
    {
      "epoch": 0.0458,
      "grad_norm": 0.812467570483858,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 4580
    },
    {
      "epoch": 0.04581,
      "grad_norm": 0.9496499343062301,
      "learning_rate": 0.003,
      "loss": 4.1642,
      "step": 4581
    },
    {
      "epoch": 0.04582,
      "grad_norm": 1.257095124885388,
      "learning_rate": 0.003,
      "loss": 4.1743,
      "step": 4582
    },
    {
      "epoch": 0.04583,
      "grad_norm": 1.0572142245473386,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 4583
    },
    {
      "epoch": 0.04584,
      "grad_norm": 1.0493651940978534,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4584
    },
    {
      "epoch": 0.04585,
      "grad_norm": 1.0075480210769747,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 4585
    },
    {
      "epoch": 0.04586,
      "grad_norm": 0.9609773823228706,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 4586
    },
    {
      "epoch": 0.04587,
      "grad_norm": 0.8094793207666494,
      "learning_rate": 0.003,
      "loss": 4.1598,
      "step": 4587
    },
    {
      "epoch": 0.04588,
      "grad_norm": 0.6497566279929464,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 4588
    },
    {
      "epoch": 0.04589,
      "grad_norm": 0.6395143391670874,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 4589
    },
    {
      "epoch": 0.0459,
      "grad_norm": 0.6017649664143976,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 4590
    },
    {
      "epoch": 0.04591,
      "grad_norm": 0.5348849495027335,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 4591
    },
    {
      "epoch": 0.04592,
      "grad_norm": 0.6214267296386631,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 4592
    },
    {
      "epoch": 0.04593,
      "grad_norm": 0.6662326597458281,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 4593
    },
    {
      "epoch": 0.04594,
      "grad_norm": 0.9037274242355646,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 4594
    },
    {
      "epoch": 0.04595,
      "grad_norm": 1.0198140749645483,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 4595
    },
    {
      "epoch": 0.04596,
      "grad_norm": 0.9085054402184916,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 4596
    },
    {
      "epoch": 0.04597,
      "grad_norm": 0.818858115564251,
      "learning_rate": 0.003,
      "loss": 4.1439,
      "step": 4597
    },
    {
      "epoch": 0.04598,
      "grad_norm": 0.8218121161747024,
      "learning_rate": 0.003,
      "loss": 4.1774,
      "step": 4598
    },
    {
      "epoch": 0.04599,
      "grad_norm": 0.84010102233204,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 4599
    },
    {
      "epoch": 0.046,
      "grad_norm": 0.8552355887980282,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 4600
    },
    {
      "epoch": 0.04601,
      "grad_norm": 0.8950007645860975,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 4601
    },
    {
      "epoch": 0.04602,
      "grad_norm": 0.9057095536124716,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 4602
    },
    {
      "epoch": 0.04603,
      "grad_norm": 0.957183477029017,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 4603
    },
    {
      "epoch": 0.04604,
      "grad_norm": 0.9080594849186558,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 4604
    },
    {
      "epoch": 0.04605,
      "grad_norm": 0.8631739974861561,
      "learning_rate": 0.003,
      "loss": 4.1569,
      "step": 4605
    },
    {
      "epoch": 0.04606,
      "grad_norm": 0.8826596745901574,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 4606
    },
    {
      "epoch": 0.04607,
      "grad_norm": 0.8058337880510137,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 4607
    },
    {
      "epoch": 0.04608,
      "grad_norm": 0.9071710574106185,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 4608
    },
    {
      "epoch": 0.04609,
      "grad_norm": 1.1450997362229134,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 4609
    },
    {
      "epoch": 0.0461,
      "grad_norm": 0.9244793191418952,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 4610
    },
    {
      "epoch": 0.04611,
      "grad_norm": 0.8489428980051721,
      "learning_rate": 0.003,
      "loss": 4.1642,
      "step": 4611
    },
    {
      "epoch": 0.04612,
      "grad_norm": 0.9711813069563194,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 4612
    },
    {
      "epoch": 0.04613,
      "grad_norm": 0.9867549615620282,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 4613
    },
    {
      "epoch": 0.04614,
      "grad_norm": 1.0312719455514217,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 4614
    },
    {
      "epoch": 0.04615,
      "grad_norm": 0.9155691103438471,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 4615
    },
    {
      "epoch": 0.04616,
      "grad_norm": 0.8072019861534341,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4616
    },
    {
      "epoch": 0.04617,
      "grad_norm": 0.7700754980869141,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 4617
    },
    {
      "epoch": 0.04618,
      "grad_norm": 1.0494097265203939,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 4618
    },
    {
      "epoch": 0.04619,
      "grad_norm": 1.2538022534808464,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 4619
    },
    {
      "epoch": 0.0462,
      "grad_norm": 0.8505682632402205,
      "learning_rate": 0.003,
      "loss": 4.1615,
      "step": 4620
    },
    {
      "epoch": 0.04621,
      "grad_norm": 0.8349859298521801,
      "learning_rate": 0.003,
      "loss": 4.1671,
      "step": 4621
    },
    {
      "epoch": 0.04622,
      "grad_norm": 0.8636675338766925,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 4622
    },
    {
      "epoch": 0.04623,
      "grad_norm": 0.9109557261630941,
      "learning_rate": 0.003,
      "loss": 4.1507,
      "step": 4623
    },
    {
      "epoch": 0.04624,
      "grad_norm": 0.8181989834734489,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 4624
    },
    {
      "epoch": 0.04625,
      "grad_norm": 0.836714187206783,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 4625
    },
    {
      "epoch": 0.04626,
      "grad_norm": 0.8974854040725699,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 4626
    },
    {
      "epoch": 0.04627,
      "grad_norm": 0.8399612016833758,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 4627
    },
    {
      "epoch": 0.04628,
      "grad_norm": 0.9152472588666292,
      "learning_rate": 0.003,
      "loss": 4.1602,
      "step": 4628
    },
    {
      "epoch": 0.04629,
      "grad_norm": 0.9681854075822318,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 4629
    },
    {
      "epoch": 0.0463,
      "grad_norm": 1.2804113609132692,
      "learning_rate": 0.003,
      "loss": 4.1606,
      "step": 4630
    },
    {
      "epoch": 0.04631,
      "grad_norm": 0.8985924390327231,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 4631
    },
    {
      "epoch": 0.04632,
      "grad_norm": 0.6464228130780736,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 4632
    },
    {
      "epoch": 0.04633,
      "grad_norm": 0.6316825408575106,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 4633
    },
    {
      "epoch": 0.04634,
      "grad_norm": 0.7173406958415919,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 4634
    },
    {
      "epoch": 0.04635,
      "grad_norm": 0.9060915985310092,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4635
    },
    {
      "epoch": 0.04636,
      "grad_norm": 1.1784178209510372,
      "learning_rate": 0.003,
      "loss": 4.1746,
      "step": 4636
    },
    {
      "epoch": 0.04637,
      "grad_norm": 0.922005994009661,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 4637
    },
    {
      "epoch": 0.04638,
      "grad_norm": 0.7797208498501719,
      "learning_rate": 0.003,
      "loss": 4.1471,
      "step": 4638
    },
    {
      "epoch": 0.04639,
      "grad_norm": 0.9045238193488381,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 4639
    },
    {
      "epoch": 0.0464,
      "grad_norm": 0.8873863035754851,
      "learning_rate": 0.003,
      "loss": 4.1475,
      "step": 4640
    },
    {
      "epoch": 0.04641,
      "grad_norm": 0.9577308333714971,
      "learning_rate": 0.003,
      "loss": 4.1647,
      "step": 4641
    },
    {
      "epoch": 0.04642,
      "grad_norm": 1.053057664461161,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 4642
    },
    {
      "epoch": 0.04643,
      "grad_norm": 1.0102454910670058,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 4643
    },
    {
      "epoch": 0.04644,
      "grad_norm": 1.0170448253769175,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 4644
    },
    {
      "epoch": 0.04645,
      "grad_norm": 1.0485498444094856,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 4645
    },
    {
      "epoch": 0.04646,
      "grad_norm": 1.1607087803243306,
      "learning_rate": 0.003,
      "loss": 4.1437,
      "step": 4646
    },
    {
      "epoch": 0.04647,
      "grad_norm": 0.984885992390032,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 4647
    },
    {
      "epoch": 0.04648,
      "grad_norm": 1.042823798476842,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 4648
    },
    {
      "epoch": 0.04649,
      "grad_norm": 1.1175899168625645,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 4649
    },
    {
      "epoch": 0.0465,
      "grad_norm": 1.1641349913018662,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 4650
    },
    {
      "epoch": 0.04651,
      "grad_norm": 1.1182777997481035,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 4651
    },
    {
      "epoch": 0.04652,
      "grad_norm": 1.1639122814807727,
      "learning_rate": 0.003,
      "loss": 4.1678,
      "step": 4652
    },
    {
      "epoch": 0.04653,
      "grad_norm": 0.9052375805967465,
      "learning_rate": 0.003,
      "loss": 4.1619,
      "step": 4653
    },
    {
      "epoch": 0.04654,
      "grad_norm": 0.7923411907134336,
      "learning_rate": 0.003,
      "loss": 4.1484,
      "step": 4654
    },
    {
      "epoch": 0.04655,
      "grad_norm": 0.8924156423091137,
      "learning_rate": 0.003,
      "loss": 4.1563,
      "step": 4655
    },
    {
      "epoch": 0.04656,
      "grad_norm": 1.2505936066471501,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 4656
    },
    {
      "epoch": 0.04657,
      "grad_norm": 0.7705046707851887,
      "learning_rate": 0.003,
      "loss": 4.1645,
      "step": 4657
    },
    {
      "epoch": 0.04658,
      "grad_norm": 0.6721536386762502,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4658
    },
    {
      "epoch": 0.04659,
      "grad_norm": 0.7136764569063401,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 4659
    },
    {
      "epoch": 0.0466,
      "grad_norm": 0.7116121289071756,
      "learning_rate": 0.003,
      "loss": 4.1514,
      "step": 4660
    },
    {
      "epoch": 0.04661,
      "grad_norm": 0.583275742533722,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 4661
    },
    {
      "epoch": 0.04662,
      "grad_norm": 0.6128088997283417,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 4662
    },
    {
      "epoch": 0.04663,
      "grad_norm": 0.7407134458121065,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 4663
    },
    {
      "epoch": 0.04664,
      "grad_norm": 0.9058911754062067,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4664
    },
    {
      "epoch": 0.04665,
      "grad_norm": 1.1897493791033456,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 4665
    },
    {
      "epoch": 0.04666,
      "grad_norm": 0.8049445127211454,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4666
    },
    {
      "epoch": 0.04667,
      "grad_norm": 0.6282022686305142,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 4667
    },
    {
      "epoch": 0.04668,
      "grad_norm": 0.589167104573055,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 4668
    },
    {
      "epoch": 0.04669,
      "grad_norm": 0.7583426981182202,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 4669
    },
    {
      "epoch": 0.0467,
      "grad_norm": 0.9171714392229983,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 4670
    },
    {
      "epoch": 0.04671,
      "grad_norm": 1.006207243577008,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 4671
    },
    {
      "epoch": 0.04672,
      "grad_norm": 1.063976185292903,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 4672
    },
    {
      "epoch": 0.04673,
      "grad_norm": 0.9424991510428172,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 4673
    },
    {
      "epoch": 0.04674,
      "grad_norm": 0.9346287569779962,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 4674
    },
    {
      "epoch": 0.04675,
      "grad_norm": 0.9394221276053155,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4675
    },
    {
      "epoch": 0.04676,
      "grad_norm": 1.143713314597062,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 4676
    },
    {
      "epoch": 0.04677,
      "grad_norm": 1.0492560762157657,
      "learning_rate": 0.003,
      "loss": 4.1572,
      "step": 4677
    },
    {
      "epoch": 0.04678,
      "grad_norm": 0.9237030884818286,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 4678
    },
    {
      "epoch": 0.04679,
      "grad_norm": 1.0604849270980485,
      "learning_rate": 0.003,
      "loss": 4.1502,
      "step": 4679
    },
    {
      "epoch": 0.0468,
      "grad_norm": 0.9599319014900776,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 4680
    },
    {
      "epoch": 0.04681,
      "grad_norm": 1.1130119095403597,
      "learning_rate": 0.003,
      "loss": 4.1766,
      "step": 4681
    },
    {
      "epoch": 0.04682,
      "grad_norm": 1.0396062685784013,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 4682
    },
    {
      "epoch": 0.04683,
      "grad_norm": 0.9560921327155597,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 4683
    },
    {
      "epoch": 0.04684,
      "grad_norm": 0.9641619437962302,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 4684
    },
    {
      "epoch": 0.04685,
      "grad_norm": 0.8865003876092341,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 4685
    },
    {
      "epoch": 0.04686,
      "grad_norm": 0.9255222896745879,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 4686
    },
    {
      "epoch": 0.04687,
      "grad_norm": 0.9164703207217101,
      "learning_rate": 0.003,
      "loss": 4.1737,
      "step": 4687
    },
    {
      "epoch": 0.04688,
      "grad_norm": 0.8576876310928183,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 4688
    },
    {
      "epoch": 0.04689,
      "grad_norm": 0.7547156448268271,
      "learning_rate": 0.003,
      "loss": 4.1655,
      "step": 4689
    },
    {
      "epoch": 0.0469,
      "grad_norm": 0.8110688622338084,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4690
    },
    {
      "epoch": 0.04691,
      "grad_norm": 0.7871973858039091,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 4691
    },
    {
      "epoch": 0.04692,
      "grad_norm": 0.9924565689192236,
      "learning_rate": 0.003,
      "loss": 4.1389,
      "step": 4692
    },
    {
      "epoch": 0.04693,
      "grad_norm": 1.1840555444723178,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 4693
    },
    {
      "epoch": 0.04694,
      "grad_norm": 0.8956812289363905,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 4694
    },
    {
      "epoch": 0.04695,
      "grad_norm": 0.9376926331756155,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 4695
    },
    {
      "epoch": 0.04696,
      "grad_norm": 0.8795403664260718,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4696
    },
    {
      "epoch": 0.04697,
      "grad_norm": 0.8357794249885189,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 4697
    },
    {
      "epoch": 0.04698,
      "grad_norm": 0.6738545963309196,
      "learning_rate": 0.003,
      "loss": 4.1382,
      "step": 4698
    },
    {
      "epoch": 0.04699,
      "grad_norm": 0.5951615468449445,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 4699
    },
    {
      "epoch": 0.047,
      "grad_norm": 0.6225150714509209,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 4700
    },
    {
      "epoch": 0.04701,
      "grad_norm": 0.6437330166954227,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 4701
    },
    {
      "epoch": 0.04702,
      "grad_norm": 0.7562946776902397,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 4702
    },
    {
      "epoch": 0.04703,
      "grad_norm": 0.9543436677847786,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 4703
    },
    {
      "epoch": 0.04704,
      "grad_norm": 1.4579145170633065,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 4704
    },
    {
      "epoch": 0.04705,
      "grad_norm": 0.6960477975136533,
      "learning_rate": 0.003,
      "loss": 4.1568,
      "step": 4705
    },
    {
      "epoch": 0.04706,
      "grad_norm": 0.5563368169699227,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 4706
    },
    {
      "epoch": 0.04707,
      "grad_norm": 0.7028499109080264,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 4707
    },
    {
      "epoch": 0.04708,
      "grad_norm": 0.868294214987963,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 4708
    },
    {
      "epoch": 0.04709,
      "grad_norm": 1.0806652270407238,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 4709
    },
    {
      "epoch": 0.0471,
      "grad_norm": 0.9045144570670249,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 4710
    },
    {
      "epoch": 0.04711,
      "grad_norm": 0.7929082522931609,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 4711
    },
    {
      "epoch": 0.04712,
      "grad_norm": 0.8998152710604039,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 4712
    },
    {
      "epoch": 0.04713,
      "grad_norm": 0.900819429264007,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 4713
    },
    {
      "epoch": 0.04714,
      "grad_norm": 0.8770728733527219,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 4714
    },
    {
      "epoch": 0.04715,
      "grad_norm": 0.9305052960807428,
      "learning_rate": 0.003,
      "loss": 4.1627,
      "step": 4715
    },
    {
      "epoch": 0.04716,
      "grad_norm": 1.0563148846171344,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 4716
    },
    {
      "epoch": 0.04717,
      "grad_norm": 1.0183123784712407,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 4717
    },
    {
      "epoch": 0.04718,
      "grad_norm": 1.0249581976294773,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 4718
    },
    {
      "epoch": 0.04719,
      "grad_norm": 0.9289493979577722,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 4719
    },
    {
      "epoch": 0.0472,
      "grad_norm": 0.8741526678227087,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4720
    },
    {
      "epoch": 0.04721,
      "grad_norm": 1.2410401068900363,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 4721
    },
    {
      "epoch": 0.04722,
      "grad_norm": 1.1958207913096726,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 4722
    },
    {
      "epoch": 0.04723,
      "grad_norm": 0.983729916262391,
      "learning_rate": 0.003,
      "loss": 4.1525,
      "step": 4723
    },
    {
      "epoch": 0.04724,
      "grad_norm": 0.9846514174679839,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 4724
    },
    {
      "epoch": 0.04725,
      "grad_norm": 1.2006797419141146,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 4725
    },
    {
      "epoch": 0.04726,
      "grad_norm": 0.7619404889077381,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 4726
    },
    {
      "epoch": 0.04727,
      "grad_norm": 0.6760195770103148,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 4727
    },
    {
      "epoch": 0.04728,
      "grad_norm": 0.6711364467462053,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 4728
    },
    {
      "epoch": 0.04729,
      "grad_norm": 0.6961741239186977,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 4729
    },
    {
      "epoch": 0.0473,
      "grad_norm": 0.7303647620564638,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 4730
    },
    {
      "epoch": 0.04731,
      "grad_norm": 0.712139084345733,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 4731
    },
    {
      "epoch": 0.04732,
      "grad_norm": 0.8077440937117389,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4732
    },
    {
      "epoch": 0.04733,
      "grad_norm": 1.1656166796396292,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 4733
    },
    {
      "epoch": 0.04734,
      "grad_norm": 1.166287688296092,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 4734
    },
    {
      "epoch": 0.04735,
      "grad_norm": 0.7216780900295331,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4735
    },
    {
      "epoch": 0.04736,
      "grad_norm": 0.7134965352325359,
      "learning_rate": 0.003,
      "loss": 4.1348,
      "step": 4736
    },
    {
      "epoch": 0.04737,
      "grad_norm": 0.8921498388016378,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 4737
    },
    {
      "epoch": 0.04738,
      "grad_norm": 1.1445099509639862,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 4738
    },
    {
      "epoch": 0.04739,
      "grad_norm": 1.1030230453118304,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 4739
    },
    {
      "epoch": 0.0474,
      "grad_norm": 0.8584656214420842,
      "learning_rate": 0.003,
      "loss": 4.1535,
      "step": 4740
    },
    {
      "epoch": 0.04741,
      "grad_norm": 0.8035201218462465,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 4741
    },
    {
      "epoch": 0.04742,
      "grad_norm": 0.8357881665845689,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 4742
    },
    {
      "epoch": 0.04743,
      "grad_norm": 0.9521041649493661,
      "learning_rate": 0.003,
      "loss": 4.1522,
      "step": 4743
    },
    {
      "epoch": 0.04744,
      "grad_norm": 0.9335234952847369,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 4744
    },
    {
      "epoch": 0.04745,
      "grad_norm": 0.9092467912620412,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 4745
    },
    {
      "epoch": 0.04746,
      "grad_norm": 1.0554547866629964,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 4746
    },
    {
      "epoch": 0.04747,
      "grad_norm": 0.9041793629659869,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 4747
    },
    {
      "epoch": 0.04748,
      "grad_norm": 0.858859989561575,
      "learning_rate": 0.003,
      "loss": 4.1554,
      "step": 4748
    },
    {
      "epoch": 0.04749,
      "grad_norm": 0.9479384146537131,
      "learning_rate": 0.003,
      "loss": 4.1658,
      "step": 4749
    },
    {
      "epoch": 0.0475,
      "grad_norm": 1.0250013309916148,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 4750
    },
    {
      "epoch": 0.04751,
      "grad_norm": 0.9606461873821214,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 4751
    },
    {
      "epoch": 0.04752,
      "grad_norm": 0.9181341108641666,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 4752
    },
    {
      "epoch": 0.04753,
      "grad_norm": 0.8507586417545276,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 4753
    },
    {
      "epoch": 0.04754,
      "grad_norm": 0.8523834824717018,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 4754
    },
    {
      "epoch": 0.04755,
      "grad_norm": 0.789624720859558,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 4755
    },
    {
      "epoch": 0.04756,
      "grad_norm": 0.6798364490430269,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 4756
    },
    {
      "epoch": 0.04757,
      "grad_norm": 0.8431885193516194,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 4757
    },
    {
      "epoch": 0.04758,
      "grad_norm": 1.0193242917886531,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 4758
    },
    {
      "epoch": 0.04759,
      "grad_norm": 1.1675022300199527,
      "learning_rate": 0.003,
      "loss": 4.1418,
      "step": 4759
    },
    {
      "epoch": 0.0476,
      "grad_norm": 0.8374418244140791,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 4760
    },
    {
      "epoch": 0.04761,
      "grad_norm": 0.8004744833013939,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 4761
    },
    {
      "epoch": 0.04762,
      "grad_norm": 0.8950693235914801,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 4762
    },
    {
      "epoch": 0.04763,
      "grad_norm": 0.9038705695942526,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 4763
    },
    {
      "epoch": 0.04764,
      "grad_norm": 1.0083140925650727,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 4764
    },
    {
      "epoch": 0.04765,
      "grad_norm": 1.2084489092590291,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 4765
    },
    {
      "epoch": 0.04766,
      "grad_norm": 0.9331486718026547,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 4766
    },
    {
      "epoch": 0.04767,
      "grad_norm": 1.032810812597879,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 4767
    },
    {
      "epoch": 0.04768,
      "grad_norm": 0.8079898898652215,
      "learning_rate": 0.003,
      "loss": 4.1305,
      "step": 4768
    },
    {
      "epoch": 0.04769,
      "grad_norm": 0.8175014309895597,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 4769
    },
    {
      "epoch": 0.0477,
      "grad_norm": 1.040815138157364,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 4770
    },
    {
      "epoch": 0.04771,
      "grad_norm": 1.1500954512552088,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 4771
    },
    {
      "epoch": 0.04772,
      "grad_norm": 0.8897742060078679,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 4772
    },
    {
      "epoch": 0.04773,
      "grad_norm": 0.8351692338869225,
      "learning_rate": 0.003,
      "loss": 4.1568,
      "step": 4773
    },
    {
      "epoch": 0.04774,
      "grad_norm": 0.9151298772105244,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 4774
    },
    {
      "epoch": 0.04775,
      "grad_norm": 0.9751550319429745,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 4775
    },
    {
      "epoch": 0.04776,
      "grad_norm": 0.9638676545283447,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 4776
    },
    {
      "epoch": 0.04777,
      "grad_norm": 1.1646838839296059,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 4777
    },
    {
      "epoch": 0.04778,
      "grad_norm": 0.8824685892267762,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 4778
    },
    {
      "epoch": 0.04779,
      "grad_norm": 0.7623197654914352,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 4779
    },
    {
      "epoch": 0.0478,
      "grad_norm": 0.7803964563209768,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 4780
    },
    {
      "epoch": 0.04781,
      "grad_norm": 0.8677755571648247,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 4781
    },
    {
      "epoch": 0.04782,
      "grad_norm": 0.8977177055850454,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 4782
    },
    {
      "epoch": 0.04783,
      "grad_norm": 1.0119802996771465,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 4783
    },
    {
      "epoch": 0.04784,
      "grad_norm": 0.8774934854130646,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 4784
    },
    {
      "epoch": 0.04785,
      "grad_norm": 0.7857875427258052,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 4785
    },
    {
      "epoch": 0.04786,
      "grad_norm": 0.8820990387768732,
      "learning_rate": 0.003,
      "loss": 4.1463,
      "step": 4786
    },
    {
      "epoch": 0.04787,
      "grad_norm": 1.3348035022755884,
      "learning_rate": 0.003,
      "loss": 4.1544,
      "step": 4787
    },
    {
      "epoch": 0.04788,
      "grad_norm": 0.9722902455641531,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4788
    },
    {
      "epoch": 0.04789,
      "grad_norm": 0.8249065978709161,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4789
    },
    {
      "epoch": 0.0479,
      "grad_norm": 0.8098922858184793,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 4790
    },
    {
      "epoch": 0.04791,
      "grad_norm": 0.8530020182040862,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 4791
    },
    {
      "epoch": 0.04792,
      "grad_norm": 0.9361191470916291,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 4792
    },
    {
      "epoch": 0.04793,
      "grad_norm": 0.8995706892213676,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 4793
    },
    {
      "epoch": 0.04794,
      "grad_norm": 0.8946924778288801,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 4794
    },
    {
      "epoch": 0.04795,
      "grad_norm": 0.955319338607847,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 4795
    },
    {
      "epoch": 0.04796,
      "grad_norm": 0.9209462528560896,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 4796
    },
    {
      "epoch": 0.04797,
      "grad_norm": 0.9072552403900586,
      "learning_rate": 0.003,
      "loss": 4.1462,
      "step": 4797
    },
    {
      "epoch": 0.04798,
      "grad_norm": 1.1376509845610743,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 4798
    },
    {
      "epoch": 0.04799,
      "grad_norm": 0.9872830450267011,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 4799
    },
    {
      "epoch": 0.048,
      "grad_norm": 1.0296601446082432,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 4800
    },
    {
      "epoch": 0.04801,
      "grad_norm": 0.9104805461757415,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 4801
    },
    {
      "epoch": 0.04802,
      "grad_norm": 0.9761083085564308,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 4802
    },
    {
      "epoch": 0.04803,
      "grad_norm": 0.9665537842868093,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 4803
    },
    {
      "epoch": 0.04804,
      "grad_norm": 0.9087746601238126,
      "learning_rate": 0.003,
      "loss": 4.1511,
      "step": 4804
    },
    {
      "epoch": 0.04805,
      "grad_norm": 0.8359020051915194,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 4805
    },
    {
      "epoch": 0.04806,
      "grad_norm": 1.0009769366999013,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 4806
    },
    {
      "epoch": 0.04807,
      "grad_norm": 1.3538106119491318,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 4807
    },
    {
      "epoch": 0.04808,
      "grad_norm": 0.7759337211545583,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 4808
    },
    {
      "epoch": 0.04809,
      "grad_norm": 0.7867983138442217,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 4809
    },
    {
      "epoch": 0.0481,
      "grad_norm": 1.0460647834530599,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 4810
    },
    {
      "epoch": 0.04811,
      "grad_norm": 0.9588194390032568,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 4811
    },
    {
      "epoch": 0.04812,
      "grad_norm": 1.2072876723569714,
      "learning_rate": 0.003,
      "loss": 4.1503,
      "step": 4812
    },
    {
      "epoch": 0.04813,
      "grad_norm": 0.8442155640566389,
      "learning_rate": 0.003,
      "loss": 4.1311,
      "step": 4813
    },
    {
      "epoch": 0.04814,
      "grad_norm": 0.7342275117522243,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 4814
    },
    {
      "epoch": 0.04815,
      "grad_norm": 0.709374442322939,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 4815
    },
    {
      "epoch": 0.04816,
      "grad_norm": 0.7917399222991578,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 4816
    },
    {
      "epoch": 0.04817,
      "grad_norm": 0.8274465563158391,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 4817
    },
    {
      "epoch": 0.04818,
      "grad_norm": 0.8182701860044233,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 4818
    },
    {
      "epoch": 0.04819,
      "grad_norm": 0.8458748903862201,
      "learning_rate": 0.003,
      "loss": 4.1283,
      "step": 4819
    },
    {
      "epoch": 0.0482,
      "grad_norm": 0.8155150451708626,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 4820
    },
    {
      "epoch": 0.04821,
      "grad_norm": 0.8777962638905318,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 4821
    },
    {
      "epoch": 0.04822,
      "grad_norm": 0.9431701355771726,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 4822
    },
    {
      "epoch": 0.04823,
      "grad_norm": 0.9695687695883342,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 4823
    },
    {
      "epoch": 0.04824,
      "grad_norm": 0.991895345693445,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 4824
    },
    {
      "epoch": 0.04825,
      "grad_norm": 1.0943797710534833,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 4825
    },
    {
      "epoch": 0.04826,
      "grad_norm": 0.8268466548342316,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 4826
    },
    {
      "epoch": 0.04827,
      "grad_norm": 0.8361639143145106,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 4827
    },
    {
      "epoch": 0.04828,
      "grad_norm": 1.0286267106339764,
      "learning_rate": 0.003,
      "loss": 4.1511,
      "step": 4828
    },
    {
      "epoch": 0.04829,
      "grad_norm": 1.159942400169441,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 4829
    },
    {
      "epoch": 0.0483,
      "grad_norm": 0.9372268357664759,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 4830
    },
    {
      "epoch": 0.04831,
      "grad_norm": 0.8980121041802035,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 4831
    },
    {
      "epoch": 0.04832,
      "grad_norm": 0.8916438529090394,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 4832
    },
    {
      "epoch": 0.04833,
      "grad_norm": 1.0001644468197317,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 4833
    },
    {
      "epoch": 0.04834,
      "grad_norm": 1.2415615713112236,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 4834
    },
    {
      "epoch": 0.04835,
      "grad_norm": 0.9209372163782303,
      "learning_rate": 0.003,
      "loss": 4.1495,
      "step": 4835
    },
    {
      "epoch": 0.04836,
      "grad_norm": 1.0240500494308544,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 4836
    },
    {
      "epoch": 0.04837,
      "grad_norm": 1.0801075970896188,
      "learning_rate": 0.003,
      "loss": 4.1502,
      "step": 4837
    },
    {
      "epoch": 0.04838,
      "grad_norm": 0.8556232959465542,
      "learning_rate": 0.003,
      "loss": 4.1533,
      "step": 4838
    },
    {
      "epoch": 0.04839,
      "grad_norm": 0.8048051834755471,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 4839
    },
    {
      "epoch": 0.0484,
      "grad_norm": 0.8367021532891096,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 4840
    },
    {
      "epoch": 0.04841,
      "grad_norm": 0.7607914132160744,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 4841
    },
    {
      "epoch": 0.04842,
      "grad_norm": 0.7830637400101697,
      "learning_rate": 0.003,
      "loss": 4.1503,
      "step": 4842
    },
    {
      "epoch": 0.04843,
      "grad_norm": 0.9114911514114503,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 4843
    },
    {
      "epoch": 0.04844,
      "grad_norm": 1.1708951262723284,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 4844
    },
    {
      "epoch": 0.04845,
      "grad_norm": 1.0089311204905278,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 4845
    },
    {
      "epoch": 0.04846,
      "grad_norm": 1.0807920040383794,
      "learning_rate": 0.003,
      "loss": 4.1529,
      "step": 4846
    },
    {
      "epoch": 0.04847,
      "grad_norm": 0.8594401536252937,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 4847
    },
    {
      "epoch": 0.04848,
      "grad_norm": 0.7513172891372867,
      "learning_rate": 0.003,
      "loss": 4.1503,
      "step": 4848
    },
    {
      "epoch": 0.04849,
      "grad_norm": 0.751292803407457,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 4849
    },
    {
      "epoch": 0.0485,
      "grad_norm": 0.7959018388472837,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 4850
    },
    {
      "epoch": 0.04851,
      "grad_norm": 0.9962650900237886,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 4851
    },
    {
      "epoch": 0.04852,
      "grad_norm": 1.1834574873765151,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 4852
    },
    {
      "epoch": 0.04853,
      "grad_norm": 0.7889329019222011,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 4853
    },
    {
      "epoch": 0.04854,
      "grad_norm": 0.7364486677015084,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 4854
    },
    {
      "epoch": 0.04855,
      "grad_norm": 0.7363603615147029,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 4855
    },
    {
      "epoch": 0.04856,
      "grad_norm": 0.7141913003641922,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 4856
    },
    {
      "epoch": 0.04857,
      "grad_norm": 0.7697901500308619,
      "learning_rate": 0.003,
      "loss": 4.1512,
      "step": 4857
    },
    {
      "epoch": 0.04858,
      "grad_norm": 0.978947209936643,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 4858
    },
    {
      "epoch": 0.04859,
      "grad_norm": 1.0853261299941506,
      "learning_rate": 0.003,
      "loss": 4.164,
      "step": 4859
    },
    {
      "epoch": 0.0486,
      "grad_norm": 0.8322040486694344,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 4860
    },
    {
      "epoch": 0.04861,
      "grad_norm": 0.911479945440266,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 4861
    },
    {
      "epoch": 0.04862,
      "grad_norm": 0.9173241722504067,
      "learning_rate": 0.003,
      "loss": 4.1361,
      "step": 4862
    },
    {
      "epoch": 0.04863,
      "grad_norm": 0.8964490214389235,
      "learning_rate": 0.003,
      "loss": 4.1527,
      "step": 4863
    },
    {
      "epoch": 0.04864,
      "grad_norm": 0.972306148276167,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 4864
    },
    {
      "epoch": 0.04865,
      "grad_norm": 1.1291430700547054,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 4865
    },
    {
      "epoch": 0.04866,
      "grad_norm": 1.0797428982956703,
      "learning_rate": 0.003,
      "loss": 4.1598,
      "step": 4866
    },
    {
      "epoch": 0.04867,
      "grad_norm": 0.802691748949696,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 4867
    },
    {
      "epoch": 0.04868,
      "grad_norm": 0.6363552129783063,
      "learning_rate": 0.003,
      "loss": 4.1297,
      "step": 4868
    },
    {
      "epoch": 0.04869,
      "grad_norm": 0.8246675935867382,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 4869
    },
    {
      "epoch": 0.0487,
      "grad_norm": 0.9180468562537373,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 4870
    },
    {
      "epoch": 0.04871,
      "grad_norm": 1.101938319921805,
      "learning_rate": 0.003,
      "loss": 4.1561,
      "step": 4871
    },
    {
      "epoch": 0.04872,
      "grad_norm": 0.8727221393103543,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 4872
    },
    {
      "epoch": 0.04873,
      "grad_norm": 0.9149687440430082,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 4873
    },
    {
      "epoch": 0.04874,
      "grad_norm": 0.9259144418955525,
      "learning_rate": 0.003,
      "loss": 4.1338,
      "step": 4874
    },
    {
      "epoch": 0.04875,
      "grad_norm": 0.8834881037341485,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 4875
    },
    {
      "epoch": 0.04876,
      "grad_norm": 0.933167953983561,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4876
    },
    {
      "epoch": 0.04877,
      "grad_norm": 1.0482409044529428,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 4877
    },
    {
      "epoch": 0.04878,
      "grad_norm": 1.0364280416714626,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 4878
    },
    {
      "epoch": 0.04879,
      "grad_norm": 0.8888490541945427,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 4879
    },
    {
      "epoch": 0.0488,
      "grad_norm": 0.8177396614668453,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 4880
    },
    {
      "epoch": 0.04881,
      "grad_norm": 0.8123257391255282,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 4881
    },
    {
      "epoch": 0.04882,
      "grad_norm": 0.7975721516970518,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 4882
    },
    {
      "epoch": 0.04883,
      "grad_norm": 0.7955421903063317,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4883
    },
    {
      "epoch": 0.04884,
      "grad_norm": 1.0189979080759815,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 4884
    },
    {
      "epoch": 0.04885,
      "grad_norm": 1.2153408218196549,
      "learning_rate": 0.003,
      "loss": 4.1537,
      "step": 4885
    },
    {
      "epoch": 0.04886,
      "grad_norm": 0.7349864263114413,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 4886
    },
    {
      "epoch": 0.04887,
      "grad_norm": 0.7833103419829659,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 4887
    },
    {
      "epoch": 0.04888,
      "grad_norm": 0.7975450688309594,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 4888
    },
    {
      "epoch": 0.04889,
      "grad_norm": 1.1954327023621745,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 4889
    },
    {
      "epoch": 0.0489,
      "grad_norm": 1.2543113698602726,
      "learning_rate": 0.003,
      "loss": 4.1457,
      "step": 4890
    },
    {
      "epoch": 0.04891,
      "grad_norm": 0.8947016132538613,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 4891
    },
    {
      "epoch": 0.04892,
      "grad_norm": 0.8348670424043885,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 4892
    },
    {
      "epoch": 0.04893,
      "grad_norm": 0.8995677954784294,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 4893
    },
    {
      "epoch": 0.04894,
      "grad_norm": 1.4114151388613976,
      "learning_rate": 0.003,
      "loss": 4.1623,
      "step": 4894
    },
    {
      "epoch": 0.04895,
      "grad_norm": 1.1554803750152747,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 4895
    },
    {
      "epoch": 0.04896,
      "grad_norm": 0.9507103715274245,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 4896
    },
    {
      "epoch": 0.04897,
      "grad_norm": 0.906957283921157,
      "learning_rate": 0.003,
      "loss": 4.151,
      "step": 4897
    },
    {
      "epoch": 0.04898,
      "grad_norm": 0.8944074460888909,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 4898
    },
    {
      "epoch": 0.04899,
      "grad_norm": 0.8638995775443565,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 4899
    },
    {
      "epoch": 0.049,
      "grad_norm": 0.8297845635886839,
      "learning_rate": 0.003,
      "loss": 4.1532,
      "step": 4900
    },
    {
      "epoch": 0.04901,
      "grad_norm": 0.9248298803156414,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 4901
    },
    {
      "epoch": 0.04902,
      "grad_norm": 1.0368926529413733,
      "learning_rate": 0.003,
      "loss": 4.1541,
      "step": 4902
    },
    {
      "epoch": 0.04903,
      "grad_norm": 0.9155640413660329,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 4903
    },
    {
      "epoch": 0.04904,
      "grad_norm": 0.8052932601979055,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 4904
    },
    {
      "epoch": 0.04905,
      "grad_norm": 0.8694803464220409,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 4905
    },
    {
      "epoch": 0.04906,
      "grad_norm": 1.0292003158296992,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 4906
    },
    {
      "epoch": 0.04907,
      "grad_norm": 1.080150716503704,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 4907
    },
    {
      "epoch": 0.04908,
      "grad_norm": 0.9036715675717701,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 4908
    },
    {
      "epoch": 0.04909,
      "grad_norm": 0.8807978117119255,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 4909
    },
    {
      "epoch": 0.0491,
      "grad_norm": 0.8458247064272691,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 4910
    },
    {
      "epoch": 0.04911,
      "grad_norm": 0.8766008605366878,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 4911
    },
    {
      "epoch": 0.04912,
      "grad_norm": 0.8331605702108348,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 4912
    },
    {
      "epoch": 0.04913,
      "grad_norm": 0.8360485345313021,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 4913
    },
    {
      "epoch": 0.04914,
      "grad_norm": 0.7899851119072792,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 4914
    },
    {
      "epoch": 0.04915,
      "grad_norm": 0.9630478684955635,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 4915
    },
    {
      "epoch": 0.04916,
      "grad_norm": 0.9934301086816488,
      "learning_rate": 0.003,
      "loss": 4.1375,
      "step": 4916
    },
    {
      "epoch": 0.04917,
      "grad_norm": 1.3041941957357555,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 4917
    },
    {
      "epoch": 0.04918,
      "grad_norm": 0.7891044278279551,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 4918
    },
    {
      "epoch": 0.04919,
      "grad_norm": 0.866921940746236,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 4919
    },
    {
      "epoch": 0.0492,
      "grad_norm": 0.9289187382638477,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 4920
    },
    {
      "epoch": 0.04921,
      "grad_norm": 0.999976308146298,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 4921
    },
    {
      "epoch": 0.04922,
      "grad_norm": 1.124045561099308,
      "learning_rate": 0.003,
      "loss": 4.1365,
      "step": 4922
    },
    {
      "epoch": 0.04923,
      "grad_norm": 0.9763261912117048,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 4923
    },
    {
      "epoch": 0.04924,
      "grad_norm": 0.8510875610500769,
      "learning_rate": 0.003,
      "loss": 4.1611,
      "step": 4924
    },
    {
      "epoch": 0.04925,
      "grad_norm": 0.8704283410665644,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 4925
    },
    {
      "epoch": 0.04926,
      "grad_norm": 0.9383663644735687,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 4926
    },
    {
      "epoch": 0.04927,
      "grad_norm": 1.1427920258058981,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 4927
    },
    {
      "epoch": 0.04928,
      "grad_norm": 1.0626423088763775,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 4928
    },
    {
      "epoch": 0.04929,
      "grad_norm": 1.0837552807904482,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 4929
    },
    {
      "epoch": 0.0493,
      "grad_norm": 0.9600959111967431,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 4930
    },
    {
      "epoch": 0.04931,
      "grad_norm": 0.9442147578078418,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 4931
    },
    {
      "epoch": 0.04932,
      "grad_norm": 0.8723803650613164,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 4932
    },
    {
      "epoch": 0.04933,
      "grad_norm": 0.9126114021558055,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 4933
    },
    {
      "epoch": 0.04934,
      "grad_norm": 0.7831133724300265,
      "learning_rate": 0.003,
      "loss": 4.1594,
      "step": 4934
    },
    {
      "epoch": 0.04935,
      "grad_norm": 0.8193838082678253,
      "learning_rate": 0.003,
      "loss": 4.1556,
      "step": 4935
    },
    {
      "epoch": 0.04936,
      "grad_norm": 0.9542250458968273,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 4936
    },
    {
      "epoch": 0.04937,
      "grad_norm": 1.0102642975787324,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 4937
    },
    {
      "epoch": 0.04938,
      "grad_norm": 1.1822748470538103,
      "learning_rate": 0.003,
      "loss": 4.1433,
      "step": 4938
    },
    {
      "epoch": 0.04939,
      "grad_norm": 0.8813626480520453,
      "learning_rate": 0.003,
      "loss": 4.1465,
      "step": 4939
    },
    {
      "epoch": 0.0494,
      "grad_norm": 0.9354601920016478,
      "learning_rate": 0.003,
      "loss": 4.1534,
      "step": 4940
    },
    {
      "epoch": 0.04941,
      "grad_norm": 0.8531796298952382,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 4941
    },
    {
      "epoch": 0.04942,
      "grad_norm": 0.8648680617674837,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 4942
    },
    {
      "epoch": 0.04943,
      "grad_norm": 0.8572189947286999,
      "learning_rate": 0.003,
      "loss": 4.1537,
      "step": 4943
    },
    {
      "epoch": 0.04944,
      "grad_norm": 1.0596615074274864,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 4944
    },
    {
      "epoch": 0.04945,
      "grad_norm": 0.929833863296995,
      "learning_rate": 0.003,
      "loss": 4.1507,
      "step": 4945
    },
    {
      "epoch": 0.04946,
      "grad_norm": 0.9625930540154848,
      "learning_rate": 0.003,
      "loss": 4.1756,
      "step": 4946
    },
    {
      "epoch": 0.04947,
      "grad_norm": 0.9938577466365767,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 4947
    },
    {
      "epoch": 0.04948,
      "grad_norm": 1.0582956690263399,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 4948
    },
    {
      "epoch": 0.04949,
      "grad_norm": 1.0609539105003243,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 4949
    },
    {
      "epoch": 0.0495,
      "grad_norm": 0.8573405852337684,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 4950
    },
    {
      "epoch": 0.04951,
      "grad_norm": 0.9045716976759907,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 4951
    },
    {
      "epoch": 0.04952,
      "grad_norm": 0.9697520441480982,
      "learning_rate": 0.003,
      "loss": 4.1497,
      "step": 4952
    },
    {
      "epoch": 0.04953,
      "grad_norm": 1.0183145355055803,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 4953
    },
    {
      "epoch": 0.04954,
      "grad_norm": 0.9741338701993631,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 4954
    },
    {
      "epoch": 0.04955,
      "grad_norm": 1.1025167524243962,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 4955
    },
    {
      "epoch": 0.04956,
      "grad_norm": 0.8870510145238134,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 4956
    },
    {
      "epoch": 0.04957,
      "grad_norm": 0.7825537450099663,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 4957
    },
    {
      "epoch": 0.04958,
      "grad_norm": 0.7124964191360044,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 4958
    },
    {
      "epoch": 0.04959,
      "grad_norm": 0.7590903052152222,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 4959
    },
    {
      "epoch": 0.0496,
      "grad_norm": 0.8759268385091953,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 4960
    },
    {
      "epoch": 0.04961,
      "grad_norm": 1.0986531804493005,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 4961
    },
    {
      "epoch": 0.04962,
      "grad_norm": 0.9519760561283285,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 4962
    },
    {
      "epoch": 0.04963,
      "grad_norm": 0.8654145678491735,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 4963
    },
    {
      "epoch": 0.04964,
      "grad_norm": 0.7211388340825737,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 4964
    },
    {
      "epoch": 0.04965,
      "grad_norm": 0.8630218156973655,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 4965
    },
    {
      "epoch": 0.04966,
      "grad_norm": 1.0251817150393079,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 4966
    },
    {
      "epoch": 0.04967,
      "grad_norm": 1.093600352302926,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 4967
    },
    {
      "epoch": 0.04968,
      "grad_norm": 0.9143668717946363,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 4968
    },
    {
      "epoch": 0.04969,
      "grad_norm": 0.8211912863315765,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 4969
    },
    {
      "epoch": 0.0497,
      "grad_norm": 0.8134944463392609,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 4970
    },
    {
      "epoch": 0.04971,
      "grad_norm": 0.761104211586214,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 4971
    },
    {
      "epoch": 0.04972,
      "grad_norm": 0.8740554584097706,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 4972
    },
    {
      "epoch": 0.04973,
      "grad_norm": 0.8620362754914105,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 4973
    },
    {
      "epoch": 0.04974,
      "grad_norm": 0.9442564618069658,
      "learning_rate": 0.003,
      "loss": 4.1306,
      "step": 4974
    },
    {
      "epoch": 0.04975,
      "grad_norm": 0.9778639609000918,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 4975
    },
    {
      "epoch": 0.04976,
      "grad_norm": 1.0420631135988534,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 4976
    },
    {
      "epoch": 0.04977,
      "grad_norm": 1.0602564274040003,
      "learning_rate": 0.003,
      "loss": 4.146,
      "step": 4977
    },
    {
      "epoch": 0.04978,
      "grad_norm": 0.9052204217537351,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 4978
    },
    {
      "epoch": 0.04979,
      "grad_norm": 0.9947293611924701,
      "learning_rate": 0.003,
      "loss": 4.1496,
      "step": 4979
    },
    {
      "epoch": 0.0498,
      "grad_norm": 1.0091251108504844,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 4980
    },
    {
      "epoch": 0.04981,
      "grad_norm": 1.1609493803122968,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 4981
    },
    {
      "epoch": 0.04982,
      "grad_norm": 1.122465102066747,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 4982
    },
    {
      "epoch": 0.04983,
      "grad_norm": 1.3048056173322304,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 4983
    },
    {
      "epoch": 0.04984,
      "grad_norm": 0.9839501695087924,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 4984
    },
    {
      "epoch": 0.04985,
      "grad_norm": 1.0551935971857649,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 4985
    },
    {
      "epoch": 0.04986,
      "grad_norm": 1.0243284821155656,
      "learning_rate": 0.003,
      "loss": 4.1587,
      "step": 4986
    },
    {
      "epoch": 0.04987,
      "grad_norm": 0.7958713265484796,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 4987
    },
    {
      "epoch": 0.04988,
      "grad_norm": 0.7040759247040429,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 4988
    },
    {
      "epoch": 0.04989,
      "grad_norm": 0.8411275085142221,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 4989
    },
    {
      "epoch": 0.0499,
      "grad_norm": 0.8415827540961842,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 4990
    },
    {
      "epoch": 0.04991,
      "grad_norm": 0.7469609887556586,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 4991
    },
    {
      "epoch": 0.04992,
      "grad_norm": 0.8249372889306039,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 4992
    },
    {
      "epoch": 0.04993,
      "grad_norm": 1.0566657783178806,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 4993
    },
    {
      "epoch": 0.04994,
      "grad_norm": 1.0996011403959378,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 4994
    },
    {
      "epoch": 0.04995,
      "grad_norm": 0.9432281244618136,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 4995
    },
    {
      "epoch": 0.04996,
      "grad_norm": 0.8902334825733927,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 4996
    },
    {
      "epoch": 0.04997,
      "grad_norm": 0.7781105185125646,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 4997
    },
    {
      "epoch": 0.04998,
      "grad_norm": 0.6987900025248667,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 4998
    },
    {
      "epoch": 0.04999,
      "grad_norm": 0.740322084305092,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 4999
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.8975134700731652,
      "learning_rate": 0.003,
      "loss": 4.158,
      "step": 5000
    },
    {
      "epoch": 0.05001,
      "grad_norm": 1.1125421426369673,
      "learning_rate": 0.003,
      "loss": 4.1551,
      "step": 5001
    },
    {
      "epoch": 0.05002,
      "grad_norm": 1.268946353977916,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 5002
    },
    {
      "epoch": 0.05003,
      "grad_norm": 0.8237186256695063,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 5003
    },
    {
      "epoch": 0.05004,
      "grad_norm": 0.8676428422328399,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 5004
    },
    {
      "epoch": 0.05005,
      "grad_norm": 0.7746806700851315,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 5005
    },
    {
      "epoch": 0.05006,
      "grad_norm": 0.8409396903439786,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 5006
    },
    {
      "epoch": 0.05007,
      "grad_norm": 0.9473102489321485,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 5007
    },
    {
      "epoch": 0.05008,
      "grad_norm": 1.100401498879317,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 5008
    },
    {
      "epoch": 0.05009,
      "grad_norm": 0.9798182802542361,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 5009
    },
    {
      "epoch": 0.0501,
      "grad_norm": 1.0935878325404964,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 5010
    },
    {
      "epoch": 0.05011,
      "grad_norm": 1.0191676731522699,
      "learning_rate": 0.003,
      "loss": 4.1388,
      "step": 5011
    },
    {
      "epoch": 0.05012,
      "grad_norm": 1.1270362384105346,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 5012
    },
    {
      "epoch": 0.05013,
      "grad_norm": 1.018439521580052,
      "learning_rate": 0.003,
      "loss": 4.1362,
      "step": 5013
    },
    {
      "epoch": 0.05014,
      "grad_norm": 1.0624365348167546,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 5014
    },
    {
      "epoch": 0.05015,
      "grad_norm": 0.9411876813426826,
      "learning_rate": 0.003,
      "loss": 4.1499,
      "step": 5015
    },
    {
      "epoch": 0.05016,
      "grad_norm": 0.9736253716196595,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 5016
    },
    {
      "epoch": 0.05017,
      "grad_norm": 1.1100942667063913,
      "learning_rate": 0.003,
      "loss": 4.1527,
      "step": 5017
    },
    {
      "epoch": 0.05018,
      "grad_norm": 1.0084783515734652,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 5018
    },
    {
      "epoch": 0.05019,
      "grad_norm": 0.9759151660471137,
      "learning_rate": 0.003,
      "loss": 4.1635,
      "step": 5019
    },
    {
      "epoch": 0.0502,
      "grad_norm": 0.9723722102677356,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 5020
    },
    {
      "epoch": 0.05021,
      "grad_norm": 0.9049845415635082,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 5021
    },
    {
      "epoch": 0.05022,
      "grad_norm": 0.9107217668273613,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 5022
    },
    {
      "epoch": 0.05023,
      "grad_norm": 1.0887083292657602,
      "learning_rate": 0.003,
      "loss": 4.1618,
      "step": 5023
    },
    {
      "epoch": 0.05024,
      "grad_norm": 1.1150744057770852,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 5024
    },
    {
      "epoch": 0.05025,
      "grad_norm": 1.0031676743967801,
      "learning_rate": 0.003,
      "loss": 4.1564,
      "step": 5025
    },
    {
      "epoch": 0.05026,
      "grad_norm": 0.9402890020375552,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 5026
    },
    {
      "epoch": 0.05027,
      "grad_norm": 0.8731649649321198,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 5027
    },
    {
      "epoch": 0.05028,
      "grad_norm": 0.7951256123529497,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 5028
    },
    {
      "epoch": 0.05029,
      "grad_norm": 0.8539293439248877,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 5029
    },
    {
      "epoch": 0.0503,
      "grad_norm": 0.8281260610684518,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 5030
    },
    {
      "epoch": 0.05031,
      "grad_norm": 0.817963787364258,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 5031
    },
    {
      "epoch": 0.05032,
      "grad_norm": 0.9512198937363134,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 5032
    },
    {
      "epoch": 0.05033,
      "grad_norm": 0.9963022197559578,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 5033
    },
    {
      "epoch": 0.05034,
      "grad_norm": 1.104614966345552,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 5034
    },
    {
      "epoch": 0.05035,
      "grad_norm": 1.0983448584739037,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 5035
    },
    {
      "epoch": 0.05036,
      "grad_norm": 0.9061350341495388,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 5036
    },
    {
      "epoch": 0.05037,
      "grad_norm": 0.9464966069987577,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 5037
    },
    {
      "epoch": 0.05038,
      "grad_norm": 1.0201982381782406,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 5038
    },
    {
      "epoch": 0.05039,
      "grad_norm": 1.0969503993497207,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 5039
    },
    {
      "epoch": 0.0504,
      "grad_norm": 0.902563340773235,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 5040
    },
    {
      "epoch": 0.05041,
      "grad_norm": 0.8273610612179048,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 5041
    },
    {
      "epoch": 0.05042,
      "grad_norm": 0.9503016386214178,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 5042
    },
    {
      "epoch": 0.05043,
      "grad_norm": 0.9388384995642503,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 5043
    },
    {
      "epoch": 0.05044,
      "grad_norm": 0.8905833491260514,
      "learning_rate": 0.003,
      "loss": 4.1461,
      "step": 5044
    },
    {
      "epoch": 0.05045,
      "grad_norm": 0.8430993221174582,
      "learning_rate": 0.003,
      "loss": 4.1603,
      "step": 5045
    },
    {
      "epoch": 0.05046,
      "grad_norm": 0.8169585705346127,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 5046
    },
    {
      "epoch": 0.05047,
      "grad_norm": 0.95893686498772,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 5047
    },
    {
      "epoch": 0.05048,
      "grad_norm": 1.3360304246137076,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 5048
    },
    {
      "epoch": 0.05049,
      "grad_norm": 0.951954395506191,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 5049
    },
    {
      "epoch": 0.0505,
      "grad_norm": 0.8883334202277473,
      "learning_rate": 0.003,
      "loss": 4.1824,
      "step": 5050
    },
    {
      "epoch": 0.05051,
      "grad_norm": 0.849437175294796,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 5051
    },
    {
      "epoch": 0.05052,
      "grad_norm": 0.907407381853802,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 5052
    },
    {
      "epoch": 0.05053,
      "grad_norm": 0.8362515125237234,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 5053
    },
    {
      "epoch": 0.05054,
      "grad_norm": 0.6457242952400993,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 5054
    },
    {
      "epoch": 0.05055,
      "grad_norm": 0.6640040755007646,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 5055
    },
    {
      "epoch": 0.05056,
      "grad_norm": 0.730354009820354,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 5056
    },
    {
      "epoch": 0.05057,
      "grad_norm": 0.9095854829866085,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 5057
    },
    {
      "epoch": 0.05058,
      "grad_norm": 1.3085718232187313,
      "learning_rate": 0.003,
      "loss": 4.1505,
      "step": 5058
    },
    {
      "epoch": 0.05059,
      "grad_norm": 0.8208558372297785,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 5059
    },
    {
      "epoch": 0.0506,
      "grad_norm": 0.8287539156196918,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 5060
    },
    {
      "epoch": 0.05061,
      "grad_norm": 0.9396605720141616,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 5061
    },
    {
      "epoch": 0.05062,
      "grad_norm": 1.080983860701126,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 5062
    },
    {
      "epoch": 0.05063,
      "grad_norm": 0.9569977418540291,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 5063
    },
    {
      "epoch": 0.05064,
      "grad_norm": 1.075568604431901,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 5064
    },
    {
      "epoch": 0.05065,
      "grad_norm": 0.9243313906929234,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 5065
    },
    {
      "epoch": 0.05066,
      "grad_norm": 0.9374319059631955,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 5066
    },
    {
      "epoch": 0.05067,
      "grad_norm": 0.8481769015446154,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 5067
    },
    {
      "epoch": 0.05068,
      "grad_norm": 0.9303443535141828,
      "learning_rate": 0.003,
      "loss": 4.1451,
      "step": 5068
    },
    {
      "epoch": 0.05069,
      "grad_norm": 0.9770075079961184,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 5069
    },
    {
      "epoch": 0.0507,
      "grad_norm": 1.1996863760884935,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 5070
    },
    {
      "epoch": 0.05071,
      "grad_norm": 0.7185648581445719,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 5071
    },
    {
      "epoch": 0.05072,
      "grad_norm": 0.7935232361406919,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 5072
    },
    {
      "epoch": 0.05073,
      "grad_norm": 0.8603762101329337,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 5073
    },
    {
      "epoch": 0.05074,
      "grad_norm": 0.8776333809260097,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 5074
    },
    {
      "epoch": 0.05075,
      "grad_norm": 1.134189424557927,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 5075
    },
    {
      "epoch": 0.05076,
      "grad_norm": 1.1348672821872274,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 5076
    },
    {
      "epoch": 0.05077,
      "grad_norm": 0.9941313632381431,
      "learning_rate": 0.003,
      "loss": 4.1532,
      "step": 5077
    },
    {
      "epoch": 0.05078,
      "grad_norm": 0.894813842371774,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 5078
    },
    {
      "epoch": 0.05079,
      "grad_norm": 0.9864042658471017,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 5079
    },
    {
      "epoch": 0.0508,
      "grad_norm": 1.2211066026928066,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 5080
    },
    {
      "epoch": 0.05081,
      "grad_norm": 0.952087759769159,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 5081
    },
    {
      "epoch": 0.05082,
      "grad_norm": 1.0044149163525906,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 5082
    },
    {
      "epoch": 0.05083,
      "grad_norm": 1.043147668555842,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 5083
    },
    {
      "epoch": 0.05084,
      "grad_norm": 0.916074260393084,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 5084
    },
    {
      "epoch": 0.05085,
      "grad_norm": 0.9863563460723465,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 5085
    },
    {
      "epoch": 0.05086,
      "grad_norm": 0.9526080378517822,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 5086
    },
    {
      "epoch": 0.05087,
      "grad_norm": 0.8954841316797875,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 5087
    },
    {
      "epoch": 0.05088,
      "grad_norm": 0.6865090658283284,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 5088
    },
    {
      "epoch": 0.05089,
      "grad_norm": 0.7613574075539622,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 5089
    },
    {
      "epoch": 0.0509,
      "grad_norm": 0.8375881612111975,
      "learning_rate": 0.003,
      "loss": 4.1562,
      "step": 5090
    },
    {
      "epoch": 0.05091,
      "grad_norm": 1.088984983029717,
      "learning_rate": 0.003,
      "loss": 4.123,
      "step": 5091
    },
    {
      "epoch": 0.05092,
      "grad_norm": 0.9537871650339408,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 5092
    },
    {
      "epoch": 0.05093,
      "grad_norm": 0.8778865142910927,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 5093
    },
    {
      "epoch": 0.05094,
      "grad_norm": 0.8245219632382089,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 5094
    },
    {
      "epoch": 0.05095,
      "grad_norm": 0.7511141744389324,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 5095
    },
    {
      "epoch": 0.05096,
      "grad_norm": 0.7847143162436225,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 5096
    },
    {
      "epoch": 0.05097,
      "grad_norm": 0.8122517942129055,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 5097
    },
    {
      "epoch": 0.05098,
      "grad_norm": 0.7795564526148553,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 5098
    },
    {
      "epoch": 0.05099,
      "grad_norm": 0.685051675840029,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 5099
    },
    {
      "epoch": 0.051,
      "grad_norm": 0.7357655109108967,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 5100
    },
    {
      "epoch": 0.05101,
      "grad_norm": 0.7366959145534409,
      "learning_rate": 0.003,
      "loss": 4.14,
      "step": 5101
    },
    {
      "epoch": 0.05102,
      "grad_norm": 0.7876273112899445,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 5102
    },
    {
      "epoch": 0.05103,
      "grad_norm": 0.8800742283306661,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 5103
    },
    {
      "epoch": 0.05104,
      "grad_norm": 0.8715423090080415,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 5104
    },
    {
      "epoch": 0.05105,
      "grad_norm": 0.8824452296809646,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 5105
    },
    {
      "epoch": 0.05106,
      "grad_norm": 1.1209780196152646,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 5106
    },
    {
      "epoch": 0.05107,
      "grad_norm": 1.1399593567055775,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 5107
    },
    {
      "epoch": 0.05108,
      "grad_norm": 1.4095394891532904,
      "learning_rate": 0.003,
      "loss": 4.1698,
      "step": 5108
    },
    {
      "epoch": 0.05109,
      "grad_norm": 0.7738170346317165,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 5109
    },
    {
      "epoch": 0.0511,
      "grad_norm": 0.6999432461866981,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 5110
    },
    {
      "epoch": 0.05111,
      "grad_norm": 0.6582926413773714,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 5111
    },
    {
      "epoch": 0.05112,
      "grad_norm": 0.7551033740188957,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 5112
    },
    {
      "epoch": 0.05113,
      "grad_norm": 1.1477812623559516,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 5113
    },
    {
      "epoch": 0.05114,
      "grad_norm": 1.067864380242395,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 5114
    },
    {
      "epoch": 0.05115,
      "grad_norm": 0.9300669029542773,
      "learning_rate": 0.003,
      "loss": 4.1297,
      "step": 5115
    },
    {
      "epoch": 0.05116,
      "grad_norm": 0.9229839891475532,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 5116
    },
    {
      "epoch": 0.05117,
      "grad_norm": 0.9776019657145362,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 5117
    },
    {
      "epoch": 0.05118,
      "grad_norm": 1.0187076693236539,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 5118
    },
    {
      "epoch": 0.05119,
      "grad_norm": 1.1898894115527578,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 5119
    },
    {
      "epoch": 0.0512,
      "grad_norm": 0.7633336883976507,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 5120
    },
    {
      "epoch": 0.05121,
      "grad_norm": 0.8471498082951939,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 5121
    },
    {
      "epoch": 0.05122,
      "grad_norm": 0.9274839274688383,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 5122
    },
    {
      "epoch": 0.05123,
      "grad_norm": 1.1909929556885597,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 5123
    },
    {
      "epoch": 0.05124,
      "grad_norm": 0.815679730561404,
      "learning_rate": 0.003,
      "loss": 4.1565,
      "step": 5124
    },
    {
      "epoch": 0.05125,
      "grad_norm": 0.6789786360542248,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 5125
    },
    {
      "epoch": 0.05126,
      "grad_norm": 0.7440665492798663,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 5126
    },
    {
      "epoch": 0.05127,
      "grad_norm": 1.024011999747007,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 5127
    },
    {
      "epoch": 0.05128,
      "grad_norm": 1.3782586129142689,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 5128
    },
    {
      "epoch": 0.05129,
      "grad_norm": 0.779693198261867,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 5129
    },
    {
      "epoch": 0.0513,
      "grad_norm": 0.6550835083023576,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 5130
    },
    {
      "epoch": 0.05131,
      "grad_norm": 0.7096202778600647,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 5131
    },
    {
      "epoch": 0.05132,
      "grad_norm": 0.8254712872773159,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 5132
    },
    {
      "epoch": 0.05133,
      "grad_norm": 0.823333279464697,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 5133
    },
    {
      "epoch": 0.05134,
      "grad_norm": 0.77855502466439,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 5134
    },
    {
      "epoch": 0.05135,
      "grad_norm": 0.8870106498581014,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5135
    },
    {
      "epoch": 0.05136,
      "grad_norm": 1.1069868718885745,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 5136
    },
    {
      "epoch": 0.05137,
      "grad_norm": 1.055928538799243,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 5137
    },
    {
      "epoch": 0.05138,
      "grad_norm": 1.2420968795396852,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 5138
    },
    {
      "epoch": 0.05139,
      "grad_norm": 1.1060392587125027,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 5139
    },
    {
      "epoch": 0.0514,
      "grad_norm": 1.084456335779527,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 5140
    },
    {
      "epoch": 0.05141,
      "grad_norm": 0.8635486301225069,
      "learning_rate": 0.003,
      "loss": 4.1747,
      "step": 5141
    },
    {
      "epoch": 0.05142,
      "grad_norm": 0.8012211497201878,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 5142
    },
    {
      "epoch": 0.05143,
      "grad_norm": 0.7460263516853637,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 5143
    },
    {
      "epoch": 0.05144,
      "grad_norm": 0.8599506765736857,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 5144
    },
    {
      "epoch": 0.05145,
      "grad_norm": 1.1530777313914198,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 5145
    },
    {
      "epoch": 0.05146,
      "grad_norm": 1.0839316629215454,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 5146
    },
    {
      "epoch": 0.05147,
      "grad_norm": 1.009008584231616,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 5147
    },
    {
      "epoch": 0.05148,
      "grad_norm": 1.0775481336458725,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 5148
    },
    {
      "epoch": 0.05149,
      "grad_norm": 0.8958199968824248,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 5149
    },
    {
      "epoch": 0.0515,
      "grad_norm": 0.8559458186789074,
      "learning_rate": 0.003,
      "loss": 4.14,
      "step": 5150
    },
    {
      "epoch": 0.05151,
      "grad_norm": 0.9332584785279358,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 5151
    },
    {
      "epoch": 0.05152,
      "grad_norm": 0.9765825905780722,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 5152
    },
    {
      "epoch": 0.05153,
      "grad_norm": 0.9840577439203694,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 5153
    },
    {
      "epoch": 0.05154,
      "grad_norm": 1.247678420318687,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 5154
    },
    {
      "epoch": 0.05155,
      "grad_norm": 0.8909111698042794,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 5155
    },
    {
      "epoch": 0.05156,
      "grad_norm": 0.8794649041747071,
      "learning_rate": 0.003,
      "loss": 4.1605,
      "step": 5156
    },
    {
      "epoch": 0.05157,
      "grad_norm": 0.8299673058125748,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 5157
    },
    {
      "epoch": 0.05158,
      "grad_norm": 0.8342651114569559,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5158
    },
    {
      "epoch": 0.05159,
      "grad_norm": 0.9556371282557828,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 5159
    },
    {
      "epoch": 0.0516,
      "grad_norm": 1.0224358360807535,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 5160
    },
    {
      "epoch": 0.05161,
      "grad_norm": 1.2146298773247146,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 5161
    },
    {
      "epoch": 0.05162,
      "grad_norm": 0.9045538434599459,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 5162
    },
    {
      "epoch": 0.05163,
      "grad_norm": 0.8480691004844201,
      "learning_rate": 0.003,
      "loss": 4.1363,
      "step": 5163
    },
    {
      "epoch": 0.05164,
      "grad_norm": 0.8243976283594234,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5164
    },
    {
      "epoch": 0.05165,
      "grad_norm": 0.932262551471062,
      "learning_rate": 0.003,
      "loss": 4.1336,
      "step": 5165
    },
    {
      "epoch": 0.05166,
      "grad_norm": 0.8908185716264864,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 5166
    },
    {
      "epoch": 0.05167,
      "grad_norm": 0.8051457712052962,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 5167
    },
    {
      "epoch": 0.05168,
      "grad_norm": 0.9134452565395993,
      "learning_rate": 0.003,
      "loss": 4.1709,
      "step": 5168
    },
    {
      "epoch": 0.05169,
      "grad_norm": 1.132338869492368,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 5169
    },
    {
      "epoch": 0.0517,
      "grad_norm": 0.9742389137238414,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 5170
    },
    {
      "epoch": 0.05171,
      "grad_norm": 1.067704962383225,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 5171
    },
    {
      "epoch": 0.05172,
      "grad_norm": 0.8196743774447537,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 5172
    },
    {
      "epoch": 0.05173,
      "grad_norm": 0.7197901038637725,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 5173
    },
    {
      "epoch": 0.05174,
      "grad_norm": 0.6647802187127981,
      "learning_rate": 0.003,
      "loss": 4.1305,
      "step": 5174
    },
    {
      "epoch": 0.05175,
      "grad_norm": 0.6834092900736586,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 5175
    },
    {
      "epoch": 0.05176,
      "grad_norm": 0.6719272953115575,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 5176
    },
    {
      "epoch": 0.05177,
      "grad_norm": 0.7402517697049626,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 5177
    },
    {
      "epoch": 0.05178,
      "grad_norm": 0.9402641067102157,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 5178
    },
    {
      "epoch": 0.05179,
      "grad_norm": 1.1891054842856807,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 5179
    },
    {
      "epoch": 0.0518,
      "grad_norm": 0.8356173492045658,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5180
    },
    {
      "epoch": 0.05181,
      "grad_norm": 0.8309031331284943,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 5181
    },
    {
      "epoch": 0.05182,
      "grad_norm": 0.7669452922855337,
      "learning_rate": 0.003,
      "loss": 4.1363,
      "step": 5182
    },
    {
      "epoch": 0.05183,
      "grad_norm": 0.8821032377373486,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 5183
    },
    {
      "epoch": 0.05184,
      "grad_norm": 1.236399441865476,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 5184
    },
    {
      "epoch": 0.05185,
      "grad_norm": 1.0136590261857248,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 5185
    },
    {
      "epoch": 0.05186,
      "grad_norm": 0.9746961882311193,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 5186
    },
    {
      "epoch": 0.05187,
      "grad_norm": 0.9084848079843075,
      "learning_rate": 0.003,
      "loss": 4.1637,
      "step": 5187
    },
    {
      "epoch": 0.05188,
      "grad_norm": 1.1465706391107853,
      "learning_rate": 0.003,
      "loss": 4.1409,
      "step": 5188
    },
    {
      "epoch": 0.05189,
      "grad_norm": 1.0110195586113877,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 5189
    },
    {
      "epoch": 0.0519,
      "grad_norm": 0.9256433751833841,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 5190
    },
    {
      "epoch": 0.05191,
      "grad_norm": 0.9134067973569046,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 5191
    },
    {
      "epoch": 0.05192,
      "grad_norm": 0.8394166491498798,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 5192
    },
    {
      "epoch": 0.05193,
      "grad_norm": 0.8609628758113029,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 5193
    },
    {
      "epoch": 0.05194,
      "grad_norm": 0.9815840288645425,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 5194
    },
    {
      "epoch": 0.05195,
      "grad_norm": 1.0136348688047472,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 5195
    },
    {
      "epoch": 0.05196,
      "grad_norm": 1.1480365342383962,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 5196
    },
    {
      "epoch": 0.05197,
      "grad_norm": 1.012937790622735,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 5197
    },
    {
      "epoch": 0.05198,
      "grad_norm": 1.01624508607687,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 5198
    },
    {
      "epoch": 0.05199,
      "grad_norm": 1.0857847091315482,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 5199
    },
    {
      "epoch": 0.052,
      "grad_norm": 0.9295276501582445,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 5200
    },
    {
      "epoch": 0.05201,
      "grad_norm": 1.0722513948753385,
      "learning_rate": 0.003,
      "loss": 4.1361,
      "step": 5201
    },
    {
      "epoch": 0.05202,
      "grad_norm": 0.939772903818911,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 5202
    },
    {
      "epoch": 0.05203,
      "grad_norm": 0.9141550193477442,
      "learning_rate": 0.003,
      "loss": 4.1491,
      "step": 5203
    },
    {
      "epoch": 0.05204,
      "grad_norm": 1.1559198563879414,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 5204
    },
    {
      "epoch": 0.05205,
      "grad_norm": 1.0879097255219679,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 5205
    },
    {
      "epoch": 0.05206,
      "grad_norm": 1.116664769475625,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 5206
    },
    {
      "epoch": 0.05207,
      "grad_norm": 0.8807032835753495,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 5207
    },
    {
      "epoch": 0.05208,
      "grad_norm": 0.9569291327418711,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 5208
    },
    {
      "epoch": 0.05209,
      "grad_norm": 1.0490164635826829,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 5209
    },
    {
      "epoch": 0.0521,
      "grad_norm": 1.0917925759655678,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 5210
    },
    {
      "epoch": 0.05211,
      "grad_norm": 0.8008924222179035,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 5211
    },
    {
      "epoch": 0.05212,
      "grad_norm": 0.7902460429952818,
      "learning_rate": 0.003,
      "loss": 4.163,
      "step": 5212
    },
    {
      "epoch": 0.05213,
      "grad_norm": 0.8144312236237736,
      "learning_rate": 0.003,
      "loss": 4.1184,
      "step": 5213
    },
    {
      "epoch": 0.05214,
      "grad_norm": 0.9364089538217132,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 5214
    },
    {
      "epoch": 0.05215,
      "grad_norm": 1.0643005071151037,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 5215
    },
    {
      "epoch": 0.05216,
      "grad_norm": 0.8962708070858758,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 5216
    },
    {
      "epoch": 0.05217,
      "grad_norm": 0.9767411427633232,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 5217
    },
    {
      "epoch": 0.05218,
      "grad_norm": 1.0676104045202033,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5218
    },
    {
      "epoch": 0.05219,
      "grad_norm": 1.1483320645297568,
      "learning_rate": 0.003,
      "loss": 4.1451,
      "step": 5219
    },
    {
      "epoch": 0.0522,
      "grad_norm": 0.9125640959202299,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 5220
    },
    {
      "epoch": 0.05221,
      "grad_norm": 0.7553348099730735,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 5221
    },
    {
      "epoch": 0.05222,
      "grad_norm": 0.7974155600968277,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 5222
    },
    {
      "epoch": 0.05223,
      "grad_norm": 0.9496439860291601,
      "learning_rate": 0.003,
      "loss": 4.1454,
      "step": 5223
    },
    {
      "epoch": 0.05224,
      "grad_norm": 1.16338354437749,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 5224
    },
    {
      "epoch": 0.05225,
      "grad_norm": 0.7847885589485009,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 5225
    },
    {
      "epoch": 0.05226,
      "grad_norm": 0.7629525693734982,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 5226
    },
    {
      "epoch": 0.05227,
      "grad_norm": 0.8426081978751812,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 5227
    },
    {
      "epoch": 0.05228,
      "grad_norm": 0.9894193979113189,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 5228
    },
    {
      "epoch": 0.05229,
      "grad_norm": 0.9793859414490865,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 5229
    },
    {
      "epoch": 0.0523,
      "grad_norm": 1.1720102953944842,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 5230
    },
    {
      "epoch": 0.05231,
      "grad_norm": 0.8997010664140895,
      "learning_rate": 0.003,
      "loss": 4.1335,
      "step": 5231
    },
    {
      "epoch": 0.05232,
      "grad_norm": 1.0693983537886569,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 5232
    },
    {
      "epoch": 0.05233,
      "grad_norm": 1.1956318009565163,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 5233
    },
    {
      "epoch": 0.05234,
      "grad_norm": 0.8983927893087599,
      "learning_rate": 0.003,
      "loss": 4.1515,
      "step": 5234
    },
    {
      "epoch": 0.05235,
      "grad_norm": 1.108065774608959,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 5235
    },
    {
      "epoch": 0.05236,
      "grad_norm": 0.9295908500527476,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 5236
    },
    {
      "epoch": 0.05237,
      "grad_norm": 0.960476320153068,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 5237
    },
    {
      "epoch": 0.05238,
      "grad_norm": 1.057289393831159,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 5238
    },
    {
      "epoch": 0.05239,
      "grad_norm": 1.204033369603102,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 5239
    },
    {
      "epoch": 0.0524,
      "grad_norm": 0.8690312577527531,
      "learning_rate": 0.003,
      "loss": 4.1418,
      "step": 5240
    },
    {
      "epoch": 0.05241,
      "grad_norm": 0.7542309413903446,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 5241
    },
    {
      "epoch": 0.05242,
      "grad_norm": 0.7680576081041368,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 5242
    },
    {
      "epoch": 0.05243,
      "grad_norm": 0.9313741587108058,
      "learning_rate": 0.003,
      "loss": 4.1363,
      "step": 5243
    },
    {
      "epoch": 0.05244,
      "grad_norm": 1.066443957086773,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 5244
    },
    {
      "epoch": 0.05245,
      "grad_norm": 1.109894429188262,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 5245
    },
    {
      "epoch": 0.05246,
      "grad_norm": 1.2624847108853103,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 5246
    },
    {
      "epoch": 0.05247,
      "grad_norm": 0.7462508145722354,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 5247
    },
    {
      "epoch": 0.05248,
      "grad_norm": 0.8237453540746109,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 5248
    },
    {
      "epoch": 0.05249,
      "grad_norm": 0.9781734225083963,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 5249
    },
    {
      "epoch": 0.0525,
      "grad_norm": 1.095567790576504,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 5250
    },
    {
      "epoch": 0.05251,
      "grad_norm": 1.1083386982850694,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 5251
    },
    {
      "epoch": 0.05252,
      "grad_norm": 0.890687010798308,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 5252
    },
    {
      "epoch": 0.05253,
      "grad_norm": 0.990651302574722,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 5253
    },
    {
      "epoch": 0.05254,
      "grad_norm": 1.030601009707246,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 5254
    },
    {
      "epoch": 0.05255,
      "grad_norm": 0.9511829626380641,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 5255
    },
    {
      "epoch": 0.05256,
      "grad_norm": 1.0335656090038043,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 5256
    },
    {
      "epoch": 0.05257,
      "grad_norm": 1.1381395017816789,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 5257
    },
    {
      "epoch": 0.05258,
      "grad_norm": 0.8499060638291269,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 5258
    },
    {
      "epoch": 0.05259,
      "grad_norm": 0.7311247197745678,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 5259
    },
    {
      "epoch": 0.0526,
      "grad_norm": 0.6682808585819072,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 5260
    },
    {
      "epoch": 0.05261,
      "grad_norm": 0.7141975868734773,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 5261
    },
    {
      "epoch": 0.05262,
      "grad_norm": 0.9329305583466294,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 5262
    },
    {
      "epoch": 0.05263,
      "grad_norm": 1.0911979951064397,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 5263
    },
    {
      "epoch": 0.05264,
      "grad_norm": 0.9231843005490539,
      "learning_rate": 0.003,
      "loss": 4.1481,
      "step": 5264
    },
    {
      "epoch": 0.05265,
      "grad_norm": 0.8912385368222187,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 5265
    },
    {
      "epoch": 0.05266,
      "grad_norm": 1.0102934580335026,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 5266
    },
    {
      "epoch": 0.05267,
      "grad_norm": 1.0589226520761357,
      "learning_rate": 0.003,
      "loss": 4.152,
      "step": 5267
    },
    {
      "epoch": 0.05268,
      "grad_norm": 1.0573701409369491,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 5268
    },
    {
      "epoch": 0.05269,
      "grad_norm": 1.0527832464427491,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 5269
    },
    {
      "epoch": 0.0527,
      "grad_norm": 1.2210421513264824,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 5270
    },
    {
      "epoch": 0.05271,
      "grad_norm": 0.8659230590416374,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 5271
    },
    {
      "epoch": 0.05272,
      "grad_norm": 0.8213928297594013,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 5272
    },
    {
      "epoch": 0.05273,
      "grad_norm": 0.8661885686925771,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 5273
    },
    {
      "epoch": 0.05274,
      "grad_norm": 0.9513399963087238,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 5274
    },
    {
      "epoch": 0.05275,
      "grad_norm": 0.9767073542295025,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 5275
    },
    {
      "epoch": 0.05276,
      "grad_norm": 1.0855056908438585,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 5276
    },
    {
      "epoch": 0.05277,
      "grad_norm": 1.049255203197355,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 5277
    },
    {
      "epoch": 0.05278,
      "grad_norm": 1.1033706045707017,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 5278
    },
    {
      "epoch": 0.05279,
      "grad_norm": 0.870576303775063,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 5279
    },
    {
      "epoch": 0.0528,
      "grad_norm": 0.7247735805655298,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 5280
    },
    {
      "epoch": 0.05281,
      "grad_norm": 0.7279116242105975,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 5281
    },
    {
      "epoch": 0.05282,
      "grad_norm": 0.6767646560568664,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 5282
    },
    {
      "epoch": 0.05283,
      "grad_norm": 0.6311016160426699,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 5283
    },
    {
      "epoch": 0.05284,
      "grad_norm": 0.8176724475761017,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 5284
    },
    {
      "epoch": 0.05285,
      "grad_norm": 1.152437810958939,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 5285
    },
    {
      "epoch": 0.05286,
      "grad_norm": 1.0650058217265772,
      "learning_rate": 0.003,
      "loss": 4.1548,
      "step": 5286
    },
    {
      "epoch": 0.05287,
      "grad_norm": 0.7344115850568415,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 5287
    },
    {
      "epoch": 0.05288,
      "grad_norm": 0.7465352407381385,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 5288
    },
    {
      "epoch": 0.05289,
      "grad_norm": 0.9583980517974787,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 5289
    },
    {
      "epoch": 0.0529,
      "grad_norm": 1.1218838641820494,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 5290
    },
    {
      "epoch": 0.05291,
      "grad_norm": 0.9074431149136537,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 5291
    },
    {
      "epoch": 0.05292,
      "grad_norm": 0.8727673415619097,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 5292
    },
    {
      "epoch": 0.05293,
      "grad_norm": 0.8526331049950101,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 5293
    },
    {
      "epoch": 0.05294,
      "grad_norm": 0.9787782709064855,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 5294
    },
    {
      "epoch": 0.05295,
      "grad_norm": 1.0826337231740053,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 5295
    },
    {
      "epoch": 0.05296,
      "grad_norm": 0.8743798848979503,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 5296
    },
    {
      "epoch": 0.05297,
      "grad_norm": 0.8905436718147242,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 5297
    },
    {
      "epoch": 0.05298,
      "grad_norm": 0.9159685627131691,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 5298
    },
    {
      "epoch": 0.05299,
      "grad_norm": 1.052082610357495,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 5299
    },
    {
      "epoch": 0.053,
      "grad_norm": 1.254455298911267,
      "learning_rate": 0.003,
      "loss": 4.1459,
      "step": 5300
    },
    {
      "epoch": 0.05301,
      "grad_norm": 0.8905522802074054,
      "learning_rate": 0.003,
      "loss": 4.1463,
      "step": 5301
    },
    {
      "epoch": 0.05302,
      "grad_norm": 0.9386802829956024,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 5302
    },
    {
      "epoch": 0.05303,
      "grad_norm": 1.010270978418881,
      "learning_rate": 0.003,
      "loss": 4.1358,
      "step": 5303
    },
    {
      "epoch": 0.05304,
      "grad_norm": 1.168823963382456,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 5304
    },
    {
      "epoch": 0.05305,
      "grad_norm": 0.9968521036034549,
      "learning_rate": 0.003,
      "loss": 4.1311,
      "step": 5305
    },
    {
      "epoch": 0.05306,
      "grad_norm": 1.1415999424056091,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 5306
    },
    {
      "epoch": 0.05307,
      "grad_norm": 0.8577763401656362,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 5307
    },
    {
      "epoch": 0.05308,
      "grad_norm": 0.8065378663634303,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 5308
    },
    {
      "epoch": 0.05309,
      "grad_norm": 0.8620894674479869,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 5309
    },
    {
      "epoch": 0.0531,
      "grad_norm": 0.8265410133677272,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 5310
    },
    {
      "epoch": 0.05311,
      "grad_norm": 0.840656557399505,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 5311
    },
    {
      "epoch": 0.05312,
      "grad_norm": 0.9477091831792939,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 5312
    },
    {
      "epoch": 0.05313,
      "grad_norm": 0.899466744931056,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 5313
    },
    {
      "epoch": 0.05314,
      "grad_norm": 0.778035937952836,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 5314
    },
    {
      "epoch": 0.05315,
      "grad_norm": 0.7258696044493477,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 5315
    },
    {
      "epoch": 0.05316,
      "grad_norm": 0.7881753340530017,
      "learning_rate": 0.003,
      "loss": 4.1286,
      "step": 5316
    },
    {
      "epoch": 0.05317,
      "grad_norm": 0.8930575208143203,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 5317
    },
    {
      "epoch": 0.05318,
      "grad_norm": 1.1521265057134467,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 5318
    },
    {
      "epoch": 0.05319,
      "grad_norm": 1.241887421255421,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 5319
    },
    {
      "epoch": 0.0532,
      "grad_norm": 0.8187535312680814,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 5320
    },
    {
      "epoch": 0.05321,
      "grad_norm": 0.8332689083765074,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 5321
    },
    {
      "epoch": 0.05322,
      "grad_norm": 0.9035176420820373,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 5322
    },
    {
      "epoch": 0.05323,
      "grad_norm": 0.8995073997128675,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 5323
    },
    {
      "epoch": 0.05324,
      "grad_norm": 0.8729634947781086,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 5324
    },
    {
      "epoch": 0.05325,
      "grad_norm": 0.8236931256668504,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 5325
    },
    {
      "epoch": 0.05326,
      "grad_norm": 0.9692358504853177,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 5326
    },
    {
      "epoch": 0.05327,
      "grad_norm": 1.176101038605182,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 5327
    },
    {
      "epoch": 0.05328,
      "grad_norm": 1.0752975657295243,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 5328
    },
    {
      "epoch": 0.05329,
      "grad_norm": 0.9034182614898145,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 5329
    },
    {
      "epoch": 0.0533,
      "grad_norm": 0.9520954885772951,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 5330
    },
    {
      "epoch": 0.05331,
      "grad_norm": 1.0638411915039017,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 5331
    },
    {
      "epoch": 0.05332,
      "grad_norm": 1.0918447593613807,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 5332
    },
    {
      "epoch": 0.05333,
      "grad_norm": 1.0190559714048477,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 5333
    },
    {
      "epoch": 0.05334,
      "grad_norm": 1.0666691935424213,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 5334
    },
    {
      "epoch": 0.05335,
      "grad_norm": 0.9983956702837128,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 5335
    },
    {
      "epoch": 0.05336,
      "grad_norm": 1.1601743720224316,
      "learning_rate": 0.003,
      "loss": 4.1541,
      "step": 5336
    },
    {
      "epoch": 0.05337,
      "grad_norm": 0.7881208672585496,
      "learning_rate": 0.003,
      "loss": 4.1362,
      "step": 5337
    },
    {
      "epoch": 0.05338,
      "grad_norm": 0.7044325113303782,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 5338
    },
    {
      "epoch": 0.05339,
      "grad_norm": 0.8906022550747246,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 5339
    },
    {
      "epoch": 0.0534,
      "grad_norm": 1.0726295657720748,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 5340
    },
    {
      "epoch": 0.05341,
      "grad_norm": 1.0067520134393941,
      "learning_rate": 0.003,
      "loss": 4.1366,
      "step": 5341
    },
    {
      "epoch": 0.05342,
      "grad_norm": 1.1915314362667089,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 5342
    },
    {
      "epoch": 0.05343,
      "grad_norm": 1.0367148889053333,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 5343
    },
    {
      "epoch": 0.05344,
      "grad_norm": 0.8675512667584127,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 5344
    },
    {
      "epoch": 0.05345,
      "grad_norm": 0.7665396986619814,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 5345
    },
    {
      "epoch": 0.05346,
      "grad_norm": 0.7754587564090736,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 5346
    },
    {
      "epoch": 0.05347,
      "grad_norm": 0.8030514376721718,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 5347
    },
    {
      "epoch": 0.05348,
      "grad_norm": 0.8046117284167067,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 5348
    },
    {
      "epoch": 0.05349,
      "grad_norm": 0.849059304723128,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 5349
    },
    {
      "epoch": 0.0535,
      "grad_norm": 1.082067485015541,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 5350
    },
    {
      "epoch": 0.05351,
      "grad_norm": 1.1906159889512828,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 5351
    },
    {
      "epoch": 0.05352,
      "grad_norm": 1.0756028658025156,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 5352
    },
    {
      "epoch": 0.05353,
      "grad_norm": 1.0059552387327784,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 5353
    },
    {
      "epoch": 0.05354,
      "grad_norm": 1.1282632737459894,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 5354
    },
    {
      "epoch": 0.05355,
      "grad_norm": 0.9652277661616635,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 5355
    },
    {
      "epoch": 0.05356,
      "grad_norm": 0.9108289260268686,
      "learning_rate": 0.003,
      "loss": 4.1412,
      "step": 5356
    },
    {
      "epoch": 0.05357,
      "grad_norm": 0.7828004246923763,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 5357
    },
    {
      "epoch": 0.05358,
      "grad_norm": 0.9168739207771537,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 5358
    },
    {
      "epoch": 0.05359,
      "grad_norm": 1.0075080046155496,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 5359
    },
    {
      "epoch": 0.0536,
      "grad_norm": 1.0970792845542654,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 5360
    },
    {
      "epoch": 0.05361,
      "grad_norm": 0.8824114030322678,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 5361
    },
    {
      "epoch": 0.05362,
      "grad_norm": 0.993743544871738,
      "learning_rate": 0.003,
      "loss": 4.1425,
      "step": 5362
    },
    {
      "epoch": 0.05363,
      "grad_norm": 0.9312141932011715,
      "learning_rate": 0.003,
      "loss": 4.147,
      "step": 5363
    },
    {
      "epoch": 0.05364,
      "grad_norm": 0.8022980893619517,
      "learning_rate": 0.003,
      "loss": 4.1398,
      "step": 5364
    },
    {
      "epoch": 0.05365,
      "grad_norm": 0.7351009835979198,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 5365
    },
    {
      "epoch": 0.05366,
      "grad_norm": 0.7464822800282958,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 5366
    },
    {
      "epoch": 0.05367,
      "grad_norm": 0.8388818072236447,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 5367
    },
    {
      "epoch": 0.05368,
      "grad_norm": 0.9344086732131297,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5368
    },
    {
      "epoch": 0.05369,
      "grad_norm": 1.0848241130255325,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 5369
    },
    {
      "epoch": 0.0537,
      "grad_norm": 1.0620467839392764,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 5370
    },
    {
      "epoch": 0.05371,
      "grad_norm": 1.2010940806246506,
      "learning_rate": 0.003,
      "loss": 4.1479,
      "step": 5371
    },
    {
      "epoch": 0.05372,
      "grad_norm": 0.7001840635960661,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 5372
    },
    {
      "epoch": 0.05373,
      "grad_norm": 0.780826293502416,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 5373
    },
    {
      "epoch": 0.05374,
      "grad_norm": 1.0632965886449934,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 5374
    },
    {
      "epoch": 0.05375,
      "grad_norm": 1.1966014514160626,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 5375
    },
    {
      "epoch": 0.05376,
      "grad_norm": 0.8842297934560165,
      "learning_rate": 0.003,
      "loss": 4.1389,
      "step": 5376
    },
    {
      "epoch": 0.05377,
      "grad_norm": 0.778786324051496,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5377
    },
    {
      "epoch": 0.05378,
      "grad_norm": 0.8596824656028225,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 5378
    },
    {
      "epoch": 0.05379,
      "grad_norm": 0.9934380169213879,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 5379
    },
    {
      "epoch": 0.0538,
      "grad_norm": 0.9638667426110514,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 5380
    },
    {
      "epoch": 0.05381,
      "grad_norm": 1.09393211180781,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 5381
    },
    {
      "epoch": 0.05382,
      "grad_norm": 0.7651739896803385,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 5382
    },
    {
      "epoch": 0.05383,
      "grad_norm": 0.7092093674621837,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 5383
    },
    {
      "epoch": 0.05384,
      "grad_norm": 0.6802032392903303,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 5384
    },
    {
      "epoch": 0.05385,
      "grad_norm": 0.7953694996673716,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 5385
    },
    {
      "epoch": 0.05386,
      "grad_norm": 1.0544432753666775,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 5386
    },
    {
      "epoch": 0.05387,
      "grad_norm": 1.1016215087001056,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 5387
    },
    {
      "epoch": 0.05388,
      "grad_norm": 1.0432563739342082,
      "learning_rate": 0.003,
      "loss": 4.1506,
      "step": 5388
    },
    {
      "epoch": 0.05389,
      "grad_norm": 1.024764584061503,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 5389
    },
    {
      "epoch": 0.0539,
      "grad_norm": 1.1680948971945517,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 5390
    },
    {
      "epoch": 0.05391,
      "grad_norm": 1.063797358927456,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 5391
    },
    {
      "epoch": 0.05392,
      "grad_norm": 1.0207694285265143,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 5392
    },
    {
      "epoch": 0.05393,
      "grad_norm": 1.0736035287161585,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 5393
    },
    {
      "epoch": 0.05394,
      "grad_norm": 1.0325659552141049,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 5394
    },
    {
      "epoch": 0.05395,
      "grad_norm": 0.8864130337421087,
      "learning_rate": 0.003,
      "loss": 4.1451,
      "step": 5395
    },
    {
      "epoch": 0.05396,
      "grad_norm": 0.7998033184393275,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 5396
    },
    {
      "epoch": 0.05397,
      "grad_norm": 0.8620800324774359,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 5397
    },
    {
      "epoch": 0.05398,
      "grad_norm": 0.9934924712119845,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 5398
    },
    {
      "epoch": 0.05399,
      "grad_norm": 1.0677747326074263,
      "learning_rate": 0.003,
      "loss": 4.1455,
      "step": 5399
    },
    {
      "epoch": 0.054,
      "grad_norm": 1.0851959853077549,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 5400
    },
    {
      "epoch": 0.05401,
      "grad_norm": 0.9114774952428576,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 5401
    },
    {
      "epoch": 0.05402,
      "grad_norm": 0.7885051914531169,
      "learning_rate": 0.003,
      "loss": 4.1396,
      "step": 5402
    },
    {
      "epoch": 0.05403,
      "grad_norm": 0.8695681565380565,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 5403
    },
    {
      "epoch": 0.05404,
      "grad_norm": 0.8524754859405422,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 5404
    },
    {
      "epoch": 0.05405,
      "grad_norm": 0.925811086709952,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 5405
    },
    {
      "epoch": 0.05406,
      "grad_norm": 1.1266556587941388,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 5406
    },
    {
      "epoch": 0.05407,
      "grad_norm": 0.8810690573065147,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 5407
    },
    {
      "epoch": 0.05408,
      "grad_norm": 0.9725214113742383,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 5408
    },
    {
      "epoch": 0.05409,
      "grad_norm": 0.9978772080924893,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 5409
    },
    {
      "epoch": 0.0541,
      "grad_norm": 1.0553800598077203,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 5410
    },
    {
      "epoch": 0.05411,
      "grad_norm": 0.8112327791737242,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 5411
    },
    {
      "epoch": 0.05412,
      "grad_norm": 0.814707239730718,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 5412
    },
    {
      "epoch": 0.05413,
      "grad_norm": 0.8573040534301559,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 5413
    },
    {
      "epoch": 0.05414,
      "grad_norm": 1.0569847685652707,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 5414
    },
    {
      "epoch": 0.05415,
      "grad_norm": 1.296000338820238,
      "learning_rate": 0.003,
      "loss": 4.1445,
      "step": 5415
    },
    {
      "epoch": 0.05416,
      "grad_norm": 0.7564909243699854,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 5416
    },
    {
      "epoch": 0.05417,
      "grad_norm": 0.6579679791300916,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 5417
    },
    {
      "epoch": 0.05418,
      "grad_norm": 0.7369314194114258,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 5418
    },
    {
      "epoch": 0.05419,
      "grad_norm": 0.9712634198986607,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 5419
    },
    {
      "epoch": 0.0542,
      "grad_norm": 1.6010509746450199,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 5420
    },
    {
      "epoch": 0.05421,
      "grad_norm": 0.7403826711098679,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 5421
    },
    {
      "epoch": 0.05422,
      "grad_norm": 0.8348547775605684,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 5422
    },
    {
      "epoch": 0.05423,
      "grad_norm": 1.0466376848312227,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 5423
    },
    {
      "epoch": 0.05424,
      "grad_norm": 1.1320796829971231,
      "learning_rate": 0.003,
      "loss": 4.1578,
      "step": 5424
    },
    {
      "epoch": 0.05425,
      "grad_norm": 0.9903561882831523,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 5425
    },
    {
      "epoch": 0.05426,
      "grad_norm": 0.845398305003601,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 5426
    },
    {
      "epoch": 0.05427,
      "grad_norm": 0.8221075578228308,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 5427
    },
    {
      "epoch": 0.05428,
      "grad_norm": 0.8509764151091259,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 5428
    },
    {
      "epoch": 0.05429,
      "grad_norm": 0.9581157952224152,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 5429
    },
    {
      "epoch": 0.0543,
      "grad_norm": 0.9436738122904814,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 5430
    },
    {
      "epoch": 0.05431,
      "grad_norm": 0.9842222295174526,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5431
    },
    {
      "epoch": 0.05432,
      "grad_norm": 0.9492659298620857,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 5432
    },
    {
      "epoch": 0.05433,
      "grad_norm": 1.0409726423743026,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 5433
    },
    {
      "epoch": 0.05434,
      "grad_norm": 1.2078024184830263,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 5434
    },
    {
      "epoch": 0.05435,
      "grad_norm": 1.0505703926974785,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 5435
    },
    {
      "epoch": 0.05436,
      "grad_norm": 1.1873356578446181,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 5436
    },
    {
      "epoch": 0.05437,
      "grad_norm": 0.9961938731710348,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 5437
    },
    {
      "epoch": 0.05438,
      "grad_norm": 1.116985738059544,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 5438
    },
    {
      "epoch": 0.05439,
      "grad_norm": 1.013140285858862,
      "learning_rate": 0.003,
      "loss": 4.1226,
      "step": 5439
    },
    {
      "epoch": 0.0544,
      "grad_norm": 0.97878397740082,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 5440
    },
    {
      "epoch": 0.05441,
      "grad_norm": 1.047952260173831,
      "learning_rate": 0.003,
      "loss": 4.1485,
      "step": 5441
    },
    {
      "epoch": 0.05442,
      "grad_norm": 1.0168380196792572,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 5442
    },
    {
      "epoch": 0.05443,
      "grad_norm": 0.8830876213922452,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 5443
    },
    {
      "epoch": 0.05444,
      "grad_norm": 0.8682786365558881,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 5444
    },
    {
      "epoch": 0.05445,
      "grad_norm": 0.9756475817613688,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 5445
    },
    {
      "epoch": 0.05446,
      "grad_norm": 0.9934036218079088,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 5446
    },
    {
      "epoch": 0.05447,
      "grad_norm": 0.9369260577718805,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 5447
    },
    {
      "epoch": 0.05448,
      "grad_norm": 0.9630471244706246,
      "learning_rate": 0.003,
      "loss": 4.1466,
      "step": 5448
    },
    {
      "epoch": 0.05449,
      "grad_norm": 0.8852681416751951,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 5449
    },
    {
      "epoch": 0.0545,
      "grad_norm": 0.8723516400396613,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 5450
    },
    {
      "epoch": 0.05451,
      "grad_norm": 0.7744610055329229,
      "learning_rate": 0.003,
      "loss": 4.1513,
      "step": 5451
    },
    {
      "epoch": 0.05452,
      "grad_norm": 0.7334830805607475,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 5452
    },
    {
      "epoch": 0.05453,
      "grad_norm": 0.6840465052326917,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5453
    },
    {
      "epoch": 0.05454,
      "grad_norm": 0.7047366215451967,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 5454
    },
    {
      "epoch": 0.05455,
      "grad_norm": 0.7373515590693615,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 5455
    },
    {
      "epoch": 0.05456,
      "grad_norm": 0.7466741982652629,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 5456
    },
    {
      "epoch": 0.05457,
      "grad_norm": 0.9180906670943273,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 5457
    },
    {
      "epoch": 0.05458,
      "grad_norm": 1.344651162550162,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 5458
    },
    {
      "epoch": 0.05459,
      "grad_norm": 0.9718892443666932,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 5459
    },
    {
      "epoch": 0.0546,
      "grad_norm": 1.1893468224298873,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 5460
    },
    {
      "epoch": 0.05461,
      "grad_norm": 0.9099724886418035,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 5461
    },
    {
      "epoch": 0.05462,
      "grad_norm": 0.8909858664433765,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 5462
    },
    {
      "epoch": 0.05463,
      "grad_norm": 0.943471639674701,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 5463
    },
    {
      "epoch": 0.05464,
      "grad_norm": 1.006359112858614,
      "learning_rate": 0.003,
      "loss": 4.1475,
      "step": 5464
    },
    {
      "epoch": 0.05465,
      "grad_norm": 0.9070328242868714,
      "learning_rate": 0.003,
      "loss": 4.1421,
      "step": 5465
    },
    {
      "epoch": 0.05466,
      "grad_norm": 0.802164321014551,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 5466
    },
    {
      "epoch": 0.05467,
      "grad_norm": 0.9316649207868387,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 5467
    },
    {
      "epoch": 0.05468,
      "grad_norm": 1.353349775155964,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 5468
    },
    {
      "epoch": 0.05469,
      "grad_norm": 0.9167605119347981,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 5469
    },
    {
      "epoch": 0.0547,
      "grad_norm": 0.978636038190918,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 5470
    },
    {
      "epoch": 0.05471,
      "grad_norm": 1.1336275611473108,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 5471
    },
    {
      "epoch": 0.05472,
      "grad_norm": 1.0636357857736143,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 5472
    },
    {
      "epoch": 0.05473,
      "grad_norm": 0.9598520665828051,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 5473
    },
    {
      "epoch": 0.05474,
      "grad_norm": 1.2065527518151113,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 5474
    },
    {
      "epoch": 0.05475,
      "grad_norm": 0.9321565799401134,
      "learning_rate": 0.003,
      "loss": 4.1726,
      "step": 5475
    },
    {
      "epoch": 0.05476,
      "grad_norm": 0.9847486405198328,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 5476
    },
    {
      "epoch": 0.05477,
      "grad_norm": 1.0242683669210109,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 5477
    },
    {
      "epoch": 0.05478,
      "grad_norm": 1.044507174514191,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 5478
    },
    {
      "epoch": 0.05479,
      "grad_norm": 1.1644979991385647,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 5479
    },
    {
      "epoch": 0.0548,
      "grad_norm": 1.1314239580426855,
      "learning_rate": 0.003,
      "loss": 4.1422,
      "step": 5480
    },
    {
      "epoch": 0.05481,
      "grad_norm": 1.037870781059475,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5481
    },
    {
      "epoch": 0.05482,
      "grad_norm": 0.8116803859655726,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 5482
    },
    {
      "epoch": 0.05483,
      "grad_norm": 0.8083557186166936,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 5483
    },
    {
      "epoch": 0.05484,
      "grad_norm": 0.9299734149282691,
      "learning_rate": 0.003,
      "loss": 4.1395,
      "step": 5484
    },
    {
      "epoch": 0.05485,
      "grad_norm": 1.4501325018415843,
      "learning_rate": 0.003,
      "loss": 4.1592,
      "step": 5485
    },
    {
      "epoch": 0.05486,
      "grad_norm": 0.6907754147935445,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 5486
    },
    {
      "epoch": 0.05487,
      "grad_norm": 0.771065057500987,
      "learning_rate": 0.003,
      "loss": 4.1339,
      "step": 5487
    },
    {
      "epoch": 0.05488,
      "grad_norm": 0.9398223883665248,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 5488
    },
    {
      "epoch": 0.05489,
      "grad_norm": 1.245734722187167,
      "learning_rate": 0.003,
      "loss": 4.1595,
      "step": 5489
    },
    {
      "epoch": 0.0549,
      "grad_norm": 1.0989670327525296,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 5490
    },
    {
      "epoch": 0.05491,
      "grad_norm": 0.9071445359587079,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 5491
    },
    {
      "epoch": 0.05492,
      "grad_norm": 0.8397438252768895,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 5492
    },
    {
      "epoch": 0.05493,
      "grad_norm": 0.8946981978524265,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 5493
    },
    {
      "epoch": 0.05494,
      "grad_norm": 0.8180794667689825,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 5494
    },
    {
      "epoch": 0.05495,
      "grad_norm": 0.8005989617772755,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 5495
    },
    {
      "epoch": 0.05496,
      "grad_norm": 0.7786588826073861,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 5496
    },
    {
      "epoch": 0.05497,
      "grad_norm": 0.7956028398493934,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 5497
    },
    {
      "epoch": 0.05498,
      "grad_norm": 0.7918941742902131,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 5498
    },
    {
      "epoch": 0.05499,
      "grad_norm": 0.9382930732854762,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 5499
    },
    {
      "epoch": 0.055,
      "grad_norm": 1.126573950621417,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 5500
    },
    {
      "epoch": 0.05501,
      "grad_norm": 1.003864850005161,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 5501
    },
    {
      "epoch": 0.05502,
      "grad_norm": 1.1422394527747446,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 5502
    },
    {
      "epoch": 0.05503,
      "grad_norm": 0.7665896617368361,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5503
    },
    {
      "epoch": 0.05504,
      "grad_norm": 0.7411461800377359,
      "learning_rate": 0.003,
      "loss": 4.1551,
      "step": 5504
    },
    {
      "epoch": 0.05505,
      "grad_norm": 0.7963833583959748,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 5505
    },
    {
      "epoch": 0.05506,
      "grad_norm": 0.9257832028586459,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 5506
    },
    {
      "epoch": 0.05507,
      "grad_norm": 1.10578776496078,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 5507
    },
    {
      "epoch": 0.05508,
      "grad_norm": 1.0431087201167466,
      "learning_rate": 0.003,
      "loss": 4.1283,
      "step": 5508
    },
    {
      "epoch": 0.05509,
      "grad_norm": 1.0367422108110553,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 5509
    },
    {
      "epoch": 0.0551,
      "grad_norm": 1.2529842317031787,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 5510
    },
    {
      "epoch": 0.05511,
      "grad_norm": 0.9691541938141458,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 5511
    },
    {
      "epoch": 0.05512,
      "grad_norm": 0.864716463133856,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 5512
    },
    {
      "epoch": 0.05513,
      "grad_norm": 0.8354141565098123,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 5513
    },
    {
      "epoch": 0.05514,
      "grad_norm": 1.0376042919809385,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 5514
    },
    {
      "epoch": 0.05515,
      "grad_norm": 0.893982497355944,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 5515
    },
    {
      "epoch": 0.05516,
      "grad_norm": 0.8092294335938044,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 5516
    },
    {
      "epoch": 0.05517,
      "grad_norm": 0.8335157315875446,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 5517
    },
    {
      "epoch": 0.05518,
      "grad_norm": 0.8631670789345905,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 5518
    },
    {
      "epoch": 0.05519,
      "grad_norm": 0.772502136816785,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 5519
    },
    {
      "epoch": 0.0552,
      "grad_norm": 0.7318161157773011,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 5520
    },
    {
      "epoch": 0.05521,
      "grad_norm": 0.7805844973360749,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 5521
    },
    {
      "epoch": 0.05522,
      "grad_norm": 0.9610950814102949,
      "learning_rate": 0.003,
      "loss": 4.1492,
      "step": 5522
    },
    {
      "epoch": 0.05523,
      "grad_norm": 1.4983777342001978,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 5523
    },
    {
      "epoch": 0.05524,
      "grad_norm": 0.8114506426427744,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 5524
    },
    {
      "epoch": 0.05525,
      "grad_norm": 0.8437663742868677,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 5525
    },
    {
      "epoch": 0.05526,
      "grad_norm": 0.966987301586662,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 5526
    },
    {
      "epoch": 0.05527,
      "grad_norm": 1.2725075684475797,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 5527
    },
    {
      "epoch": 0.05528,
      "grad_norm": 0.8486880982316782,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 5528
    },
    {
      "epoch": 0.05529,
      "grad_norm": 0.9285432185402387,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 5529
    },
    {
      "epoch": 0.0553,
      "grad_norm": 1.1784089544019016,
      "learning_rate": 0.003,
      "loss": 4.1526,
      "step": 5530
    },
    {
      "epoch": 0.05531,
      "grad_norm": 0.7869416792914739,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 5531
    },
    {
      "epoch": 0.05532,
      "grad_norm": 0.8846989558217583,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 5532
    },
    {
      "epoch": 0.05533,
      "grad_norm": 0.8833239717000477,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 5533
    },
    {
      "epoch": 0.05534,
      "grad_norm": 0.8715591388693983,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 5534
    },
    {
      "epoch": 0.05535,
      "grad_norm": 0.8133286665003178,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 5535
    },
    {
      "epoch": 0.05536,
      "grad_norm": 0.926009595893208,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 5536
    },
    {
      "epoch": 0.05537,
      "grad_norm": 1.052877549031079,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 5537
    },
    {
      "epoch": 0.05538,
      "grad_norm": 0.8636003611383188,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 5538
    },
    {
      "epoch": 0.05539,
      "grad_norm": 0.8087447171104265,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 5539
    },
    {
      "epoch": 0.0554,
      "grad_norm": 0.898241169296187,
      "learning_rate": 0.003,
      "loss": 4.1631,
      "step": 5540
    },
    {
      "epoch": 0.05541,
      "grad_norm": 1.228621032338734,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 5541
    },
    {
      "epoch": 0.05542,
      "grad_norm": 0.908807188172974,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 5542
    },
    {
      "epoch": 0.05543,
      "grad_norm": 1.1675594774032338,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 5543
    },
    {
      "epoch": 0.05544,
      "grad_norm": 1.1933027347530332,
      "learning_rate": 0.003,
      "loss": 4.1574,
      "step": 5544
    },
    {
      "epoch": 0.05545,
      "grad_norm": 0.9571394512885866,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 5545
    },
    {
      "epoch": 0.05546,
      "grad_norm": 0.8590171381737576,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 5546
    },
    {
      "epoch": 0.05547,
      "grad_norm": 1.0666304751642304,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 5547
    },
    {
      "epoch": 0.05548,
      "grad_norm": 0.9704599043567833,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 5548
    },
    {
      "epoch": 0.05549,
      "grad_norm": 0.9865568862830414,
      "learning_rate": 0.003,
      "loss": 4.1432,
      "step": 5549
    },
    {
      "epoch": 0.0555,
      "grad_norm": 0.9885534149053431,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 5550
    },
    {
      "epoch": 0.05551,
      "grad_norm": 0.9749472205384399,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 5551
    },
    {
      "epoch": 0.05552,
      "grad_norm": 1.026944712612127,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 5552
    },
    {
      "epoch": 0.05553,
      "grad_norm": 1.1445015380207735,
      "learning_rate": 0.003,
      "loss": 4.1373,
      "step": 5553
    },
    {
      "epoch": 0.05554,
      "grad_norm": 1.0166458533215852,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 5554
    },
    {
      "epoch": 0.05555,
      "grad_norm": 1.0654603455457914,
      "learning_rate": 0.003,
      "loss": 4.1641,
      "step": 5555
    },
    {
      "epoch": 0.05556,
      "grad_norm": 1.0202184347102567,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 5556
    },
    {
      "epoch": 0.05557,
      "grad_norm": 1.0187438847348156,
      "learning_rate": 0.003,
      "loss": 4.1451,
      "step": 5557
    },
    {
      "epoch": 0.05558,
      "grad_norm": 1.1056836731423136,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 5558
    },
    {
      "epoch": 0.05559,
      "grad_norm": 1.0395503189831727,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 5559
    },
    {
      "epoch": 0.0556,
      "grad_norm": 1.0987107991051774,
      "learning_rate": 0.003,
      "loss": 4.1596,
      "step": 5560
    },
    {
      "epoch": 0.05561,
      "grad_norm": 0.9414302320280178,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 5561
    },
    {
      "epoch": 0.05562,
      "grad_norm": 1.080665983026407,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 5562
    },
    {
      "epoch": 0.05563,
      "grad_norm": 1.0503459917604645,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 5563
    },
    {
      "epoch": 0.05564,
      "grad_norm": 1.1247898961225558,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 5564
    },
    {
      "epoch": 0.05565,
      "grad_norm": 1.0361569490033156,
      "learning_rate": 0.003,
      "loss": 4.1594,
      "step": 5565
    },
    {
      "epoch": 0.05566,
      "grad_norm": 0.993562704860863,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 5566
    },
    {
      "epoch": 0.05567,
      "grad_norm": 0.9350452216561642,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 5567
    },
    {
      "epoch": 0.05568,
      "grad_norm": 0.9246250113104997,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 5568
    },
    {
      "epoch": 0.05569,
      "grad_norm": 1.1010360382158253,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 5569
    },
    {
      "epoch": 0.0557,
      "grad_norm": 1.014573516129285,
      "learning_rate": 0.003,
      "loss": 4.1435,
      "step": 5570
    },
    {
      "epoch": 0.05571,
      "grad_norm": 1.0901500909260304,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 5571
    },
    {
      "epoch": 0.05572,
      "grad_norm": 1.0639295197328618,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 5572
    },
    {
      "epoch": 0.05573,
      "grad_norm": 1.160526318650138,
      "learning_rate": 0.003,
      "loss": 4.1559,
      "step": 5573
    },
    {
      "epoch": 0.05574,
      "grad_norm": 0.8017141002983521,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 5574
    },
    {
      "epoch": 0.05575,
      "grad_norm": 0.7725242603041408,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 5575
    },
    {
      "epoch": 0.05576,
      "grad_norm": 0.7959745021533514,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 5576
    },
    {
      "epoch": 0.05577,
      "grad_norm": 0.8656786722945465,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 5577
    },
    {
      "epoch": 0.05578,
      "grad_norm": 1.129061918601625,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 5578
    },
    {
      "epoch": 0.05579,
      "grad_norm": 1.0600753924801507,
      "learning_rate": 0.003,
      "loss": 4.1392,
      "step": 5579
    },
    {
      "epoch": 0.0558,
      "grad_norm": 0.9542665048147827,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 5580
    },
    {
      "epoch": 0.05581,
      "grad_norm": 1.0248470489985253,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 5581
    },
    {
      "epoch": 0.05582,
      "grad_norm": 1.2221203414392114,
      "learning_rate": 0.003,
      "loss": 4.1585,
      "step": 5582
    },
    {
      "epoch": 0.05583,
      "grad_norm": 0.9829928529956432,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 5583
    },
    {
      "epoch": 0.05584,
      "grad_norm": 1.0416914686566905,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 5584
    },
    {
      "epoch": 0.05585,
      "grad_norm": 1.185036141605951,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 5585
    },
    {
      "epoch": 0.05586,
      "grad_norm": 0.8882890058762307,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 5586
    },
    {
      "epoch": 0.05587,
      "grad_norm": 0.8853504241990832,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 5587
    },
    {
      "epoch": 0.05588,
      "grad_norm": 0.9145372711332053,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5588
    },
    {
      "epoch": 0.05589,
      "grad_norm": 1.098734879665573,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 5589
    },
    {
      "epoch": 0.0559,
      "grad_norm": 1.0306332486921068,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 5590
    },
    {
      "epoch": 0.05591,
      "grad_norm": 0.9663756117466623,
      "learning_rate": 0.003,
      "loss": 4.1338,
      "step": 5591
    },
    {
      "epoch": 0.05592,
      "grad_norm": 0.9707121490476076,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 5592
    },
    {
      "epoch": 0.05593,
      "grad_norm": 0.9040080758689395,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 5593
    },
    {
      "epoch": 0.05594,
      "grad_norm": 0.8399062436913435,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 5594
    },
    {
      "epoch": 0.05595,
      "grad_norm": 0.7927817251689244,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 5595
    },
    {
      "epoch": 0.05596,
      "grad_norm": 0.7554549829981407,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 5596
    },
    {
      "epoch": 0.05597,
      "grad_norm": 0.8263782174776544,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 5597
    },
    {
      "epoch": 0.05598,
      "grad_norm": 1.0617645805745923,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 5598
    },
    {
      "epoch": 0.05599,
      "grad_norm": 1.0105155570821656,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 5599
    },
    {
      "epoch": 0.056,
      "grad_norm": 1.0218581737778532,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 5600
    },
    {
      "epoch": 0.05601,
      "grad_norm": 1.0778280577751203,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 5601
    },
    {
      "epoch": 0.05602,
      "grad_norm": 0.7582012801520183,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 5602
    },
    {
      "epoch": 0.05603,
      "grad_norm": 0.7402260100117943,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 5603
    },
    {
      "epoch": 0.05604,
      "grad_norm": 0.8873650869380406,
      "learning_rate": 0.003,
      "loss": 4.1446,
      "step": 5604
    },
    {
      "epoch": 0.05605,
      "grad_norm": 1.0177321840014748,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 5605
    },
    {
      "epoch": 0.05606,
      "grad_norm": 1.0177155480228988,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 5606
    },
    {
      "epoch": 0.05607,
      "grad_norm": 1.005812598468043,
      "learning_rate": 0.003,
      "loss": 4.152,
      "step": 5607
    },
    {
      "epoch": 0.05608,
      "grad_norm": 1.345264690947568,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 5608
    },
    {
      "epoch": 0.05609,
      "grad_norm": 0.9420214751930821,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 5609
    },
    {
      "epoch": 0.0561,
      "grad_norm": 1.2061944377166507,
      "learning_rate": 0.003,
      "loss": 4.1644,
      "step": 5610
    },
    {
      "epoch": 0.05611,
      "grad_norm": 1.0861930497452696,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 5611
    },
    {
      "epoch": 0.05612,
      "grad_norm": 0.8693052542154518,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 5612
    },
    {
      "epoch": 0.05613,
      "grad_norm": 0.8252162892679517,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 5613
    },
    {
      "epoch": 0.05614,
      "grad_norm": 0.8162230158140894,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 5614
    },
    {
      "epoch": 0.05615,
      "grad_norm": 0.7932415284599255,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 5615
    },
    {
      "epoch": 0.05616,
      "grad_norm": 0.8147303370119828,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 5616
    },
    {
      "epoch": 0.05617,
      "grad_norm": 0.979345832369033,
      "learning_rate": 0.003,
      "loss": 4.1323,
      "step": 5617
    },
    {
      "epoch": 0.05618,
      "grad_norm": 1.1939522303555639,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 5618
    },
    {
      "epoch": 0.05619,
      "grad_norm": 0.9785893247462943,
      "learning_rate": 0.003,
      "loss": 4.1734,
      "step": 5619
    },
    {
      "epoch": 0.0562,
      "grad_norm": 0.8751466109554733,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 5620
    },
    {
      "epoch": 0.05621,
      "grad_norm": 0.9221389977798955,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 5621
    },
    {
      "epoch": 0.05622,
      "grad_norm": 1.0075873675251126,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5622
    },
    {
      "epoch": 0.05623,
      "grad_norm": 1.1949478525553667,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 5623
    },
    {
      "epoch": 0.05624,
      "grad_norm": 0.8893580304093707,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 5624
    },
    {
      "epoch": 0.05625,
      "grad_norm": 0.9101874793046152,
      "learning_rate": 0.003,
      "loss": 4.1489,
      "step": 5625
    },
    {
      "epoch": 0.05626,
      "grad_norm": 0.8568178177963297,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 5626
    },
    {
      "epoch": 0.05627,
      "grad_norm": 0.8445550632051778,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 5627
    },
    {
      "epoch": 0.05628,
      "grad_norm": 0.8395278921688747,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 5628
    },
    {
      "epoch": 0.05629,
      "grad_norm": 0.8238415014776045,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 5629
    },
    {
      "epoch": 0.0563,
      "grad_norm": 0.9266145959778516,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 5630
    },
    {
      "epoch": 0.05631,
      "grad_norm": 1.041049278487491,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 5631
    },
    {
      "epoch": 0.05632,
      "grad_norm": 1.2634529953673648,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 5632
    },
    {
      "epoch": 0.05633,
      "grad_norm": 0.8743820927721512,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 5633
    },
    {
      "epoch": 0.05634,
      "grad_norm": 0.9946748940515425,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 5634
    },
    {
      "epoch": 0.05635,
      "grad_norm": 1.0906818511461622,
      "learning_rate": 0.003,
      "loss": 4.1357,
      "step": 5635
    },
    {
      "epoch": 0.05636,
      "grad_norm": 0.9660961085376262,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 5636
    },
    {
      "epoch": 0.05637,
      "grad_norm": 0.9695551749489936,
      "learning_rate": 0.003,
      "loss": 4.1638,
      "step": 5637
    },
    {
      "epoch": 0.05638,
      "grad_norm": 0.8858313611285864,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 5638
    },
    {
      "epoch": 0.05639,
      "grad_norm": 0.9018585489980617,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 5639
    },
    {
      "epoch": 0.0564,
      "grad_norm": 0.8861660445176881,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 5640
    },
    {
      "epoch": 0.05641,
      "grad_norm": 0.8951943773644586,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 5641
    },
    {
      "epoch": 0.05642,
      "grad_norm": 0.9083819743767668,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 5642
    },
    {
      "epoch": 0.05643,
      "grad_norm": 0.962671004199545,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 5643
    },
    {
      "epoch": 0.05644,
      "grad_norm": 1.077396446555887,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 5644
    },
    {
      "epoch": 0.05645,
      "grad_norm": 1.40715572500545,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 5645
    },
    {
      "epoch": 0.05646,
      "grad_norm": 0.8175124218911509,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 5646
    },
    {
      "epoch": 0.05647,
      "grad_norm": 0.7832062109640497,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 5647
    },
    {
      "epoch": 0.05648,
      "grad_norm": 0.9630055472683711,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 5648
    },
    {
      "epoch": 0.05649,
      "grad_norm": 1.0631770670815335,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 5649
    },
    {
      "epoch": 0.0565,
      "grad_norm": 1.124204894656994,
      "learning_rate": 0.003,
      "loss": 4.1458,
      "step": 5650
    },
    {
      "epoch": 0.05651,
      "grad_norm": 1.022339961520506,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 5651
    },
    {
      "epoch": 0.05652,
      "grad_norm": 0.8587439092944441,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 5652
    },
    {
      "epoch": 0.05653,
      "grad_norm": 0.7753822534356805,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 5653
    },
    {
      "epoch": 0.05654,
      "grad_norm": 0.8333542595529039,
      "learning_rate": 0.003,
      "loss": 4.139,
      "step": 5654
    },
    {
      "epoch": 0.05655,
      "grad_norm": 0.9690699423615071,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 5655
    },
    {
      "epoch": 0.05656,
      "grad_norm": 1.1790002076186354,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 5656
    },
    {
      "epoch": 0.05657,
      "grad_norm": 0.8826076546461541,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 5657
    },
    {
      "epoch": 0.05658,
      "grad_norm": 0.8843360394533153,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 5658
    },
    {
      "epoch": 0.05659,
      "grad_norm": 1.0024365762558438,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 5659
    },
    {
      "epoch": 0.0566,
      "grad_norm": 1.0811089362032615,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 5660
    },
    {
      "epoch": 0.05661,
      "grad_norm": 1.0126548273143556,
      "learning_rate": 0.003,
      "loss": 4.1648,
      "step": 5661
    },
    {
      "epoch": 0.05662,
      "grad_norm": 1.1175005958432391,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 5662
    },
    {
      "epoch": 0.05663,
      "grad_norm": 0.8123904859528547,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 5663
    },
    {
      "epoch": 0.05664,
      "grad_norm": 0.8486369117778391,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 5664
    },
    {
      "epoch": 0.05665,
      "grad_norm": 1.120613790787452,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 5665
    },
    {
      "epoch": 0.05666,
      "grad_norm": 1.0540993326958734,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 5666
    },
    {
      "epoch": 0.05667,
      "grad_norm": 0.8401662075563899,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 5667
    },
    {
      "epoch": 0.05668,
      "grad_norm": 0.8686439657617465,
      "learning_rate": 0.003,
      "loss": 4.1399,
      "step": 5668
    },
    {
      "epoch": 0.05669,
      "grad_norm": 0.9702787574694016,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 5669
    },
    {
      "epoch": 0.0567,
      "grad_norm": 1.005684782440143,
      "learning_rate": 0.003,
      "loss": 4.1283,
      "step": 5670
    },
    {
      "epoch": 0.05671,
      "grad_norm": 0.8966916733750543,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 5671
    },
    {
      "epoch": 0.05672,
      "grad_norm": 1.1718295219256112,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 5672
    },
    {
      "epoch": 0.05673,
      "grad_norm": 1.2210000503080898,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 5673
    },
    {
      "epoch": 0.05674,
      "grad_norm": 0.9196178304374668,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 5674
    },
    {
      "epoch": 0.05675,
      "grad_norm": 0.9259626109244385,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 5675
    },
    {
      "epoch": 0.05676,
      "grad_norm": 0.8929657175241067,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 5676
    },
    {
      "epoch": 0.05677,
      "grad_norm": 0.952108119774777,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 5677
    },
    {
      "epoch": 0.05678,
      "grad_norm": 1.1126100059487607,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 5678
    },
    {
      "epoch": 0.05679,
      "grad_norm": 1.0364911564982762,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 5679
    },
    {
      "epoch": 0.0568,
      "grad_norm": 1.3191412104281883,
      "learning_rate": 0.003,
      "loss": 4.13,
      "step": 5680
    },
    {
      "epoch": 0.05681,
      "grad_norm": 0.7736101131054048,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 5681
    },
    {
      "epoch": 0.05682,
      "grad_norm": 0.7593487398147124,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 5682
    },
    {
      "epoch": 0.05683,
      "grad_norm": 0.8129218561244385,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 5683
    },
    {
      "epoch": 0.05684,
      "grad_norm": 1.0333497384359716,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 5684
    },
    {
      "epoch": 0.05685,
      "grad_norm": 1.43332981994489,
      "learning_rate": 0.003,
      "loss": 4.1523,
      "step": 5685
    },
    {
      "epoch": 0.05686,
      "grad_norm": 0.8430936458304196,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 5686
    },
    {
      "epoch": 0.05687,
      "grad_norm": 0.8954710874656873,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 5687
    },
    {
      "epoch": 0.05688,
      "grad_norm": 1.0730066166878027,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 5688
    },
    {
      "epoch": 0.05689,
      "grad_norm": 1.08532099527152,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 5689
    },
    {
      "epoch": 0.0569,
      "grad_norm": 0.9067090942223068,
      "learning_rate": 0.003,
      "loss": 4.1226,
      "step": 5690
    },
    {
      "epoch": 0.05691,
      "grad_norm": 1.035726205384705,
      "learning_rate": 0.003,
      "loss": 4.1558,
      "step": 5691
    },
    {
      "epoch": 0.05692,
      "grad_norm": 0.9336873866149337,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 5692
    },
    {
      "epoch": 0.05693,
      "grad_norm": 0.8773378995984399,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 5693
    },
    {
      "epoch": 0.05694,
      "grad_norm": 0.9097271093934071,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5694
    },
    {
      "epoch": 0.05695,
      "grad_norm": 1.0985181967102629,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 5695
    },
    {
      "epoch": 0.05696,
      "grad_norm": 1.040087310747754,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 5696
    },
    {
      "epoch": 0.05697,
      "grad_norm": 0.9817632213668405,
      "learning_rate": 0.003,
      "loss": 4.1512,
      "step": 5697
    },
    {
      "epoch": 0.05698,
      "grad_norm": 1.0569743573990606,
      "learning_rate": 0.003,
      "loss": 4.1475,
      "step": 5698
    },
    {
      "epoch": 0.05699,
      "grad_norm": 1.0902526090916207,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 5699
    },
    {
      "epoch": 0.057,
      "grad_norm": 0.8928105595260728,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 5700
    },
    {
      "epoch": 0.05701,
      "grad_norm": 0.7369786806129934,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 5701
    },
    {
      "epoch": 0.05702,
      "grad_norm": 0.7065919886418875,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 5702
    },
    {
      "epoch": 0.05703,
      "grad_norm": 0.7357229062821155,
      "learning_rate": 0.003,
      "loss": 4.1494,
      "step": 5703
    },
    {
      "epoch": 0.05704,
      "grad_norm": 0.771783060800162,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 5704
    },
    {
      "epoch": 0.05705,
      "grad_norm": 0.8020733794571436,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 5705
    },
    {
      "epoch": 0.05706,
      "grad_norm": 0.8005778172331136,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 5706
    },
    {
      "epoch": 0.05707,
      "grad_norm": 0.8597236364014955,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 5707
    },
    {
      "epoch": 0.05708,
      "grad_norm": 1.063034575015758,
      "learning_rate": 0.003,
      "loss": 4.1468,
      "step": 5708
    },
    {
      "epoch": 0.05709,
      "grad_norm": 1.0484123566982753,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 5709
    },
    {
      "epoch": 0.0571,
      "grad_norm": 1.1382736991524067,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 5710
    },
    {
      "epoch": 0.05711,
      "grad_norm": 0.9787958240785122,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 5711
    },
    {
      "epoch": 0.05712,
      "grad_norm": 0.9904719131026177,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 5712
    },
    {
      "epoch": 0.05713,
      "grad_norm": 1.0238241408520392,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 5713
    },
    {
      "epoch": 0.05714,
      "grad_norm": 1.0007244420082209,
      "learning_rate": 0.003,
      "loss": 4.1626,
      "step": 5714
    },
    {
      "epoch": 0.05715,
      "grad_norm": 1.1489073266379561,
      "learning_rate": 0.003,
      "loss": 4.1692,
      "step": 5715
    },
    {
      "epoch": 0.05716,
      "grad_norm": 1.1100927142481192,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 5716
    },
    {
      "epoch": 0.05717,
      "grad_norm": 0.8696259900880453,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 5717
    },
    {
      "epoch": 0.05718,
      "grad_norm": 0.9371133901319081,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 5718
    },
    {
      "epoch": 0.05719,
      "grad_norm": 0.931314660567155,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 5719
    },
    {
      "epoch": 0.0572,
      "grad_norm": 0.9782165711056069,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 5720
    },
    {
      "epoch": 0.05721,
      "grad_norm": 1.0886623888279425,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 5721
    },
    {
      "epoch": 0.05722,
      "grad_norm": 0.9997773216929959,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 5722
    },
    {
      "epoch": 0.05723,
      "grad_norm": 0.9514627056664299,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 5723
    },
    {
      "epoch": 0.05724,
      "grad_norm": 0.8230402798369292,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 5724
    },
    {
      "epoch": 0.05725,
      "grad_norm": 0.8987819684221542,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 5725
    },
    {
      "epoch": 0.05726,
      "grad_norm": 0.9892002123491518,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 5726
    },
    {
      "epoch": 0.05727,
      "grad_norm": 1.0469586088251535,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 5727
    },
    {
      "epoch": 0.05728,
      "grad_norm": 1.0973205641994594,
      "learning_rate": 0.003,
      "loss": 4.1431,
      "step": 5728
    },
    {
      "epoch": 0.05729,
      "grad_norm": 1.0064407627655658,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 5729
    },
    {
      "epoch": 0.0573,
      "grad_norm": 1.110713311982964,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 5730
    },
    {
      "epoch": 0.05731,
      "grad_norm": 1.0241807049928666,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 5731
    },
    {
      "epoch": 0.05732,
      "grad_norm": 1.0429234117415462,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 5732
    },
    {
      "epoch": 0.05733,
      "grad_norm": 0.9357269471087896,
      "learning_rate": 0.003,
      "loss": 4.1531,
      "step": 5733
    },
    {
      "epoch": 0.05734,
      "grad_norm": 1.1141629712170185,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 5734
    },
    {
      "epoch": 0.05735,
      "grad_norm": 0.8298330992050329,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 5735
    },
    {
      "epoch": 0.05736,
      "grad_norm": 0.8744860650234516,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 5736
    },
    {
      "epoch": 0.05737,
      "grad_norm": 0.8447450239136927,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 5737
    },
    {
      "epoch": 0.05738,
      "grad_norm": 0.9189790636785452,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 5738
    },
    {
      "epoch": 0.05739,
      "grad_norm": 0.9705384160287442,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 5739
    },
    {
      "epoch": 0.0574,
      "grad_norm": 0.9376119011389905,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 5740
    },
    {
      "epoch": 0.05741,
      "grad_norm": 1.1362834727779139,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 5741
    },
    {
      "epoch": 0.05742,
      "grad_norm": 1.2370365233043261,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 5742
    },
    {
      "epoch": 0.05743,
      "grad_norm": 1.3113048377687142,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 5743
    },
    {
      "epoch": 0.05744,
      "grad_norm": 0.8776507234514279,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 5744
    },
    {
      "epoch": 0.05745,
      "grad_norm": 0.9245134319819897,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 5745
    },
    {
      "epoch": 0.05746,
      "grad_norm": 1.0258527613597674,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 5746
    },
    {
      "epoch": 0.05747,
      "grad_norm": 1.0057412762907374,
      "learning_rate": 0.003,
      "loss": 4.142,
      "step": 5747
    },
    {
      "epoch": 0.05748,
      "grad_norm": 1.1704423833777504,
      "learning_rate": 0.003,
      "loss": 4.1662,
      "step": 5748
    },
    {
      "epoch": 0.05749,
      "grad_norm": 0.8033582728084808,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 5749
    },
    {
      "epoch": 0.0575,
      "grad_norm": 0.8202271878884321,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 5750
    },
    {
      "epoch": 0.05751,
      "grad_norm": 0.8323486916968836,
      "learning_rate": 0.003,
      "loss": 4.1417,
      "step": 5751
    },
    {
      "epoch": 0.05752,
      "grad_norm": 0.824247137213286,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 5752
    },
    {
      "epoch": 0.05753,
      "grad_norm": 0.8446556412554472,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 5753
    },
    {
      "epoch": 0.05754,
      "grad_norm": 1.1966126000714083,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 5754
    },
    {
      "epoch": 0.05755,
      "grad_norm": 1.1964226101976614,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 5755
    },
    {
      "epoch": 0.05756,
      "grad_norm": 0.9699060929143068,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 5756
    },
    {
      "epoch": 0.05757,
      "grad_norm": 0.8663250789930335,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 5757
    },
    {
      "epoch": 0.05758,
      "grad_norm": 0.8597853728829586,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 5758
    },
    {
      "epoch": 0.05759,
      "grad_norm": 0.8256237685530105,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 5759
    },
    {
      "epoch": 0.0576,
      "grad_norm": 0.7431015057566424,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 5760
    },
    {
      "epoch": 0.05761,
      "grad_norm": 0.9128267641846647,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 5761
    },
    {
      "epoch": 0.05762,
      "grad_norm": 1.195756627275649,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 5762
    },
    {
      "epoch": 0.05763,
      "grad_norm": 1.141592654618907,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 5763
    },
    {
      "epoch": 0.05764,
      "grad_norm": 0.9724043681560312,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 5764
    },
    {
      "epoch": 0.05765,
      "grad_norm": 0.9718984433800117,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 5765
    },
    {
      "epoch": 0.05766,
      "grad_norm": 0.8591409992621984,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 5766
    },
    {
      "epoch": 0.05767,
      "grad_norm": 0.9080480852024645,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 5767
    },
    {
      "epoch": 0.05768,
      "grad_norm": 0.9292125393107181,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 5768
    },
    {
      "epoch": 0.05769,
      "grad_norm": 1.0176905281603372,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5769
    },
    {
      "epoch": 0.0577,
      "grad_norm": 1.160814957884824,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 5770
    },
    {
      "epoch": 0.05771,
      "grad_norm": 1.0307295215517236,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 5771
    },
    {
      "epoch": 0.05772,
      "grad_norm": 0.8386292091089582,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 5772
    },
    {
      "epoch": 0.05773,
      "grad_norm": 0.9057048000447152,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 5773
    },
    {
      "epoch": 0.05774,
      "grad_norm": 1.019919318588754,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 5774
    },
    {
      "epoch": 0.05775,
      "grad_norm": 1.0210339610557355,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 5775
    },
    {
      "epoch": 0.05776,
      "grad_norm": 1.1520039772321047,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 5776
    },
    {
      "epoch": 0.05777,
      "grad_norm": 1.010271480309236,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 5777
    },
    {
      "epoch": 0.05778,
      "grad_norm": 1.127623586377467,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5778
    },
    {
      "epoch": 0.05779,
      "grad_norm": 1.1899760224553186,
      "learning_rate": 0.003,
      "loss": 4.143,
      "step": 5779
    },
    {
      "epoch": 0.0578,
      "grad_norm": 0.9780858457407019,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 5780
    },
    {
      "epoch": 0.05781,
      "grad_norm": 1.0370323662272365,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 5781
    },
    {
      "epoch": 0.05782,
      "grad_norm": 1.113436156639303,
      "learning_rate": 0.003,
      "loss": 4.1441,
      "step": 5782
    },
    {
      "epoch": 0.05783,
      "grad_norm": 0.9966451868055693,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 5783
    },
    {
      "epoch": 0.05784,
      "grad_norm": 1.0113209641377017,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 5784
    },
    {
      "epoch": 0.05785,
      "grad_norm": 0.9893055065203489,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 5785
    },
    {
      "epoch": 0.05786,
      "grad_norm": 1.1620422429372013,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 5786
    },
    {
      "epoch": 0.05787,
      "grad_norm": 1.1194385908898823,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 5787
    },
    {
      "epoch": 0.05788,
      "grad_norm": 0.8497725539713678,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5788
    },
    {
      "epoch": 0.05789,
      "grad_norm": 0.8685312277670877,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 5789
    },
    {
      "epoch": 0.0579,
      "grad_norm": 1.0031081903513304,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 5790
    },
    {
      "epoch": 0.05791,
      "grad_norm": 0.8056020601521044,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 5791
    },
    {
      "epoch": 0.05792,
      "grad_norm": 0.8347661618494451,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 5792
    },
    {
      "epoch": 0.05793,
      "grad_norm": 0.8137473201924559,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 5793
    },
    {
      "epoch": 0.05794,
      "grad_norm": 0.9720764458835657,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 5794
    },
    {
      "epoch": 0.05795,
      "grad_norm": 1.1854800461039692,
      "learning_rate": 0.003,
      "loss": 4.1547,
      "step": 5795
    },
    {
      "epoch": 0.05796,
      "grad_norm": 0.9757913448880616,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 5796
    },
    {
      "epoch": 0.05797,
      "grad_norm": 1.0457868316612642,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 5797
    },
    {
      "epoch": 0.05798,
      "grad_norm": 0.9558189857089837,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 5798
    },
    {
      "epoch": 0.05799,
      "grad_norm": 0.9226240851035593,
      "learning_rate": 0.003,
      "loss": 4.1332,
      "step": 5799
    },
    {
      "epoch": 0.058,
      "grad_norm": 0.9993623389955384,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 5800
    },
    {
      "epoch": 0.05801,
      "grad_norm": 1.0017244438128723,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 5801
    },
    {
      "epoch": 0.05802,
      "grad_norm": 1.0206300916624451,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 5802
    },
    {
      "epoch": 0.05803,
      "grad_norm": 1.2026383802301528,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 5803
    },
    {
      "epoch": 0.05804,
      "grad_norm": 0.8471292816498621,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 5804
    },
    {
      "epoch": 0.05805,
      "grad_norm": 0.9206281349289829,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 5805
    },
    {
      "epoch": 0.05806,
      "grad_norm": 0.9920161125208922,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 5806
    },
    {
      "epoch": 0.05807,
      "grad_norm": 1.1340630160886371,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 5807
    },
    {
      "epoch": 0.05808,
      "grad_norm": 1.0539888997436435,
      "learning_rate": 0.003,
      "loss": 4.1508,
      "step": 5808
    },
    {
      "epoch": 0.05809,
      "grad_norm": 0.9733624631098186,
      "learning_rate": 0.003,
      "loss": 4.1516,
      "step": 5809
    },
    {
      "epoch": 0.0581,
      "grad_norm": 1.019182599105166,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 5810
    },
    {
      "epoch": 0.05811,
      "grad_norm": 0.8351011669100157,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 5811
    },
    {
      "epoch": 0.05812,
      "grad_norm": 0.9144093431141755,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 5812
    },
    {
      "epoch": 0.05813,
      "grad_norm": 0.9946389439099612,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 5813
    },
    {
      "epoch": 0.05814,
      "grad_norm": 1.0550398914686734,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 5814
    },
    {
      "epoch": 0.05815,
      "grad_norm": 1.1913066080339185,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 5815
    },
    {
      "epoch": 0.05816,
      "grad_norm": 1.3602272683582077,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 5816
    },
    {
      "epoch": 0.05817,
      "grad_norm": 1.08919306882088,
      "learning_rate": 0.003,
      "loss": 4.1512,
      "step": 5817
    },
    {
      "epoch": 0.05818,
      "grad_norm": 0.9847118464175291,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 5818
    },
    {
      "epoch": 0.05819,
      "grad_norm": 0.8479452899517435,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 5819
    },
    {
      "epoch": 0.0582,
      "grad_norm": 0.8260945784335311,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 5820
    },
    {
      "epoch": 0.05821,
      "grad_norm": 0.8351762734324619,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 5821
    },
    {
      "epoch": 0.05822,
      "grad_norm": 0.8775633899266011,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 5822
    },
    {
      "epoch": 0.05823,
      "grad_norm": 0.8828213129541331,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 5823
    },
    {
      "epoch": 0.05824,
      "grad_norm": 0.9275613652569992,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 5824
    },
    {
      "epoch": 0.05825,
      "grad_norm": 1.1583620136133705,
      "learning_rate": 0.003,
      "loss": 4.1575,
      "step": 5825
    },
    {
      "epoch": 0.05826,
      "grad_norm": 1.0057654996842236,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 5826
    },
    {
      "epoch": 0.05827,
      "grad_norm": 0.9029292549527209,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 5827
    },
    {
      "epoch": 0.05828,
      "grad_norm": 0.84303329849258,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 5828
    },
    {
      "epoch": 0.05829,
      "grad_norm": 0.7396485829705943,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 5829
    },
    {
      "epoch": 0.0583,
      "grad_norm": 0.7632310839185854,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 5830
    },
    {
      "epoch": 0.05831,
      "grad_norm": 0.7643152457844856,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 5831
    },
    {
      "epoch": 0.05832,
      "grad_norm": 0.9136308228809447,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 5832
    },
    {
      "epoch": 0.05833,
      "grad_norm": 0.9801023803968252,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 5833
    },
    {
      "epoch": 0.05834,
      "grad_norm": 1.0650227589926176,
      "learning_rate": 0.003,
      "loss": 4.133,
      "step": 5834
    },
    {
      "epoch": 0.05835,
      "grad_norm": 1.1611792836713886,
      "learning_rate": 0.003,
      "loss": 4.1527,
      "step": 5835
    },
    {
      "epoch": 0.05836,
      "grad_norm": 1.0083690824532536,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 5836
    },
    {
      "epoch": 0.05837,
      "grad_norm": 1.0369684104345316,
      "learning_rate": 0.003,
      "loss": 4.1272,
      "step": 5837
    },
    {
      "epoch": 0.05838,
      "grad_norm": 1.007969306404381,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 5838
    },
    {
      "epoch": 0.05839,
      "grad_norm": 1.1056827923761021,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 5839
    },
    {
      "epoch": 0.0584,
      "grad_norm": 0.9832904688706899,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 5840
    },
    {
      "epoch": 0.05841,
      "grad_norm": 1.014502517643171,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 5841
    },
    {
      "epoch": 0.05842,
      "grad_norm": 1.1081359379902629,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 5842
    },
    {
      "epoch": 0.05843,
      "grad_norm": 1.0842690626582987,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 5843
    },
    {
      "epoch": 0.05844,
      "grad_norm": 0.8319622822533052,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 5844
    },
    {
      "epoch": 0.05845,
      "grad_norm": 1.0109437760206588,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 5845
    },
    {
      "epoch": 0.05846,
      "grad_norm": 0.9842272986484365,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 5846
    },
    {
      "epoch": 0.05847,
      "grad_norm": 1.039801475783821,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 5847
    },
    {
      "epoch": 0.05848,
      "grad_norm": 1.0562885170944185,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 5848
    },
    {
      "epoch": 0.05849,
      "grad_norm": 1.1734872924729944,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 5849
    },
    {
      "epoch": 0.0585,
      "grad_norm": 1.1879995500766252,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 5850
    },
    {
      "epoch": 0.05851,
      "grad_norm": 0.9294532448621421,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 5851
    },
    {
      "epoch": 0.05852,
      "grad_norm": 0.8214805038934484,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 5852
    },
    {
      "epoch": 0.05853,
      "grad_norm": 1.077263514126393,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 5853
    },
    {
      "epoch": 0.05854,
      "grad_norm": 1.006270515716583,
      "learning_rate": 0.003,
      "loss": 4.1699,
      "step": 5854
    },
    {
      "epoch": 0.05855,
      "grad_norm": 0.9464023045138196,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 5855
    },
    {
      "epoch": 0.05856,
      "grad_norm": 1.0642803849206124,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 5856
    },
    {
      "epoch": 0.05857,
      "grad_norm": 1.04585347576585,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 5857
    },
    {
      "epoch": 0.05858,
      "grad_norm": 1.0759346161603385,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 5858
    },
    {
      "epoch": 0.05859,
      "grad_norm": 0.9350487503243448,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 5859
    },
    {
      "epoch": 0.0586,
      "grad_norm": 0.9483831159895448,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 5860
    },
    {
      "epoch": 0.05861,
      "grad_norm": 0.831301332566721,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 5861
    },
    {
      "epoch": 0.05862,
      "grad_norm": 0.7707470466714661,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 5862
    },
    {
      "epoch": 0.05863,
      "grad_norm": 0.8619072544088463,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 5863
    },
    {
      "epoch": 0.05864,
      "grad_norm": 0.9235747550972097,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 5864
    },
    {
      "epoch": 0.05865,
      "grad_norm": 0.9363447226159133,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 5865
    },
    {
      "epoch": 0.05866,
      "grad_norm": 0.9458818904497686,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 5866
    },
    {
      "epoch": 0.05867,
      "grad_norm": 0.9576156779583576,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 5867
    },
    {
      "epoch": 0.05868,
      "grad_norm": 1.2435430439641602,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 5868
    },
    {
      "epoch": 0.05869,
      "grad_norm": 0.7952050411734592,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 5869
    },
    {
      "epoch": 0.0587,
      "grad_norm": 0.6231521938068694,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 5870
    },
    {
      "epoch": 0.05871,
      "grad_norm": 0.6598671272314561,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 5871
    },
    {
      "epoch": 0.05872,
      "grad_norm": 0.6664379166990358,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 5872
    },
    {
      "epoch": 0.05873,
      "grad_norm": 0.8141081232506757,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 5873
    },
    {
      "epoch": 0.05874,
      "grad_norm": 0.8920232475096385,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 5874
    },
    {
      "epoch": 0.05875,
      "grad_norm": 0.9032886130922955,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 5875
    },
    {
      "epoch": 0.05876,
      "grad_norm": 0.9540834088331597,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 5876
    },
    {
      "epoch": 0.05877,
      "grad_norm": 1.2497342414913382,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 5877
    },
    {
      "epoch": 0.05878,
      "grad_norm": 1.157702626258542,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 5878
    },
    {
      "epoch": 0.05879,
      "grad_norm": 1.3519563204657061,
      "learning_rate": 0.003,
      "loss": 4.1564,
      "step": 5879
    },
    {
      "epoch": 0.0588,
      "grad_norm": 0.8959524192760782,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 5880
    },
    {
      "epoch": 0.05881,
      "grad_norm": 1.0117816850047339,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 5881
    },
    {
      "epoch": 0.05882,
      "grad_norm": 1.184013692574266,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 5882
    },
    {
      "epoch": 0.05883,
      "grad_norm": 0.9923400292990168,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 5883
    },
    {
      "epoch": 0.05884,
      "grad_norm": 1.1254965238193124,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 5884
    },
    {
      "epoch": 0.05885,
      "grad_norm": 1.0857352979543535,
      "learning_rate": 0.003,
      "loss": 4.1351,
      "step": 5885
    },
    {
      "epoch": 0.05886,
      "grad_norm": 1.0761862053653697,
      "learning_rate": 0.003,
      "loss": 4.1286,
      "step": 5886
    },
    {
      "epoch": 0.05887,
      "grad_norm": 0.9285727732957737,
      "learning_rate": 0.003,
      "loss": 4.1169,
      "step": 5887
    },
    {
      "epoch": 0.05888,
      "grad_norm": 0.9743669592357651,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 5888
    },
    {
      "epoch": 0.05889,
      "grad_norm": 1.2027520897440067,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 5889
    },
    {
      "epoch": 0.0589,
      "grad_norm": 1.1888490061155312,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 5890
    },
    {
      "epoch": 0.05891,
      "grad_norm": 1.0357674592702995,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 5891
    },
    {
      "epoch": 0.05892,
      "grad_norm": 0.8999706532424413,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 5892
    },
    {
      "epoch": 0.05893,
      "grad_norm": 0.9379818703870834,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 5893
    },
    {
      "epoch": 0.05894,
      "grad_norm": 0.9480316835764753,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 5894
    },
    {
      "epoch": 0.05895,
      "grad_norm": 1.0172489807277192,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 5895
    },
    {
      "epoch": 0.05896,
      "grad_norm": 1.037063209191496,
      "learning_rate": 0.003,
      "loss": 4.1312,
      "step": 5896
    },
    {
      "epoch": 0.05897,
      "grad_norm": 0.9545225500016502,
      "learning_rate": 0.003,
      "loss": 4.1348,
      "step": 5897
    },
    {
      "epoch": 0.05898,
      "grad_norm": 0.9646572312575767,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 5898
    },
    {
      "epoch": 0.05899,
      "grad_norm": 0.9753171874462672,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 5899
    },
    {
      "epoch": 0.059,
      "grad_norm": 1.0009130566634394,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 5900
    },
    {
      "epoch": 0.05901,
      "grad_norm": 1.080584743348167,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 5901
    },
    {
      "epoch": 0.05902,
      "grad_norm": 1.0596981865427317,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 5902
    },
    {
      "epoch": 0.05903,
      "grad_norm": 0.9172095701197899,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5903
    },
    {
      "epoch": 0.05904,
      "grad_norm": 0.9690723756267208,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 5904
    },
    {
      "epoch": 0.05905,
      "grad_norm": 1.056830689213177,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 5905
    },
    {
      "epoch": 0.05906,
      "grad_norm": 0.9769191100777919,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 5906
    },
    {
      "epoch": 0.05907,
      "grad_norm": 1.1036371067840394,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 5907
    },
    {
      "epoch": 0.05908,
      "grad_norm": 1.1015376780354955,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 5908
    },
    {
      "epoch": 0.05909,
      "grad_norm": 0.8663999061554115,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 5909
    },
    {
      "epoch": 0.0591,
      "grad_norm": 0.7190720749570488,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 5910
    },
    {
      "epoch": 0.05911,
      "grad_norm": 0.766933214132005,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 5911
    },
    {
      "epoch": 0.05912,
      "grad_norm": 0.8890912016150025,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 5912
    },
    {
      "epoch": 0.05913,
      "grad_norm": 1.1652210778892036,
      "learning_rate": 0.003,
      "loss": 4.1649,
      "step": 5913
    },
    {
      "epoch": 0.05914,
      "grad_norm": 0.9485707320910097,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 5914
    },
    {
      "epoch": 0.05915,
      "grad_norm": 0.8956370926442963,
      "learning_rate": 0.003,
      "loss": 4.1297,
      "step": 5915
    },
    {
      "epoch": 0.05916,
      "grad_norm": 0.9214734200683395,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 5916
    },
    {
      "epoch": 0.05917,
      "grad_norm": 1.0875072720858519,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 5917
    },
    {
      "epoch": 0.05918,
      "grad_norm": 1.2287859167426936,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 5918
    },
    {
      "epoch": 0.05919,
      "grad_norm": 0.8943537238514924,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 5919
    },
    {
      "epoch": 0.0592,
      "grad_norm": 0.9891453949805679,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 5920
    },
    {
      "epoch": 0.05921,
      "grad_norm": 1.0861383341347572,
      "learning_rate": 0.003,
      "loss": 4.1406,
      "step": 5921
    },
    {
      "epoch": 0.05922,
      "grad_norm": 1.1782653575037294,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 5922
    },
    {
      "epoch": 0.05923,
      "grad_norm": 1.1910252107452277,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 5923
    },
    {
      "epoch": 0.05924,
      "grad_norm": 1.1161246036648609,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 5924
    },
    {
      "epoch": 0.05925,
      "grad_norm": 1.1329629249127982,
      "learning_rate": 0.003,
      "loss": 4.1184,
      "step": 5925
    },
    {
      "epoch": 0.05926,
      "grad_norm": 1.1093442658198536,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 5926
    },
    {
      "epoch": 0.05927,
      "grad_norm": 1.1038710636777183,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 5927
    },
    {
      "epoch": 0.05928,
      "grad_norm": 0.9173030931279694,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 5928
    },
    {
      "epoch": 0.05929,
      "grad_norm": 0.9040076215100182,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 5929
    },
    {
      "epoch": 0.0593,
      "grad_norm": 0.9168916841143755,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 5930
    },
    {
      "epoch": 0.05931,
      "grad_norm": 0.9557678467541673,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 5931
    },
    {
      "epoch": 0.05932,
      "grad_norm": 1.0099260007035409,
      "learning_rate": 0.003,
      "loss": 4.1403,
      "step": 5932
    },
    {
      "epoch": 0.05933,
      "grad_norm": 0.9621825722693325,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 5933
    },
    {
      "epoch": 0.05934,
      "grad_norm": 1.0971071255328568,
      "learning_rate": 0.003,
      "loss": 4.1519,
      "step": 5934
    },
    {
      "epoch": 0.05935,
      "grad_norm": 0.9234790751127311,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 5935
    },
    {
      "epoch": 0.05936,
      "grad_norm": 0.9821910858313653,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 5936
    },
    {
      "epoch": 0.05937,
      "grad_norm": 1.053295387630497,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 5937
    },
    {
      "epoch": 0.05938,
      "grad_norm": 0.9118058500310073,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 5938
    },
    {
      "epoch": 0.05939,
      "grad_norm": 0.9011024881921385,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 5939
    },
    {
      "epoch": 0.0594,
      "grad_norm": 0.924319370161119,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 5940
    },
    {
      "epoch": 0.05941,
      "grad_norm": 1.0245197208399006,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 5941
    },
    {
      "epoch": 0.05942,
      "grad_norm": 1.0834208067154592,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 5942
    },
    {
      "epoch": 0.05943,
      "grad_norm": 1.1741526716357884,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 5943
    },
    {
      "epoch": 0.05944,
      "grad_norm": 0.9071829121363102,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 5944
    },
    {
      "epoch": 0.05945,
      "grad_norm": 0.7177835099971523,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 5945
    },
    {
      "epoch": 0.05946,
      "grad_norm": 0.6696791120677105,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 5946
    },
    {
      "epoch": 0.05947,
      "grad_norm": 0.8016409208057959,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 5947
    },
    {
      "epoch": 0.05948,
      "grad_norm": 0.9133072242258121,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 5948
    },
    {
      "epoch": 0.05949,
      "grad_norm": 1.0761773223858178,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 5949
    },
    {
      "epoch": 0.0595,
      "grad_norm": 0.9836659779552777,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 5950
    },
    {
      "epoch": 0.05951,
      "grad_norm": 1.0199335343578013,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 5951
    },
    {
      "epoch": 0.05952,
      "grad_norm": 0.9743246006911659,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 5952
    },
    {
      "epoch": 0.05953,
      "grad_norm": 1.2016427351028913,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 5953
    },
    {
      "epoch": 0.05954,
      "grad_norm": 0.9106029846122331,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 5954
    },
    {
      "epoch": 0.05955,
      "grad_norm": 0.87535481561734,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 5955
    },
    {
      "epoch": 0.05956,
      "grad_norm": 0.8781809465631218,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 5956
    },
    {
      "epoch": 0.05957,
      "grad_norm": 0.9484771032245364,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 5957
    },
    {
      "epoch": 0.05958,
      "grad_norm": 1.1611462973785118,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 5958
    },
    {
      "epoch": 0.05959,
      "grad_norm": 1.021761889730662,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 5959
    },
    {
      "epoch": 0.0596,
      "grad_norm": 1.0395052086664378,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 5960
    },
    {
      "epoch": 0.05961,
      "grad_norm": 1.038972275353387,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 5961
    },
    {
      "epoch": 0.05962,
      "grad_norm": 1.1517542544342303,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 5962
    },
    {
      "epoch": 0.05963,
      "grad_norm": 1.0186946928220215,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 5963
    },
    {
      "epoch": 0.05964,
      "grad_norm": 1.0734942613507523,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 5964
    },
    {
      "epoch": 0.05965,
      "grad_norm": 0.99398075190589,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 5965
    },
    {
      "epoch": 0.05966,
      "grad_norm": 0.9660720138071446,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 5966
    },
    {
      "epoch": 0.05967,
      "grad_norm": 1.0547956518514383,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 5967
    },
    {
      "epoch": 0.05968,
      "grad_norm": 1.0422524262760788,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 5968
    },
    {
      "epoch": 0.05969,
      "grad_norm": 1.205455021996495,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 5969
    },
    {
      "epoch": 0.0597,
      "grad_norm": 0.8909956282412519,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 5970
    },
    {
      "epoch": 0.05971,
      "grad_norm": 0.9943205977156692,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 5971
    },
    {
      "epoch": 0.05972,
      "grad_norm": 1.1691908066767673,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 5972
    },
    {
      "epoch": 0.05973,
      "grad_norm": 1.1397498496713665,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 5973
    },
    {
      "epoch": 0.05974,
      "grad_norm": 1.0955142426242983,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 5974
    },
    {
      "epoch": 0.05975,
      "grad_norm": 0.9528979235197279,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 5975
    },
    {
      "epoch": 0.05976,
      "grad_norm": 1.0192429202061466,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 5976
    },
    {
      "epoch": 0.05977,
      "grad_norm": 1.0732106838304054,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 5977
    },
    {
      "epoch": 0.05978,
      "grad_norm": 1.0142239464322467,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 5978
    },
    {
      "epoch": 0.05979,
      "grad_norm": 0.9137036549885006,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 5979
    },
    {
      "epoch": 0.0598,
      "grad_norm": 0.9017921713902286,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 5980
    },
    {
      "epoch": 0.05981,
      "grad_norm": 0.9418925607728872,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 5981
    },
    {
      "epoch": 0.05982,
      "grad_norm": 1.0614608522568088,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 5982
    },
    {
      "epoch": 0.05983,
      "grad_norm": 0.9276124651301009,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 5983
    },
    {
      "epoch": 0.05984,
      "grad_norm": 1.0099523983857785,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 5984
    },
    {
      "epoch": 0.05985,
      "grad_norm": 1.021527017852559,
      "learning_rate": 0.003,
      "loss": 4.1321,
      "step": 5985
    },
    {
      "epoch": 0.05986,
      "grad_norm": 1.125446892694059,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 5986
    },
    {
      "epoch": 0.05987,
      "grad_norm": 1.0737688373374732,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 5987
    },
    {
      "epoch": 0.05988,
      "grad_norm": 0.9164702997141159,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 5988
    },
    {
      "epoch": 0.05989,
      "grad_norm": 0.9143021900180711,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 5989
    },
    {
      "epoch": 0.0599,
      "grad_norm": 0.8543938538827425,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 5990
    },
    {
      "epoch": 0.05991,
      "grad_norm": 1.0063680270151982,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 5991
    },
    {
      "epoch": 0.05992,
      "grad_norm": 1.2334495218996615,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 5992
    },
    {
      "epoch": 0.05993,
      "grad_norm": 0.9732503663927103,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 5993
    },
    {
      "epoch": 0.05994,
      "grad_norm": 1.0952554321561894,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 5994
    },
    {
      "epoch": 0.05995,
      "grad_norm": 1.0674546890074543,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 5995
    },
    {
      "epoch": 0.05996,
      "grad_norm": 0.9812098630340207,
      "learning_rate": 0.003,
      "loss": 4.1448,
      "step": 5996
    },
    {
      "epoch": 0.05997,
      "grad_norm": 1.0336990678275986,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 5997
    },
    {
      "epoch": 0.05998,
      "grad_norm": 1.0167004385621488,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 5998
    },
    {
      "epoch": 0.05999,
      "grad_norm": 1.1118826651465459,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 5999
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0614094902208722,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 6000
    },
    {
      "epoch": 0.06001,
      "grad_norm": 0.9983812953779062,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 6001
    },
    {
      "epoch": 0.06002,
      "grad_norm": 1.0543477507195134,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 6002
    },
    {
      "epoch": 0.06003,
      "grad_norm": 0.7908508239243327,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 6003
    },
    {
      "epoch": 0.06004,
      "grad_norm": 0.7408629642754656,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 6004
    },
    {
      "epoch": 0.06005,
      "grad_norm": 0.7225830777674686,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6005
    },
    {
      "epoch": 0.06006,
      "grad_norm": 0.8254182825933346,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 6006
    },
    {
      "epoch": 0.06007,
      "grad_norm": 1.0047409573685806,
      "learning_rate": 0.003,
      "loss": 4.1456,
      "step": 6007
    },
    {
      "epoch": 0.06008,
      "grad_norm": 1.3454769118513017,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 6008
    },
    {
      "epoch": 0.06009,
      "grad_norm": 0.8841761212056402,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6009
    },
    {
      "epoch": 0.0601,
      "grad_norm": 0.8712614480464654,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 6010
    },
    {
      "epoch": 0.06011,
      "grad_norm": 0.9614357198294633,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 6011
    },
    {
      "epoch": 0.06012,
      "grad_norm": 1.0365500927855609,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 6012
    },
    {
      "epoch": 0.06013,
      "grad_norm": 1.0079447498840501,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 6013
    },
    {
      "epoch": 0.06014,
      "grad_norm": 1.027171898423869,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6014
    },
    {
      "epoch": 0.06015,
      "grad_norm": 0.9671591709956947,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 6015
    },
    {
      "epoch": 0.06016,
      "grad_norm": 0.8094084922333825,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 6016
    },
    {
      "epoch": 0.06017,
      "grad_norm": 0.8969442169529565,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 6017
    },
    {
      "epoch": 0.06018,
      "grad_norm": 0.8714590129924783,
      "learning_rate": 0.003,
      "loss": 4.1439,
      "step": 6018
    },
    {
      "epoch": 0.06019,
      "grad_norm": 0.9863848503972414,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 6019
    },
    {
      "epoch": 0.0602,
      "grad_norm": 0.9823451734348092,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6020
    },
    {
      "epoch": 0.06021,
      "grad_norm": 1.0823387099836543,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 6021
    },
    {
      "epoch": 0.06022,
      "grad_norm": 1.2506242840661803,
      "learning_rate": 0.003,
      "loss": 4.1407,
      "step": 6022
    },
    {
      "epoch": 0.06023,
      "grad_norm": 1.1016631235570162,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 6023
    },
    {
      "epoch": 0.06024,
      "grad_norm": 1.0131110871671019,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6024
    },
    {
      "epoch": 0.06025,
      "grad_norm": 0.9550711760770073,
      "learning_rate": 0.003,
      "loss": 4.1549,
      "step": 6025
    },
    {
      "epoch": 0.06026,
      "grad_norm": 1.0082742845706594,
      "learning_rate": 0.003,
      "loss": 4.1238,
      "step": 6026
    },
    {
      "epoch": 0.06027,
      "grad_norm": 1.1193983758663688,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 6027
    },
    {
      "epoch": 0.06028,
      "grad_norm": 1.0297321070613559,
      "learning_rate": 0.003,
      "loss": 4.1427,
      "step": 6028
    },
    {
      "epoch": 0.06029,
      "grad_norm": 1.1955788477078355,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 6029
    },
    {
      "epoch": 0.0603,
      "grad_norm": 0.9831040724140647,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 6030
    },
    {
      "epoch": 0.06031,
      "grad_norm": 1.1029620178962896,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 6031
    },
    {
      "epoch": 0.06032,
      "grad_norm": 1.0090020178039447,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 6032
    },
    {
      "epoch": 0.06033,
      "grad_norm": 1.120910029012685,
      "learning_rate": 0.003,
      "loss": 4.1408,
      "step": 6033
    },
    {
      "epoch": 0.06034,
      "grad_norm": 1.202799778814886,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 6034
    },
    {
      "epoch": 0.06035,
      "grad_norm": 1.2839095955018514,
      "learning_rate": 0.003,
      "loss": 4.1464,
      "step": 6035
    },
    {
      "epoch": 0.06036,
      "grad_norm": 0.8960072775662671,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 6036
    },
    {
      "epoch": 0.06037,
      "grad_norm": 0.9251248286636486,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 6037
    },
    {
      "epoch": 0.06038,
      "grad_norm": 1.0231580981647335,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6038
    },
    {
      "epoch": 0.06039,
      "grad_norm": 1.0065304620292819,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 6039
    },
    {
      "epoch": 0.0604,
      "grad_norm": 0.9234637398061142,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 6040
    },
    {
      "epoch": 0.06041,
      "grad_norm": 0.9399687656493915,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 6041
    },
    {
      "epoch": 0.06042,
      "grad_norm": 0.9871340954259973,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6042
    },
    {
      "epoch": 0.06043,
      "grad_norm": 1.1211726188172257,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 6043
    },
    {
      "epoch": 0.06044,
      "grad_norm": 0.9672368374829318,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 6044
    },
    {
      "epoch": 0.06045,
      "grad_norm": 0.9720049333758409,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 6045
    },
    {
      "epoch": 0.06046,
      "grad_norm": 1.1307499152097553,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 6046
    },
    {
      "epoch": 0.06047,
      "grad_norm": 1.1577066626632935,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 6047
    },
    {
      "epoch": 0.06048,
      "grad_norm": 0.934752567088904,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 6048
    },
    {
      "epoch": 0.06049,
      "grad_norm": 0.9051391606921817,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 6049
    },
    {
      "epoch": 0.0605,
      "grad_norm": 1.0677162480423936,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 6050
    },
    {
      "epoch": 0.06051,
      "grad_norm": 0.9753098802964573,
      "learning_rate": 0.003,
      "loss": 4.1366,
      "step": 6051
    },
    {
      "epoch": 0.06052,
      "grad_norm": 0.7981061068208121,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 6052
    },
    {
      "epoch": 0.06053,
      "grad_norm": 0.8444006516702162,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 6053
    },
    {
      "epoch": 0.06054,
      "grad_norm": 0.8591163485087062,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 6054
    },
    {
      "epoch": 0.06055,
      "grad_norm": 0.9605032090948827,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 6055
    },
    {
      "epoch": 0.06056,
      "grad_norm": 1.3762618972232707,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6056
    },
    {
      "epoch": 0.06057,
      "grad_norm": 1.0957643635595118,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 6057
    },
    {
      "epoch": 0.06058,
      "grad_norm": 0.9253580114680722,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 6058
    },
    {
      "epoch": 0.06059,
      "grad_norm": 1.0298887868390938,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 6059
    },
    {
      "epoch": 0.0606,
      "grad_norm": 1.1659113696201608,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 6060
    },
    {
      "epoch": 0.06061,
      "grad_norm": 1.003664298622335,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6061
    },
    {
      "epoch": 0.06062,
      "grad_norm": 1.4291730349399376,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 6062
    },
    {
      "epoch": 0.06063,
      "grad_norm": 0.8331420156511541,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 6063
    },
    {
      "epoch": 0.06064,
      "grad_norm": 0.7542070054653189,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 6064
    },
    {
      "epoch": 0.06065,
      "grad_norm": 0.8207912460389019,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 6065
    },
    {
      "epoch": 0.06066,
      "grad_norm": 0.9450469860084448,
      "learning_rate": 0.003,
      "loss": 4.1482,
      "step": 6066
    },
    {
      "epoch": 0.06067,
      "grad_norm": 1.2813480138223348,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 6067
    },
    {
      "epoch": 0.06068,
      "grad_norm": 0.9754293459370826,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 6068
    },
    {
      "epoch": 0.06069,
      "grad_norm": 0.9381993103405896,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6069
    },
    {
      "epoch": 0.0607,
      "grad_norm": 0.969119385469152,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 6070
    },
    {
      "epoch": 0.06071,
      "grad_norm": 1.1338080882626715,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 6071
    },
    {
      "epoch": 0.06072,
      "grad_norm": 0.8631213140843556,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 6072
    },
    {
      "epoch": 0.06073,
      "grad_norm": 0.9229201198227382,
      "learning_rate": 0.003,
      "loss": 4.1299,
      "step": 6073
    },
    {
      "epoch": 0.06074,
      "grad_norm": 1.178815613946597,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 6074
    },
    {
      "epoch": 0.06075,
      "grad_norm": 0.9473743130203446,
      "learning_rate": 0.003,
      "loss": 4.135,
      "step": 6075
    },
    {
      "epoch": 0.06076,
      "grad_norm": 1.041670242948814,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 6076
    },
    {
      "epoch": 0.06077,
      "grad_norm": 1.0127128248077857,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 6077
    },
    {
      "epoch": 0.06078,
      "grad_norm": 0.9898812176337916,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 6078
    },
    {
      "epoch": 0.06079,
      "grad_norm": 0.9621264068499179,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 6079
    },
    {
      "epoch": 0.0608,
      "grad_norm": 0.8712893680783403,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6080
    },
    {
      "epoch": 0.06081,
      "grad_norm": 0.8647897086338766,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 6081
    },
    {
      "epoch": 0.06082,
      "grad_norm": 0.8905130772955867,
      "learning_rate": 0.003,
      "loss": 4.1442,
      "step": 6082
    },
    {
      "epoch": 0.06083,
      "grad_norm": 1.0240935462028675,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 6083
    },
    {
      "epoch": 0.06084,
      "grad_norm": 1.1672577186095388,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 6084
    },
    {
      "epoch": 0.06085,
      "grad_norm": 1.3250963261390494,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 6085
    },
    {
      "epoch": 0.06086,
      "grad_norm": 0.870201800346383,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 6086
    },
    {
      "epoch": 0.06087,
      "grad_norm": 0.8933035330011422,
      "learning_rate": 0.003,
      "loss": 4.1295,
      "step": 6087
    },
    {
      "epoch": 0.06088,
      "grad_norm": 0.9944369027728199,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 6088
    },
    {
      "epoch": 0.06089,
      "grad_norm": 1.188704398920773,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 6089
    },
    {
      "epoch": 0.0609,
      "grad_norm": 0.9777021810669053,
      "learning_rate": 0.003,
      "loss": 4.148,
      "step": 6090
    },
    {
      "epoch": 0.06091,
      "grad_norm": 1.0499229093718996,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 6091
    },
    {
      "epoch": 0.06092,
      "grad_norm": 0.9796736938616526,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6092
    },
    {
      "epoch": 0.06093,
      "grad_norm": 0.8559383646964028,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 6093
    },
    {
      "epoch": 0.06094,
      "grad_norm": 0.9313283566372004,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 6094
    },
    {
      "epoch": 0.06095,
      "grad_norm": 1.0809762060800143,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 6095
    },
    {
      "epoch": 0.06096,
      "grad_norm": 1.0835678303665617,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 6096
    },
    {
      "epoch": 0.06097,
      "grad_norm": 1.0642504020628758,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 6097
    },
    {
      "epoch": 0.06098,
      "grad_norm": 0.9976998221148508,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 6098
    },
    {
      "epoch": 0.06099,
      "grad_norm": 1.0924202946682247,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 6099
    },
    {
      "epoch": 0.061,
      "grad_norm": 0.8943889339335144,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 6100
    },
    {
      "epoch": 0.06101,
      "grad_norm": 0.8414480065098904,
      "learning_rate": 0.003,
      "loss": 4.1413,
      "step": 6101
    },
    {
      "epoch": 0.06102,
      "grad_norm": 0.8157966743179256,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 6102
    },
    {
      "epoch": 0.06103,
      "grad_norm": 0.8333573956876027,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 6103
    },
    {
      "epoch": 0.06104,
      "grad_norm": 0.8222742472881592,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 6104
    },
    {
      "epoch": 0.06105,
      "grad_norm": 0.9085226278952419,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 6105
    },
    {
      "epoch": 0.06106,
      "grad_norm": 1.0436819426813295,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 6106
    },
    {
      "epoch": 0.06107,
      "grad_norm": 1.1352646100575459,
      "learning_rate": 0.003,
      "loss": 4.1499,
      "step": 6107
    },
    {
      "epoch": 0.06108,
      "grad_norm": 1.1088556231295252,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 6108
    },
    {
      "epoch": 0.06109,
      "grad_norm": 1.088382498713082,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 6109
    },
    {
      "epoch": 0.0611,
      "grad_norm": 1.0780432441806462,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 6110
    },
    {
      "epoch": 0.06111,
      "grad_norm": 1.197481994266544,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 6111
    },
    {
      "epoch": 0.06112,
      "grad_norm": 0.9669086710118441,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 6112
    },
    {
      "epoch": 0.06113,
      "grad_norm": 1.129056630459804,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 6113
    },
    {
      "epoch": 0.06114,
      "grad_norm": 0.990545101825822,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 6114
    },
    {
      "epoch": 0.06115,
      "grad_norm": 0.993200814873901,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 6115
    },
    {
      "epoch": 0.06116,
      "grad_norm": 1.1506429666248497,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 6116
    },
    {
      "epoch": 0.06117,
      "grad_norm": 1.232012893903766,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 6117
    },
    {
      "epoch": 0.06118,
      "grad_norm": 0.9333069523142945,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 6118
    },
    {
      "epoch": 0.06119,
      "grad_norm": 0.9309794301782843,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 6119
    },
    {
      "epoch": 0.0612,
      "grad_norm": 0.9889641658765227,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 6120
    },
    {
      "epoch": 0.06121,
      "grad_norm": 0.9429924903195458,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 6121
    },
    {
      "epoch": 0.06122,
      "grad_norm": 0.9263560870190729,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 6122
    },
    {
      "epoch": 0.06123,
      "grad_norm": 0.9349458998129097,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 6123
    },
    {
      "epoch": 0.06124,
      "grad_norm": 0.8727337616002643,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 6124
    },
    {
      "epoch": 0.06125,
      "grad_norm": 0.8116241603577743,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 6125
    },
    {
      "epoch": 0.06126,
      "grad_norm": 0.8298969442086128,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 6126
    },
    {
      "epoch": 0.06127,
      "grad_norm": 1.0134811068303906,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 6127
    },
    {
      "epoch": 0.06128,
      "grad_norm": 1.1185496168002877,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 6128
    },
    {
      "epoch": 0.06129,
      "grad_norm": 0.9033340313373569,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 6129
    },
    {
      "epoch": 0.0613,
      "grad_norm": 0.7767340053994768,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 6130
    },
    {
      "epoch": 0.06131,
      "grad_norm": 0.7946814526459421,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 6131
    },
    {
      "epoch": 0.06132,
      "grad_norm": 0.775430711370884,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 6132
    },
    {
      "epoch": 0.06133,
      "grad_norm": 0.9286092538190712,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 6133
    },
    {
      "epoch": 0.06134,
      "grad_norm": 1.1838282950640109,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 6134
    },
    {
      "epoch": 0.06135,
      "grad_norm": 0.9253915308025362,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 6135
    },
    {
      "epoch": 0.06136,
      "grad_norm": 0.9923935179783585,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 6136
    },
    {
      "epoch": 0.06137,
      "grad_norm": 0.9538152751448672,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 6137
    },
    {
      "epoch": 0.06138,
      "grad_norm": 0.9731598770998673,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 6138
    },
    {
      "epoch": 0.06139,
      "grad_norm": 1.1310802967564626,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 6139
    },
    {
      "epoch": 0.0614,
      "grad_norm": 1.0143613465951173,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 6140
    },
    {
      "epoch": 0.06141,
      "grad_norm": 0.9941376661663965,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 6141
    },
    {
      "epoch": 0.06142,
      "grad_norm": 1.1125462176091532,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 6142
    },
    {
      "epoch": 0.06143,
      "grad_norm": 0.927598665034684,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 6143
    },
    {
      "epoch": 0.06144,
      "grad_norm": 1.0202798430001405,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 6144
    },
    {
      "epoch": 0.06145,
      "grad_norm": 1.1112123749988467,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 6145
    },
    {
      "epoch": 0.06146,
      "grad_norm": 0.9774352630984132,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 6146
    },
    {
      "epoch": 0.06147,
      "grad_norm": 1.1099041541034362,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 6147
    },
    {
      "epoch": 0.06148,
      "grad_norm": 1.3400741766228303,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 6148
    },
    {
      "epoch": 0.06149,
      "grad_norm": 0.7790383000135375,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 6149
    },
    {
      "epoch": 0.0615,
      "grad_norm": 0.6587385143781973,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 6150
    },
    {
      "epoch": 0.06151,
      "grad_norm": 0.8840013067228365,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 6151
    },
    {
      "epoch": 0.06152,
      "grad_norm": 1.171465834606882,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 6152
    },
    {
      "epoch": 0.06153,
      "grad_norm": 1.1290004133065623,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 6153
    },
    {
      "epoch": 0.06154,
      "grad_norm": 0.84568812643019,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 6154
    },
    {
      "epoch": 0.06155,
      "grad_norm": 0.7631870358160202,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 6155
    },
    {
      "epoch": 0.06156,
      "grad_norm": 0.8365491777650411,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 6156
    },
    {
      "epoch": 0.06157,
      "grad_norm": 0.962843672187252,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 6157
    },
    {
      "epoch": 0.06158,
      "grad_norm": 1.1397548341231054,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 6158
    },
    {
      "epoch": 0.06159,
      "grad_norm": 0.8950108190357373,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6159
    },
    {
      "epoch": 0.0616,
      "grad_norm": 0.9839773780830863,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 6160
    },
    {
      "epoch": 0.06161,
      "grad_norm": 1.1856171086545502,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 6161
    },
    {
      "epoch": 0.06162,
      "grad_norm": 1.0668817290384003,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 6162
    },
    {
      "epoch": 0.06163,
      "grad_norm": 0.9679202467051432,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 6163
    },
    {
      "epoch": 0.06164,
      "grad_norm": 1.1789883371391081,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 6164
    },
    {
      "epoch": 0.06165,
      "grad_norm": 0.9378112937127264,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 6165
    },
    {
      "epoch": 0.06166,
      "grad_norm": 1.073717680785834,
      "learning_rate": 0.003,
      "loss": 4.1393,
      "step": 6166
    },
    {
      "epoch": 0.06167,
      "grad_norm": 1.0508780162488434,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 6167
    },
    {
      "epoch": 0.06168,
      "grad_norm": 1.1231275426430674,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 6168
    },
    {
      "epoch": 0.06169,
      "grad_norm": 0.9613639524735813,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 6169
    },
    {
      "epoch": 0.0617,
      "grad_norm": 0.893115062816479,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 6170
    },
    {
      "epoch": 0.06171,
      "grad_norm": 1.014755768462807,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 6171
    },
    {
      "epoch": 0.06172,
      "grad_norm": 1.1203964494434968,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 6172
    },
    {
      "epoch": 0.06173,
      "grad_norm": 1.1296240698689581,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 6173
    },
    {
      "epoch": 0.06174,
      "grad_norm": 1.180308645919765,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 6174
    },
    {
      "epoch": 0.06175,
      "grad_norm": 0.9534486608810446,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 6175
    },
    {
      "epoch": 0.06176,
      "grad_norm": 1.0269321584216362,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 6176
    },
    {
      "epoch": 0.06177,
      "grad_norm": 1.2053567559226814,
      "learning_rate": 0.003,
      "loss": 4.1177,
      "step": 6177
    },
    {
      "epoch": 0.06178,
      "grad_norm": 1.0146813059297823,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 6178
    },
    {
      "epoch": 0.06179,
      "grad_norm": 1.1284915689903894,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 6179
    },
    {
      "epoch": 0.0618,
      "grad_norm": 0.9947070142693367,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 6180
    },
    {
      "epoch": 0.06181,
      "grad_norm": 1.0633559758739175,
      "learning_rate": 0.003,
      "loss": 4.1402,
      "step": 6181
    },
    {
      "epoch": 0.06182,
      "grad_norm": 1.1027400151633573,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 6182
    },
    {
      "epoch": 0.06183,
      "grad_norm": 1.0324647146802441,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 6183
    },
    {
      "epoch": 0.06184,
      "grad_norm": 1.1585114248380202,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 6184
    },
    {
      "epoch": 0.06185,
      "grad_norm": 0.9898954212712141,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6185
    },
    {
      "epoch": 0.06186,
      "grad_norm": 1.020362653358992,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 6186
    },
    {
      "epoch": 0.06187,
      "grad_norm": 1.0015357102361495,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 6187
    },
    {
      "epoch": 0.06188,
      "grad_norm": 1.0340445619644294,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 6188
    },
    {
      "epoch": 0.06189,
      "grad_norm": 1.033425949623886,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 6189
    },
    {
      "epoch": 0.0619,
      "grad_norm": 0.9791881079008582,
      "learning_rate": 0.003,
      "loss": 4.145,
      "step": 6190
    },
    {
      "epoch": 0.06191,
      "grad_norm": 1.0369953073953297,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6191
    },
    {
      "epoch": 0.06192,
      "grad_norm": 1.0550940203599017,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 6192
    },
    {
      "epoch": 0.06193,
      "grad_norm": 1.004119892499567,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 6193
    },
    {
      "epoch": 0.06194,
      "grad_norm": 1.1667997286871377,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 6194
    },
    {
      "epoch": 0.06195,
      "grad_norm": 0.8305937619838769,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 6195
    },
    {
      "epoch": 0.06196,
      "grad_norm": 0.7005743659730198,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 6196
    },
    {
      "epoch": 0.06197,
      "grad_norm": 0.7420600370286529,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 6197
    },
    {
      "epoch": 0.06198,
      "grad_norm": 0.7767521916979286,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 6198
    },
    {
      "epoch": 0.06199,
      "grad_norm": 0.9637957082299587,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 6199
    },
    {
      "epoch": 0.062,
      "grad_norm": 1.436081636862012,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 6200
    },
    {
      "epoch": 0.06201,
      "grad_norm": 0.9348049703706455,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 6201
    },
    {
      "epoch": 0.06202,
      "grad_norm": 0.9670197292189014,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 6202
    },
    {
      "epoch": 0.06203,
      "grad_norm": 1.0733101607809545,
      "learning_rate": 0.003,
      "loss": 4.1352,
      "step": 6203
    },
    {
      "epoch": 0.06204,
      "grad_norm": 1.1583376498097138,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 6204
    },
    {
      "epoch": 0.06205,
      "grad_norm": 1.0094346779280738,
      "learning_rate": 0.003,
      "loss": 4.1342,
      "step": 6205
    },
    {
      "epoch": 0.06206,
      "grad_norm": 1.1703726685169265,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 6206
    },
    {
      "epoch": 0.06207,
      "grad_norm": 0.9450958344284114,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 6207
    },
    {
      "epoch": 0.06208,
      "grad_norm": 0.9939553682249204,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 6208
    },
    {
      "epoch": 0.06209,
      "grad_norm": 1.048829583157075,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 6209
    },
    {
      "epoch": 0.0621,
      "grad_norm": 1.1283576786370575,
      "learning_rate": 0.003,
      "loss": 4.1647,
      "step": 6210
    },
    {
      "epoch": 0.06211,
      "grad_norm": 1.0202411361181278,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 6211
    },
    {
      "epoch": 0.06212,
      "grad_norm": 0.8463803172469656,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 6212
    },
    {
      "epoch": 0.06213,
      "grad_norm": 0.8831158988837825,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 6213
    },
    {
      "epoch": 0.06214,
      "grad_norm": 0.9471127988954445,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 6214
    },
    {
      "epoch": 0.06215,
      "grad_norm": 1.1099784730474564,
      "learning_rate": 0.003,
      "loss": 4.1347,
      "step": 6215
    },
    {
      "epoch": 0.06216,
      "grad_norm": 0.8618468021683433,
      "learning_rate": 0.003,
      "loss": 4.1546,
      "step": 6216
    },
    {
      "epoch": 0.06217,
      "grad_norm": 1.0078275003589592,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 6217
    },
    {
      "epoch": 0.06218,
      "grad_norm": 1.2368143042206021,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 6218
    },
    {
      "epoch": 0.06219,
      "grad_norm": 1.054833838062907,
      "learning_rate": 0.003,
      "loss": 4.1371,
      "step": 6219
    },
    {
      "epoch": 0.0622,
      "grad_norm": 1.3062224581370208,
      "learning_rate": 0.003,
      "loss": 4.137,
      "step": 6220
    },
    {
      "epoch": 0.06221,
      "grad_norm": 0.8604885677509169,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 6221
    },
    {
      "epoch": 0.06222,
      "grad_norm": 0.8641494139028316,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 6222
    },
    {
      "epoch": 0.06223,
      "grad_norm": 0.9055413740295657,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 6223
    },
    {
      "epoch": 0.06224,
      "grad_norm": 0.939566222161941,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 6224
    },
    {
      "epoch": 0.06225,
      "grad_norm": 0.9877692010672254,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 6225
    },
    {
      "epoch": 0.06226,
      "grad_norm": 1.2373704185349645,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 6226
    },
    {
      "epoch": 0.06227,
      "grad_norm": 1.0421769966469974,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 6227
    },
    {
      "epoch": 0.06228,
      "grad_norm": 1.0069660017991295,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 6228
    },
    {
      "epoch": 0.06229,
      "grad_norm": 0.9902407890356086,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 6229
    },
    {
      "epoch": 0.0623,
      "grad_norm": 1.000784859461883,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 6230
    },
    {
      "epoch": 0.06231,
      "grad_norm": 0.9879882528823176,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 6231
    },
    {
      "epoch": 0.06232,
      "grad_norm": 0.9161813009855883,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 6232
    },
    {
      "epoch": 0.06233,
      "grad_norm": 0.9421675613078943,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 6233
    },
    {
      "epoch": 0.06234,
      "grad_norm": 0.9233627757140611,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 6234
    },
    {
      "epoch": 0.06235,
      "grad_norm": 0.8773342302782378,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 6235
    },
    {
      "epoch": 0.06236,
      "grad_norm": 0.8554767024277897,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 6236
    },
    {
      "epoch": 0.06237,
      "grad_norm": 0.8891401545205588,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 6237
    },
    {
      "epoch": 0.06238,
      "grad_norm": 0.9901683523219613,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 6238
    },
    {
      "epoch": 0.06239,
      "grad_norm": 1.2816193140265006,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 6239
    },
    {
      "epoch": 0.0624,
      "grad_norm": 0.9761347780125502,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 6240
    },
    {
      "epoch": 0.06241,
      "grad_norm": 1.0466186560774475,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 6241
    },
    {
      "epoch": 0.06242,
      "grad_norm": 1.1002056251421792,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 6242
    },
    {
      "epoch": 0.06243,
      "grad_norm": 1.0586273465264278,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 6243
    },
    {
      "epoch": 0.06244,
      "grad_norm": 1.1607710935816504,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 6244
    },
    {
      "epoch": 0.06245,
      "grad_norm": 0.9752269488488016,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 6245
    },
    {
      "epoch": 0.06246,
      "grad_norm": 1.089994727247547,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 6246
    },
    {
      "epoch": 0.06247,
      "grad_norm": 1.0813807058658953,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 6247
    },
    {
      "epoch": 0.06248,
      "grad_norm": 1.0666231347706039,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 6248
    },
    {
      "epoch": 0.06249,
      "grad_norm": 1.0262020212481742,
      "learning_rate": 0.003,
      "loss": 4.1303,
      "step": 6249
    },
    {
      "epoch": 0.0625,
      "grad_norm": 1.2632454332795369,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 6250
    },
    {
      "epoch": 0.06251,
      "grad_norm": 0.9575866697235751,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 6251
    },
    {
      "epoch": 0.06252,
      "grad_norm": 1.0727447713803615,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 6252
    },
    {
      "epoch": 0.06253,
      "grad_norm": 1.275488151651881,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6253
    },
    {
      "epoch": 0.06254,
      "grad_norm": 0.8302208080842566,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 6254
    },
    {
      "epoch": 0.06255,
      "grad_norm": 0.9130697914120645,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 6255
    },
    {
      "epoch": 0.06256,
      "grad_norm": 0.9667951839279011,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 6256
    },
    {
      "epoch": 0.06257,
      "grad_norm": 0.8489314632779762,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 6257
    },
    {
      "epoch": 0.06258,
      "grad_norm": 0.8445325664151822,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 6258
    },
    {
      "epoch": 0.06259,
      "grad_norm": 0.7672593438540465,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 6259
    },
    {
      "epoch": 0.0626,
      "grad_norm": 0.8964348171771911,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6260
    },
    {
      "epoch": 0.06261,
      "grad_norm": 0.9947853097219037,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 6261
    },
    {
      "epoch": 0.06262,
      "grad_norm": 1.2241118778779938,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6262
    },
    {
      "epoch": 0.06263,
      "grad_norm": 1.1642987312142146,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 6263
    },
    {
      "epoch": 0.06264,
      "grad_norm": 0.8953851392238773,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 6264
    },
    {
      "epoch": 0.06265,
      "grad_norm": 0.8318963279886489,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 6265
    },
    {
      "epoch": 0.06266,
      "grad_norm": 0.8609508836654348,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 6266
    },
    {
      "epoch": 0.06267,
      "grad_norm": 0.8740732914198333,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 6267
    },
    {
      "epoch": 0.06268,
      "grad_norm": 1.05944492334423,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 6268
    },
    {
      "epoch": 0.06269,
      "grad_norm": 1.1546078108699898,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 6269
    },
    {
      "epoch": 0.0627,
      "grad_norm": 1.1402430212111558,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 6270
    },
    {
      "epoch": 0.06271,
      "grad_norm": 0.8999820730504591,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 6271
    },
    {
      "epoch": 0.06272,
      "grad_norm": 0.8074571914853922,
      "learning_rate": 0.003,
      "loss": 4.1315,
      "step": 6272
    },
    {
      "epoch": 0.06273,
      "grad_norm": 0.8233642450661368,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 6273
    },
    {
      "epoch": 0.06274,
      "grad_norm": 0.9047777840671057,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 6274
    },
    {
      "epoch": 0.06275,
      "grad_norm": 0.9781934686902589,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 6275
    },
    {
      "epoch": 0.06276,
      "grad_norm": 1.04325282479346,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 6276
    },
    {
      "epoch": 0.06277,
      "grad_norm": 1.0621790353885807,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 6277
    },
    {
      "epoch": 0.06278,
      "grad_norm": 1.1452838103158116,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 6278
    },
    {
      "epoch": 0.06279,
      "grad_norm": 0.929599885878623,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 6279
    },
    {
      "epoch": 0.0628,
      "grad_norm": 1.1623398174815285,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 6280
    },
    {
      "epoch": 0.06281,
      "grad_norm": 1.151758981216677,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 6281
    },
    {
      "epoch": 0.06282,
      "grad_norm": 1.007355594619288,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 6282
    },
    {
      "epoch": 0.06283,
      "grad_norm": 1.220826744113072,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 6283
    },
    {
      "epoch": 0.06284,
      "grad_norm": 0.8704770237141937,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 6284
    },
    {
      "epoch": 0.06285,
      "grad_norm": 0.8383126810956327,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 6285
    },
    {
      "epoch": 0.06286,
      "grad_norm": 0.8800742184339927,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6286
    },
    {
      "epoch": 0.06287,
      "grad_norm": 1.0812936724403794,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 6287
    },
    {
      "epoch": 0.06288,
      "grad_norm": 1.2593934067897568,
      "learning_rate": 0.003,
      "loss": 4.1381,
      "step": 6288
    },
    {
      "epoch": 0.06289,
      "grad_norm": 1.1429060251374439,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 6289
    },
    {
      "epoch": 0.0629,
      "grad_norm": 1.246481182234239,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 6290
    },
    {
      "epoch": 0.06291,
      "grad_norm": 0.960676109509977,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 6291
    },
    {
      "epoch": 0.06292,
      "grad_norm": 0.9723196671162704,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 6292
    },
    {
      "epoch": 0.06293,
      "grad_norm": 1.125411684140196,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 6293
    },
    {
      "epoch": 0.06294,
      "grad_norm": 1.0139672152671215,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 6294
    },
    {
      "epoch": 0.06295,
      "grad_norm": 1.2175400090011317,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 6295
    },
    {
      "epoch": 0.06296,
      "grad_norm": 0.8801642637892075,
      "learning_rate": 0.003,
      "loss": 4.141,
      "step": 6296
    },
    {
      "epoch": 0.06297,
      "grad_norm": 0.8995632624538455,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 6297
    },
    {
      "epoch": 0.06298,
      "grad_norm": 1.0073654159072412,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 6298
    },
    {
      "epoch": 0.06299,
      "grad_norm": 1.1330731816130224,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 6299
    },
    {
      "epoch": 0.063,
      "grad_norm": 1.0282659113929185,
      "learning_rate": 0.003,
      "loss": 4.1614,
      "step": 6300
    },
    {
      "epoch": 0.06301,
      "grad_norm": 1.0726720577475604,
      "learning_rate": 0.003,
      "loss": 4.1376,
      "step": 6301
    },
    {
      "epoch": 0.06302,
      "grad_norm": 1.0496260954229397,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 6302
    },
    {
      "epoch": 0.06303,
      "grad_norm": 1.1253852761118548,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 6303
    },
    {
      "epoch": 0.06304,
      "grad_norm": 0.9024308896902583,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 6304
    },
    {
      "epoch": 0.06305,
      "grad_norm": 0.8914762325481002,
      "learning_rate": 0.003,
      "loss": 4.1426,
      "step": 6305
    },
    {
      "epoch": 0.06306,
      "grad_norm": 1.035268496629464,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 6306
    },
    {
      "epoch": 0.06307,
      "grad_norm": 1.1527964693828754,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 6307
    },
    {
      "epoch": 0.06308,
      "grad_norm": 1.0156899019344043,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 6308
    },
    {
      "epoch": 0.06309,
      "grad_norm": 0.9817564469713144,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 6309
    },
    {
      "epoch": 0.0631,
      "grad_norm": 1.125639940555161,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 6310
    },
    {
      "epoch": 0.06311,
      "grad_norm": 1.146532285357419,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 6311
    },
    {
      "epoch": 0.06312,
      "grad_norm": 0.9764239611962195,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 6312
    },
    {
      "epoch": 0.06313,
      "grad_norm": 0.8815145041719636,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 6313
    },
    {
      "epoch": 0.06314,
      "grad_norm": 0.840467663491767,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 6314
    },
    {
      "epoch": 0.06315,
      "grad_norm": 0.7533539654208644,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 6315
    },
    {
      "epoch": 0.06316,
      "grad_norm": 0.8052544193540351,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 6316
    },
    {
      "epoch": 0.06317,
      "grad_norm": 0.9863065368724497,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 6317
    },
    {
      "epoch": 0.06318,
      "grad_norm": 1.217882389975133,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 6318
    },
    {
      "epoch": 0.06319,
      "grad_norm": 0.70847036618039,
      "learning_rate": 0.003,
      "loss": 4.1434,
      "step": 6319
    },
    {
      "epoch": 0.0632,
      "grad_norm": 0.8377363984109976,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 6320
    },
    {
      "epoch": 0.06321,
      "grad_norm": 1.0324765911073792,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 6321
    },
    {
      "epoch": 0.06322,
      "grad_norm": 1.3181893770856248,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6322
    },
    {
      "epoch": 0.06323,
      "grad_norm": 1.1690328295155383,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 6323
    },
    {
      "epoch": 0.06324,
      "grad_norm": 1.0679893724078005,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 6324
    },
    {
      "epoch": 0.06325,
      "grad_norm": 0.9783736109608521,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 6325
    },
    {
      "epoch": 0.06326,
      "grad_norm": 1.003334131616264,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 6326
    },
    {
      "epoch": 0.06327,
      "grad_norm": 1.026805442964879,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 6327
    },
    {
      "epoch": 0.06328,
      "grad_norm": 1.076746193210205,
      "learning_rate": 0.003,
      "loss": 4.1477,
      "step": 6328
    },
    {
      "epoch": 0.06329,
      "grad_norm": 0.9881716508673234,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 6329
    },
    {
      "epoch": 0.0633,
      "grad_norm": 1.008130034970649,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 6330
    },
    {
      "epoch": 0.06331,
      "grad_norm": 1.1763834546994973,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 6331
    },
    {
      "epoch": 0.06332,
      "grad_norm": 1.0145017626609247,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6332
    },
    {
      "epoch": 0.06333,
      "grad_norm": 1.1827671265849429,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 6333
    },
    {
      "epoch": 0.06334,
      "grad_norm": 1.1730909666687077,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 6334
    },
    {
      "epoch": 0.06335,
      "grad_norm": 1.0011266798315859,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 6335
    },
    {
      "epoch": 0.06336,
      "grad_norm": 1.1675286156010463,
      "learning_rate": 0.003,
      "loss": 4.146,
      "step": 6336
    },
    {
      "epoch": 0.06337,
      "grad_norm": 0.9575385222661729,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 6337
    },
    {
      "epoch": 0.06338,
      "grad_norm": 1.0319657712516945,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 6338
    },
    {
      "epoch": 0.06339,
      "grad_norm": 1.086306029136807,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 6339
    },
    {
      "epoch": 0.0634,
      "grad_norm": 1.1043242914825164,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 6340
    },
    {
      "epoch": 0.06341,
      "grad_norm": 0.8543521892094537,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 6341
    },
    {
      "epoch": 0.06342,
      "grad_norm": 1.1724944784180746,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 6342
    },
    {
      "epoch": 0.06343,
      "grad_norm": 1.0104393227402537,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 6343
    },
    {
      "epoch": 0.06344,
      "grad_norm": 0.9854141445654092,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 6344
    },
    {
      "epoch": 0.06345,
      "grad_norm": 1.0901443364751084,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 6345
    },
    {
      "epoch": 0.06346,
      "grad_norm": 0.9719436488074422,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 6346
    },
    {
      "epoch": 0.06347,
      "grad_norm": 1.1087617785535473,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 6347
    },
    {
      "epoch": 0.06348,
      "grad_norm": 1.062332265895712,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 6348
    },
    {
      "epoch": 0.06349,
      "grad_norm": 1.0763754416322948,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 6349
    },
    {
      "epoch": 0.0635,
      "grad_norm": 0.8840755219842192,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 6350
    },
    {
      "epoch": 0.06351,
      "grad_norm": 0.8206117947783813,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 6351
    },
    {
      "epoch": 0.06352,
      "grad_norm": 0.9715905967435101,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 6352
    },
    {
      "epoch": 0.06353,
      "grad_norm": 1.3403472905771021,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 6353
    },
    {
      "epoch": 0.06354,
      "grad_norm": 0.7230549013634665,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 6354
    },
    {
      "epoch": 0.06355,
      "grad_norm": 0.6387342513840211,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 6355
    },
    {
      "epoch": 0.06356,
      "grad_norm": 0.8116813215118523,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 6356
    },
    {
      "epoch": 0.06357,
      "grad_norm": 1.0732408549550134,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 6357
    },
    {
      "epoch": 0.06358,
      "grad_norm": 1.4061400227332301,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 6358
    },
    {
      "epoch": 0.06359,
      "grad_norm": 0.816262506494798,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 6359
    },
    {
      "epoch": 0.0636,
      "grad_norm": 0.7856925452250502,
      "learning_rate": 0.003,
      "loss": 4.1251,
      "step": 6360
    },
    {
      "epoch": 0.06361,
      "grad_norm": 0.8172194066665021,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 6361
    },
    {
      "epoch": 0.06362,
      "grad_norm": 0.8937786474083863,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 6362
    },
    {
      "epoch": 0.06363,
      "grad_norm": 1.213011420537028,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 6363
    },
    {
      "epoch": 0.06364,
      "grad_norm": 0.8563828460718517,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 6364
    },
    {
      "epoch": 0.06365,
      "grad_norm": 1.0114885163377767,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 6365
    },
    {
      "epoch": 0.06366,
      "grad_norm": 1.0801002256043084,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6366
    },
    {
      "epoch": 0.06367,
      "grad_norm": 0.9786682306601094,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 6367
    },
    {
      "epoch": 0.06368,
      "grad_norm": 1.0821388307035957,
      "learning_rate": 0.003,
      "loss": 4.1443,
      "step": 6368
    },
    {
      "epoch": 0.06369,
      "grad_norm": 1.2004164285754237,
      "learning_rate": 0.003,
      "loss": 4.1524,
      "step": 6369
    },
    {
      "epoch": 0.0637,
      "grad_norm": 1.0351207503381683,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 6370
    },
    {
      "epoch": 0.06371,
      "grad_norm": 1.061549907692225,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 6371
    },
    {
      "epoch": 0.06372,
      "grad_norm": 0.9665848388085393,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 6372
    },
    {
      "epoch": 0.06373,
      "grad_norm": 1.0606306406187926,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 6373
    },
    {
      "epoch": 0.06374,
      "grad_norm": 0.9478688183673424,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 6374
    },
    {
      "epoch": 0.06375,
      "grad_norm": 0.9561463271072295,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 6375
    },
    {
      "epoch": 0.06376,
      "grad_norm": 1.0954200184165113,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 6376
    },
    {
      "epoch": 0.06377,
      "grad_norm": 1.0149546225474946,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 6377
    },
    {
      "epoch": 0.06378,
      "grad_norm": 1.2530939110895587,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 6378
    },
    {
      "epoch": 0.06379,
      "grad_norm": 0.9536829019923387,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 6379
    },
    {
      "epoch": 0.0638,
      "grad_norm": 1.124365245664102,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 6380
    },
    {
      "epoch": 0.06381,
      "grad_norm": 1.067522258480721,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 6381
    },
    {
      "epoch": 0.06382,
      "grad_norm": 1.165510546851359,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 6382
    },
    {
      "epoch": 0.06383,
      "grad_norm": 0.9796661652974314,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 6383
    },
    {
      "epoch": 0.06384,
      "grad_norm": 0.9583436524182503,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 6384
    },
    {
      "epoch": 0.06385,
      "grad_norm": 0.9606808146113693,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 6385
    },
    {
      "epoch": 0.06386,
      "grad_norm": 1.0631319488352189,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 6386
    },
    {
      "epoch": 0.06387,
      "grad_norm": 0.9436696570931993,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 6387
    },
    {
      "epoch": 0.06388,
      "grad_norm": 1.0318755835894513,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 6388
    },
    {
      "epoch": 0.06389,
      "grad_norm": 1.0643029277751763,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 6389
    },
    {
      "epoch": 0.0639,
      "grad_norm": 0.9274983406171419,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 6390
    },
    {
      "epoch": 0.06391,
      "grad_norm": 1.1657830691809308,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 6391
    },
    {
      "epoch": 0.06392,
      "grad_norm": 1.0164575608007218,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 6392
    },
    {
      "epoch": 0.06393,
      "grad_norm": 1.0672288928145917,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 6393
    },
    {
      "epoch": 0.06394,
      "grad_norm": 1.2905438243321443,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 6394
    },
    {
      "epoch": 0.06395,
      "grad_norm": 1.10123470204369,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 6395
    },
    {
      "epoch": 0.06396,
      "grad_norm": 1.0124081789493147,
      "learning_rate": 0.003,
      "loss": 4.1487,
      "step": 6396
    },
    {
      "epoch": 0.06397,
      "grad_norm": 0.9031540181239835,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 6397
    },
    {
      "epoch": 0.06398,
      "grad_norm": 0.8920146505501644,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 6398
    },
    {
      "epoch": 0.06399,
      "grad_norm": 0.8773434678917014,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 6399
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.9294523161088548,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 6400
    },
    {
      "epoch": 0.06401,
      "grad_norm": 0.8919392182837542,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 6401
    },
    {
      "epoch": 0.06402,
      "grad_norm": 0.8417805555156116,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 6402
    },
    {
      "epoch": 0.06403,
      "grad_norm": 0.9725502407864219,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6403
    },
    {
      "epoch": 0.06404,
      "grad_norm": 1.0332905744103569,
      "learning_rate": 0.003,
      "loss": 4.1543,
      "step": 6404
    },
    {
      "epoch": 0.06405,
      "grad_norm": 1.2514671984682015,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 6405
    },
    {
      "epoch": 0.06406,
      "grad_norm": 1.1553499196304904,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 6406
    },
    {
      "epoch": 0.06407,
      "grad_norm": 0.9874546200949287,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 6407
    },
    {
      "epoch": 0.06408,
      "grad_norm": 1.0027041565820474,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 6408
    },
    {
      "epoch": 0.06409,
      "grad_norm": 1.0037310565561153,
      "learning_rate": 0.003,
      "loss": 4.1262,
      "step": 6409
    },
    {
      "epoch": 0.0641,
      "grad_norm": 1.0349990838623648,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 6410
    },
    {
      "epoch": 0.06411,
      "grad_norm": 1.2505472332691576,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 6411
    },
    {
      "epoch": 0.06412,
      "grad_norm": 1.107423585218049,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 6412
    },
    {
      "epoch": 0.06413,
      "grad_norm": 1.0437351552377427,
      "learning_rate": 0.003,
      "loss": 4.1383,
      "step": 6413
    },
    {
      "epoch": 0.06414,
      "grad_norm": 1.1164278775889673,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 6414
    },
    {
      "epoch": 0.06415,
      "grad_norm": 0.9745118316725514,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 6415
    },
    {
      "epoch": 0.06416,
      "grad_norm": 0.9563092447642871,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 6416
    },
    {
      "epoch": 0.06417,
      "grad_norm": 0.9473328664106608,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6417
    },
    {
      "epoch": 0.06418,
      "grad_norm": 1.006366955338522,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 6418
    },
    {
      "epoch": 0.06419,
      "grad_norm": 1.3186351537429872,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 6419
    },
    {
      "epoch": 0.0642,
      "grad_norm": 0.874887713674597,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 6420
    },
    {
      "epoch": 0.06421,
      "grad_norm": 0.793681194621428,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 6421
    },
    {
      "epoch": 0.06422,
      "grad_norm": 0.8959604597659665,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 6422
    },
    {
      "epoch": 0.06423,
      "grad_norm": 0.9509492440139267,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6423
    },
    {
      "epoch": 0.06424,
      "grad_norm": 1.1536140647765203,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 6424
    },
    {
      "epoch": 0.06425,
      "grad_norm": 1.0770680367269025,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 6425
    },
    {
      "epoch": 0.06426,
      "grad_norm": 1.2598814102700444,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 6426
    },
    {
      "epoch": 0.06427,
      "grad_norm": 0.8655844051388307,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 6427
    },
    {
      "epoch": 0.06428,
      "grad_norm": 0.8184055602347383,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 6428
    },
    {
      "epoch": 0.06429,
      "grad_norm": 0.8554320627088341,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 6429
    },
    {
      "epoch": 0.0643,
      "grad_norm": 0.8949349865885768,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 6430
    },
    {
      "epoch": 0.06431,
      "grad_norm": 0.934335416030418,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 6431
    },
    {
      "epoch": 0.06432,
      "grad_norm": 0.9265117928117284,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 6432
    },
    {
      "epoch": 0.06433,
      "grad_norm": 1.024797684763137,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 6433
    },
    {
      "epoch": 0.06434,
      "grad_norm": 1.3600455167556202,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 6434
    },
    {
      "epoch": 0.06435,
      "grad_norm": 0.9529262641132417,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 6435
    },
    {
      "epoch": 0.06436,
      "grad_norm": 1.034119140498242,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 6436
    },
    {
      "epoch": 0.06437,
      "grad_norm": 1.0713285668150787,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 6437
    },
    {
      "epoch": 0.06438,
      "grad_norm": 0.9683106581176993,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6438
    },
    {
      "epoch": 0.06439,
      "grad_norm": 0.9652914703033583,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 6439
    },
    {
      "epoch": 0.0644,
      "grad_norm": 1.0197094752336315,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 6440
    },
    {
      "epoch": 0.06441,
      "grad_norm": 1.0610898230757766,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 6441
    },
    {
      "epoch": 0.06442,
      "grad_norm": 0.9375680251241779,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 6442
    },
    {
      "epoch": 0.06443,
      "grad_norm": 0.882726438804907,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 6443
    },
    {
      "epoch": 0.06444,
      "grad_norm": 0.8666984972124473,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 6444
    },
    {
      "epoch": 0.06445,
      "grad_norm": 1.0588303268695138,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 6445
    },
    {
      "epoch": 0.06446,
      "grad_norm": 1.2128013732008698,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 6446
    },
    {
      "epoch": 0.06447,
      "grad_norm": 1.0289917232865164,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6447
    },
    {
      "epoch": 0.06448,
      "grad_norm": 1.161447002205071,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 6448
    },
    {
      "epoch": 0.06449,
      "grad_norm": 1.1207683657781975,
      "learning_rate": 0.003,
      "loss": 4.15,
      "step": 6449
    },
    {
      "epoch": 0.0645,
      "grad_norm": 1.1830174596242193,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 6450
    },
    {
      "epoch": 0.06451,
      "grad_norm": 0.8475401638958145,
      "learning_rate": 0.003,
      "loss": 4.1394,
      "step": 6451
    },
    {
      "epoch": 0.06452,
      "grad_norm": 0.864303947498384,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 6452
    },
    {
      "epoch": 0.06453,
      "grad_norm": 0.9576553598100936,
      "learning_rate": 0.003,
      "loss": 4.1452,
      "step": 6453
    },
    {
      "epoch": 0.06454,
      "grad_norm": 0.9814262975984199,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 6454
    },
    {
      "epoch": 0.06455,
      "grad_norm": 1.0787724739719247,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 6455
    },
    {
      "epoch": 0.06456,
      "grad_norm": 1.1228799973202512,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 6456
    },
    {
      "epoch": 0.06457,
      "grad_norm": 1.0268253444336903,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6457
    },
    {
      "epoch": 0.06458,
      "grad_norm": 1.0108645808692163,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 6458
    },
    {
      "epoch": 0.06459,
      "grad_norm": 1.1418149699378657,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 6459
    },
    {
      "epoch": 0.0646,
      "grad_norm": 0.8824017335220107,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 6460
    },
    {
      "epoch": 0.06461,
      "grad_norm": 1.0178895025907293,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 6461
    },
    {
      "epoch": 0.06462,
      "grad_norm": 1.1346084629753426,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 6462
    },
    {
      "epoch": 0.06463,
      "grad_norm": 1.0391228751646258,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6463
    },
    {
      "epoch": 0.06464,
      "grad_norm": 1.2899475407345256,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 6464
    },
    {
      "epoch": 0.06465,
      "grad_norm": 1.1687110082198426,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 6465
    },
    {
      "epoch": 0.06466,
      "grad_norm": 0.9657486632446244,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 6466
    },
    {
      "epoch": 0.06467,
      "grad_norm": 1.0898402207089803,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 6467
    },
    {
      "epoch": 0.06468,
      "grad_norm": 1.0616683733064285,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 6468
    },
    {
      "epoch": 0.06469,
      "grad_norm": 1.0893197005186201,
      "learning_rate": 0.003,
      "loss": 4.1215,
      "step": 6469
    },
    {
      "epoch": 0.0647,
      "grad_norm": 1.0058220508191251,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 6470
    },
    {
      "epoch": 0.06471,
      "grad_norm": 0.956797815052642,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 6471
    },
    {
      "epoch": 0.06472,
      "grad_norm": 1.0128039862356037,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 6472
    },
    {
      "epoch": 0.06473,
      "grad_norm": 1.2167082158141116,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 6473
    },
    {
      "epoch": 0.06474,
      "grad_norm": 0.7988940618326229,
      "learning_rate": 0.003,
      "loss": 4.1391,
      "step": 6474
    },
    {
      "epoch": 0.06475,
      "grad_norm": 0.7962292180465622,
      "learning_rate": 0.003,
      "loss": 4.1319,
      "step": 6475
    },
    {
      "epoch": 0.06476,
      "grad_norm": 0.7713643156623257,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 6476
    },
    {
      "epoch": 0.06477,
      "grad_norm": 1.0364319372786666,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 6477
    },
    {
      "epoch": 0.06478,
      "grad_norm": 1.1397532882735162,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 6478
    },
    {
      "epoch": 0.06479,
      "grad_norm": 0.7537458893609261,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 6479
    },
    {
      "epoch": 0.0648,
      "grad_norm": 0.8777247030769344,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 6480
    },
    {
      "epoch": 0.06481,
      "grad_norm": 0.8860162677530811,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 6481
    },
    {
      "epoch": 0.06482,
      "grad_norm": 0.7666515029588695,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 6482
    },
    {
      "epoch": 0.06483,
      "grad_norm": 0.8203308848715672,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 6483
    },
    {
      "epoch": 0.06484,
      "grad_norm": 0.8079518261592182,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 6484
    },
    {
      "epoch": 0.06485,
      "grad_norm": 0.8931148106546561,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 6485
    },
    {
      "epoch": 0.06486,
      "grad_norm": 0.9951987941612116,
      "learning_rate": 0.003,
      "loss": 4.1297,
      "step": 6486
    },
    {
      "epoch": 0.06487,
      "grad_norm": 1.041100076013719,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 6487
    },
    {
      "epoch": 0.06488,
      "grad_norm": 1.1977548341810142,
      "learning_rate": 0.003,
      "loss": 4.1311,
      "step": 6488
    },
    {
      "epoch": 0.06489,
      "grad_norm": 1.4315443170694175,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 6489
    },
    {
      "epoch": 0.0649,
      "grad_norm": 0.790139556257875,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6490
    },
    {
      "epoch": 0.06491,
      "grad_norm": 0.8725006774086241,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 6491
    },
    {
      "epoch": 0.06492,
      "grad_norm": 0.8550610838598619,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 6492
    },
    {
      "epoch": 0.06493,
      "grad_norm": 1.0672439328854844,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 6493
    },
    {
      "epoch": 0.06494,
      "grad_norm": 1.1053803317543687,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 6494
    },
    {
      "epoch": 0.06495,
      "grad_norm": 1.2245048272685835,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 6495
    },
    {
      "epoch": 0.06496,
      "grad_norm": 1.1827354647076507,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 6496
    },
    {
      "epoch": 0.06497,
      "grad_norm": 0.8875741851301479,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 6497
    },
    {
      "epoch": 0.06498,
      "grad_norm": 0.9405556784618091,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 6498
    },
    {
      "epoch": 0.06499,
      "grad_norm": 1.1518206025009547,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 6499
    },
    {
      "epoch": 0.065,
      "grad_norm": 1.1135199791235904,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 6500
    },
    {
      "epoch": 0.06501,
      "grad_norm": 0.9426453963848523,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 6501
    },
    {
      "epoch": 0.06502,
      "grad_norm": 0.906946070942705,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 6502
    },
    {
      "epoch": 0.06503,
      "grad_norm": 0.8632347389219635,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 6503
    },
    {
      "epoch": 0.06504,
      "grad_norm": 0.9623491280205102,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 6504
    },
    {
      "epoch": 0.06505,
      "grad_norm": 1.4229366271424375,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 6505
    },
    {
      "epoch": 0.06506,
      "grad_norm": 1.0451736131803195,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 6506
    },
    {
      "epoch": 0.06507,
      "grad_norm": 1.2294334171006416,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 6507
    },
    {
      "epoch": 0.06508,
      "grad_norm": 0.9445531435779846,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 6508
    },
    {
      "epoch": 0.06509,
      "grad_norm": 0.9256926488563311,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 6509
    },
    {
      "epoch": 0.0651,
      "grad_norm": 1.2262032469738913,
      "learning_rate": 0.003,
      "loss": 4.1386,
      "step": 6510
    },
    {
      "epoch": 0.06511,
      "grad_norm": 1.0322387593080742,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 6511
    },
    {
      "epoch": 0.06512,
      "grad_norm": 1.322871770532383,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 6512
    },
    {
      "epoch": 0.06513,
      "grad_norm": 1.0020496514714246,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 6513
    },
    {
      "epoch": 0.06514,
      "grad_norm": 1.0807327376240277,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6514
    },
    {
      "epoch": 0.06515,
      "grad_norm": 0.9992369872861028,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 6515
    },
    {
      "epoch": 0.06516,
      "grad_norm": 1.0690181996728259,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 6516
    },
    {
      "epoch": 0.06517,
      "grad_norm": 1.0205546370680254,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 6517
    },
    {
      "epoch": 0.06518,
      "grad_norm": 0.9661649072469782,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 6518
    },
    {
      "epoch": 0.06519,
      "grad_norm": 1.1841310244092509,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 6519
    },
    {
      "epoch": 0.0652,
      "grad_norm": 1.0151264535711446,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 6520
    },
    {
      "epoch": 0.06521,
      "grad_norm": 1.1533183181347186,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 6521
    },
    {
      "epoch": 0.06522,
      "grad_norm": 1.0222075376967539,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 6522
    },
    {
      "epoch": 0.06523,
      "grad_norm": 1.1249451027703596,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 6523
    },
    {
      "epoch": 0.06524,
      "grad_norm": 0.9778774962433593,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 6524
    },
    {
      "epoch": 0.06525,
      "grad_norm": 1.038580395735807,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 6525
    },
    {
      "epoch": 0.06526,
      "grad_norm": 1.075878133922806,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 6526
    },
    {
      "epoch": 0.06527,
      "grad_norm": 1.025791974704542,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 6527
    },
    {
      "epoch": 0.06528,
      "grad_norm": 0.9973318916151819,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 6528
    },
    {
      "epoch": 0.06529,
      "grad_norm": 1.2162973451905112,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6529
    },
    {
      "epoch": 0.0653,
      "grad_norm": 0.9871372103084888,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 6530
    },
    {
      "epoch": 0.06531,
      "grad_norm": 1.1945236960287906,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 6531
    },
    {
      "epoch": 0.06532,
      "grad_norm": 0.9401643138302784,
      "learning_rate": 0.003,
      "loss": 4.1268,
      "step": 6532
    },
    {
      "epoch": 0.06533,
      "grad_norm": 1.1178892277715633,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 6533
    },
    {
      "epoch": 0.06534,
      "grad_norm": 0.9832931153486173,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6534
    },
    {
      "epoch": 0.06535,
      "grad_norm": 0.8533727635187061,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 6535
    },
    {
      "epoch": 0.06536,
      "grad_norm": 0.7504468053256232,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 6536
    },
    {
      "epoch": 0.06537,
      "grad_norm": 0.881013336525802,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 6537
    },
    {
      "epoch": 0.06538,
      "grad_norm": 1.0943393042527045,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 6538
    },
    {
      "epoch": 0.06539,
      "grad_norm": 1.1900608228822722,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 6539
    },
    {
      "epoch": 0.0654,
      "grad_norm": 0.8415174000109801,
      "learning_rate": 0.003,
      "loss": 4.1428,
      "step": 6540
    },
    {
      "epoch": 0.06541,
      "grad_norm": 0.6953210773431162,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 6541
    },
    {
      "epoch": 0.06542,
      "grad_norm": 0.7466595601641312,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 6542
    },
    {
      "epoch": 0.06543,
      "grad_norm": 0.7975588171096655,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 6543
    },
    {
      "epoch": 0.06544,
      "grad_norm": 0.8156245001878942,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 6544
    },
    {
      "epoch": 0.06545,
      "grad_norm": 0.9060264548647263,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 6545
    },
    {
      "epoch": 0.06546,
      "grad_norm": 0.9960765646804454,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 6546
    },
    {
      "epoch": 0.06547,
      "grad_norm": 1.3831374261427445,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 6547
    },
    {
      "epoch": 0.06548,
      "grad_norm": 0.782948712181559,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 6548
    },
    {
      "epoch": 0.06549,
      "grad_norm": 0.8977206509308298,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 6549
    },
    {
      "epoch": 0.0655,
      "grad_norm": 1.120628275139245,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 6550
    },
    {
      "epoch": 0.06551,
      "grad_norm": 1.2840718294242444,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 6551
    },
    {
      "epoch": 0.06552,
      "grad_norm": 1.1241386761670837,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 6552
    },
    {
      "epoch": 0.06553,
      "grad_norm": 0.7870751727024481,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 6553
    },
    {
      "epoch": 0.06554,
      "grad_norm": 0.6531268213697407,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 6554
    },
    {
      "epoch": 0.06555,
      "grad_norm": 0.7458288916343057,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 6555
    },
    {
      "epoch": 0.06556,
      "grad_norm": 0.7935619352156358,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6556
    },
    {
      "epoch": 0.06557,
      "grad_norm": 0.9338290170497964,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 6557
    },
    {
      "epoch": 0.06558,
      "grad_norm": 1.163914741576227,
      "learning_rate": 0.003,
      "loss": 4.134,
      "step": 6558
    },
    {
      "epoch": 0.06559,
      "grad_norm": 1.030586956016889,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 6559
    },
    {
      "epoch": 0.0656,
      "grad_norm": 1.1181941312817716,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 6560
    },
    {
      "epoch": 0.06561,
      "grad_norm": 1.0126984372372947,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 6561
    },
    {
      "epoch": 0.06562,
      "grad_norm": 1.0009991860930807,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 6562
    },
    {
      "epoch": 0.06563,
      "grad_norm": 1.1006051624397504,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 6563
    },
    {
      "epoch": 0.06564,
      "grad_norm": 1.0608902444027226,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6564
    },
    {
      "epoch": 0.06565,
      "grad_norm": 1.0951281729266462,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 6565
    },
    {
      "epoch": 0.06566,
      "grad_norm": 1.1144896086481675,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6566
    },
    {
      "epoch": 0.06567,
      "grad_norm": 1.2539223651632134,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 6567
    },
    {
      "epoch": 0.06568,
      "grad_norm": 1.2136692336762676,
      "learning_rate": 0.003,
      "loss": 4.1423,
      "step": 6568
    },
    {
      "epoch": 0.06569,
      "grad_norm": 1.0107611711365292,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 6569
    },
    {
      "epoch": 0.0657,
      "grad_norm": 1.1701756562475913,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 6570
    },
    {
      "epoch": 0.06571,
      "grad_norm": 1.118588414395485,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 6571
    },
    {
      "epoch": 0.06572,
      "grad_norm": 1.1618445605816499,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 6572
    },
    {
      "epoch": 0.06573,
      "grad_norm": 1.0406499653295629,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 6573
    },
    {
      "epoch": 0.06574,
      "grad_norm": 1.2822924507807092,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 6574
    },
    {
      "epoch": 0.06575,
      "grad_norm": 0.9150523557477509,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 6575
    },
    {
      "epoch": 0.06576,
      "grad_norm": 0.9513417232803534,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 6576
    },
    {
      "epoch": 0.06577,
      "grad_norm": 1.1218399136336974,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 6577
    },
    {
      "epoch": 0.06578,
      "grad_norm": 0.9394344957421601,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 6578
    },
    {
      "epoch": 0.06579,
      "grad_norm": 1.0809645820357565,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 6579
    },
    {
      "epoch": 0.0658,
      "grad_norm": 1.214518886759514,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 6580
    },
    {
      "epoch": 0.06581,
      "grad_norm": 0.9638219433089225,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 6581
    },
    {
      "epoch": 0.06582,
      "grad_norm": 0.9046848586781843,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 6582
    },
    {
      "epoch": 0.06583,
      "grad_norm": 0.8609622707678348,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 6583
    },
    {
      "epoch": 0.06584,
      "grad_norm": 0.8835791819190192,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 6584
    },
    {
      "epoch": 0.06585,
      "grad_norm": 0.9987318797336011,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 6585
    },
    {
      "epoch": 0.06586,
      "grad_norm": 1.1355073490088379,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 6586
    },
    {
      "epoch": 0.06587,
      "grad_norm": 1.0085175293063395,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6587
    },
    {
      "epoch": 0.06588,
      "grad_norm": 0.9620876182851724,
      "learning_rate": 0.003,
      "loss": 4.1364,
      "step": 6588
    },
    {
      "epoch": 0.06589,
      "grad_norm": 1.048954389785324,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 6589
    },
    {
      "epoch": 0.0659,
      "grad_norm": 1.0238341074758999,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 6590
    },
    {
      "epoch": 0.06591,
      "grad_norm": 1.075709048948834,
      "learning_rate": 0.003,
      "loss": 4.1575,
      "step": 6591
    },
    {
      "epoch": 0.06592,
      "grad_norm": 1.1681526348561144,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6592
    },
    {
      "epoch": 0.06593,
      "grad_norm": 0.9756080110323221,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 6593
    },
    {
      "epoch": 0.06594,
      "grad_norm": 1.0796908095925524,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 6594
    },
    {
      "epoch": 0.06595,
      "grad_norm": 1.0723639338576645,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 6595
    },
    {
      "epoch": 0.06596,
      "grad_norm": 1.2099686727476,
      "learning_rate": 0.003,
      "loss": 4.1326,
      "step": 6596
    },
    {
      "epoch": 0.06597,
      "grad_norm": 1.020495761358447,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 6597
    },
    {
      "epoch": 0.06598,
      "grad_norm": 1.250785346850008,
      "learning_rate": 0.003,
      "loss": 4.1453,
      "step": 6598
    },
    {
      "epoch": 0.06599,
      "grad_norm": 0.8082604467548349,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 6599
    },
    {
      "epoch": 0.066,
      "grad_norm": 0.7311810832534161,
      "learning_rate": 0.003,
      "loss": 4.1209,
      "step": 6600
    },
    {
      "epoch": 0.06601,
      "grad_norm": 0.8646948838436856,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 6601
    },
    {
      "epoch": 0.06602,
      "grad_norm": 1.2019272374710974,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 6602
    },
    {
      "epoch": 0.06603,
      "grad_norm": 0.99896718805981,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 6603
    },
    {
      "epoch": 0.06604,
      "grad_norm": 1.1797514234958721,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6604
    },
    {
      "epoch": 0.06605,
      "grad_norm": 1.2720223453313813,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 6605
    },
    {
      "epoch": 0.06606,
      "grad_norm": 0.906064721496246,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 6606
    },
    {
      "epoch": 0.06607,
      "grad_norm": 0.8738244371131962,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 6607
    },
    {
      "epoch": 0.06608,
      "grad_norm": 0.9363228945652666,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 6608
    },
    {
      "epoch": 0.06609,
      "grad_norm": 1.1416719220665938,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 6609
    },
    {
      "epoch": 0.0661,
      "grad_norm": 1.220985108255946,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 6610
    },
    {
      "epoch": 0.06611,
      "grad_norm": 1.137872261131753,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6611
    },
    {
      "epoch": 0.06612,
      "grad_norm": 1.1395613837870757,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 6612
    },
    {
      "epoch": 0.06613,
      "grad_norm": 1.191648524183824,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 6613
    },
    {
      "epoch": 0.06614,
      "grad_norm": 0.8858296562908814,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 6614
    },
    {
      "epoch": 0.06615,
      "grad_norm": 0.9819337255984676,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 6615
    },
    {
      "epoch": 0.06616,
      "grad_norm": 1.1867192771582191,
      "learning_rate": 0.003,
      "loss": 4.1552,
      "step": 6616
    },
    {
      "epoch": 0.06617,
      "grad_norm": 0.9053344714711246,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 6617
    },
    {
      "epoch": 0.06618,
      "grad_norm": 0.9219240319109411,
      "learning_rate": 0.003,
      "loss": 4.1333,
      "step": 6618
    },
    {
      "epoch": 0.06619,
      "grad_norm": 0.991583160819303,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 6619
    },
    {
      "epoch": 0.0662,
      "grad_norm": 1.1650808913340467,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 6620
    },
    {
      "epoch": 0.06621,
      "grad_norm": 0.9080740223120065,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 6621
    },
    {
      "epoch": 0.06622,
      "grad_norm": 0.8680994255398555,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 6622
    },
    {
      "epoch": 0.06623,
      "grad_norm": 0.8628560499230488,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6623
    },
    {
      "epoch": 0.06624,
      "grad_norm": 0.845810905345881,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 6624
    },
    {
      "epoch": 0.06625,
      "grad_norm": 0.8753283211615485,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6625
    },
    {
      "epoch": 0.06626,
      "grad_norm": 0.8898864094278442,
      "learning_rate": 0.003,
      "loss": 4.1275,
      "step": 6626
    },
    {
      "epoch": 0.06627,
      "grad_norm": 0.8556887714824031,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 6627
    },
    {
      "epoch": 0.06628,
      "grad_norm": 0.8438393299522239,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 6628
    },
    {
      "epoch": 0.06629,
      "grad_norm": 1.0104192170537771,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 6629
    },
    {
      "epoch": 0.0663,
      "grad_norm": 1.4015375334692346,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 6630
    },
    {
      "epoch": 0.06631,
      "grad_norm": 0.9722935737313815,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6631
    },
    {
      "epoch": 0.06632,
      "grad_norm": 0.9803618166102699,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 6632
    },
    {
      "epoch": 0.06633,
      "grad_norm": 1.1296705060941024,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 6633
    },
    {
      "epoch": 0.06634,
      "grad_norm": 1.0907326109387054,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 6634
    },
    {
      "epoch": 0.06635,
      "grad_norm": 0.9435618555110209,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 6635
    },
    {
      "epoch": 0.06636,
      "grad_norm": 1.0285537900430537,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 6636
    },
    {
      "epoch": 0.06637,
      "grad_norm": 1.202489591650724,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 6637
    },
    {
      "epoch": 0.06638,
      "grad_norm": 1.017321963175585,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 6638
    },
    {
      "epoch": 0.06639,
      "grad_norm": 1.0524483668700617,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 6639
    },
    {
      "epoch": 0.0664,
      "grad_norm": 1.1482240654374392,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 6640
    },
    {
      "epoch": 0.06641,
      "grad_norm": 0.9809058091856554,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 6641
    },
    {
      "epoch": 0.06642,
      "grad_norm": 1.1082029804560825,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 6642
    },
    {
      "epoch": 0.06643,
      "grad_norm": 1.1934857560760654,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 6643
    },
    {
      "epoch": 0.06644,
      "grad_norm": 0.9229309859195602,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 6644
    },
    {
      "epoch": 0.06645,
      "grad_norm": 1.072671684854488,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 6645
    },
    {
      "epoch": 0.06646,
      "grad_norm": 1.170997003327203,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6646
    },
    {
      "epoch": 0.06647,
      "grad_norm": 1.3361960097562062,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 6647
    },
    {
      "epoch": 0.06648,
      "grad_norm": 0.814630751575606,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 6648
    },
    {
      "epoch": 0.06649,
      "grad_norm": 0.7234763720955096,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 6649
    },
    {
      "epoch": 0.0665,
      "grad_norm": 0.8177448387254658,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 6650
    },
    {
      "epoch": 0.06651,
      "grad_norm": 0.9720558429276546,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 6651
    },
    {
      "epoch": 0.06652,
      "grad_norm": 1.194866309569147,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 6652
    },
    {
      "epoch": 0.06653,
      "grad_norm": 0.8816716043893494,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 6653
    },
    {
      "epoch": 0.06654,
      "grad_norm": 1.090559865889525,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 6654
    },
    {
      "epoch": 0.06655,
      "grad_norm": 1.0705116003746482,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 6655
    },
    {
      "epoch": 0.06656,
      "grad_norm": 1.2331529032280468,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 6656
    },
    {
      "epoch": 0.06657,
      "grad_norm": 0.9218838449430262,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 6657
    },
    {
      "epoch": 0.06658,
      "grad_norm": 1.0193847734871568,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 6658
    },
    {
      "epoch": 0.06659,
      "grad_norm": 1.2471775692333547,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 6659
    },
    {
      "epoch": 0.0666,
      "grad_norm": 1.0479297195190378,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 6660
    },
    {
      "epoch": 0.06661,
      "grad_norm": 1.1007839298335338,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 6661
    },
    {
      "epoch": 0.06662,
      "grad_norm": 0.9250727846278579,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 6662
    },
    {
      "epoch": 0.06663,
      "grad_norm": 0.9825952693837798,
      "learning_rate": 0.003,
      "loss": 4.1601,
      "step": 6663
    },
    {
      "epoch": 0.06664,
      "grad_norm": 1.3207328842359807,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 6664
    },
    {
      "epoch": 0.06665,
      "grad_norm": 1.0063660849323326,
      "learning_rate": 0.003,
      "loss": 4.1467,
      "step": 6665
    },
    {
      "epoch": 0.06666,
      "grad_norm": 1.0152175322629062,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 6666
    },
    {
      "epoch": 0.06667,
      "grad_norm": 1.2629059213515375,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 6667
    },
    {
      "epoch": 0.06668,
      "grad_norm": 1.0230139614340956,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 6668
    },
    {
      "epoch": 0.06669,
      "grad_norm": 1.168998935273423,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 6669
    },
    {
      "epoch": 0.0667,
      "grad_norm": 0.8296397320581785,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 6670
    },
    {
      "epoch": 0.06671,
      "grad_norm": 1.001643971176605,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 6671
    },
    {
      "epoch": 0.06672,
      "grad_norm": 1.1457901096644219,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 6672
    },
    {
      "epoch": 0.06673,
      "grad_norm": 1.207557539094707,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 6673
    },
    {
      "epoch": 0.06674,
      "grad_norm": 1.182055831031003,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 6674
    },
    {
      "epoch": 0.06675,
      "grad_norm": 1.034589515323592,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 6675
    },
    {
      "epoch": 0.06676,
      "grad_norm": 1.0280150455811632,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 6676
    },
    {
      "epoch": 0.06677,
      "grad_norm": 1.1343104818692615,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 6677
    },
    {
      "epoch": 0.06678,
      "grad_norm": 1.0649939504536055,
      "learning_rate": 0.003,
      "loss": 4.1415,
      "step": 6678
    },
    {
      "epoch": 0.06679,
      "grad_norm": 1.046088080295711,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 6679
    },
    {
      "epoch": 0.0668,
      "grad_norm": 1.1012078408727637,
      "learning_rate": 0.003,
      "loss": 4.1501,
      "step": 6680
    },
    {
      "epoch": 0.06681,
      "grad_norm": 1.0162711597155334,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 6681
    },
    {
      "epoch": 0.06682,
      "grad_norm": 1.2483056090722793,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 6682
    },
    {
      "epoch": 0.06683,
      "grad_norm": 0.9804124144487479,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 6683
    },
    {
      "epoch": 0.06684,
      "grad_norm": 1.0642062663332452,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 6684
    },
    {
      "epoch": 0.06685,
      "grad_norm": 1.2069965741700133,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 6685
    },
    {
      "epoch": 0.06686,
      "grad_norm": 0.8152999583568941,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 6686
    },
    {
      "epoch": 0.06687,
      "grad_norm": 0.7938321166951481,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 6687
    },
    {
      "epoch": 0.06688,
      "grad_norm": 0.8347836923393193,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 6688
    },
    {
      "epoch": 0.06689,
      "grad_norm": 1.0457470131352429,
      "learning_rate": 0.003,
      "loss": 4.1329,
      "step": 6689
    },
    {
      "epoch": 0.0669,
      "grad_norm": 1.233670905876264,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 6690
    },
    {
      "epoch": 0.06691,
      "grad_norm": 0.8950769144466896,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 6691
    },
    {
      "epoch": 0.06692,
      "grad_norm": 0.9963325414694945,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 6692
    },
    {
      "epoch": 0.06693,
      "grad_norm": 1.1667705229561567,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 6693
    },
    {
      "epoch": 0.06694,
      "grad_norm": 1.0395357257985816,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 6694
    },
    {
      "epoch": 0.06695,
      "grad_norm": 1.0389020701257194,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 6695
    },
    {
      "epoch": 0.06696,
      "grad_norm": 0.9090318347719712,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 6696
    },
    {
      "epoch": 0.06697,
      "grad_norm": 1.1170620435815923,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 6697
    },
    {
      "epoch": 0.06698,
      "grad_norm": 1.090791170002388,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 6698
    },
    {
      "epoch": 0.06699,
      "grad_norm": 0.9919188684094656,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 6699
    },
    {
      "epoch": 0.067,
      "grad_norm": 1.049150223859985,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 6700
    },
    {
      "epoch": 0.06701,
      "grad_norm": 0.9745571304224953,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 6701
    },
    {
      "epoch": 0.06702,
      "grad_norm": 1.1042289026728087,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 6702
    },
    {
      "epoch": 0.06703,
      "grad_norm": 1.1602988891479034,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 6703
    },
    {
      "epoch": 0.06704,
      "grad_norm": 1.0541345749074789,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 6704
    },
    {
      "epoch": 0.06705,
      "grad_norm": 1.151339776734197,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 6705
    },
    {
      "epoch": 0.06706,
      "grad_norm": 0.9294993068889601,
      "learning_rate": 0.003,
      "loss": 4.1488,
      "step": 6706
    },
    {
      "epoch": 0.06707,
      "grad_norm": 1.2952523670027591,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 6707
    },
    {
      "epoch": 0.06708,
      "grad_norm": 1.2446326352660062,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6708
    },
    {
      "epoch": 0.06709,
      "grad_norm": 1.1128123400028667,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 6709
    },
    {
      "epoch": 0.0671,
      "grad_norm": 1.0751514025340694,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 6710
    },
    {
      "epoch": 0.06711,
      "grad_norm": 0.9972877341330995,
      "learning_rate": 0.003,
      "loss": 4.1379,
      "step": 6711
    },
    {
      "epoch": 0.06712,
      "grad_norm": 1.335174960677335,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 6712
    },
    {
      "epoch": 0.06713,
      "grad_norm": 1.0604145566158192,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 6713
    },
    {
      "epoch": 0.06714,
      "grad_norm": 0.9845536309503395,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 6714
    },
    {
      "epoch": 0.06715,
      "grad_norm": 0.98104073747809,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 6715
    },
    {
      "epoch": 0.06716,
      "grad_norm": 1.0253080591457324,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 6716
    },
    {
      "epoch": 0.06717,
      "grad_norm": 1.1656130950903412,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 6717
    },
    {
      "epoch": 0.06718,
      "grad_norm": 0.890282809238404,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 6718
    },
    {
      "epoch": 0.06719,
      "grad_norm": 1.100865501357716,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 6719
    },
    {
      "epoch": 0.0672,
      "grad_norm": 1.2966985301388,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6720
    },
    {
      "epoch": 0.06721,
      "grad_norm": 0.7681709705927325,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 6721
    },
    {
      "epoch": 0.06722,
      "grad_norm": 0.7579242685661051,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 6722
    },
    {
      "epoch": 0.06723,
      "grad_norm": 0.8463786269974299,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 6723
    },
    {
      "epoch": 0.06724,
      "grad_norm": 1.2355718601511936,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 6724
    },
    {
      "epoch": 0.06725,
      "grad_norm": 0.9973220857824812,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 6725
    },
    {
      "epoch": 0.06726,
      "grad_norm": 1.3121262958901847,
      "learning_rate": 0.003,
      "loss": 4.1473,
      "step": 6726
    },
    {
      "epoch": 0.06727,
      "grad_norm": 0.7506377029622459,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 6727
    },
    {
      "epoch": 0.06728,
      "grad_norm": 0.7375690656350768,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 6728
    },
    {
      "epoch": 0.06729,
      "grad_norm": 0.8237808367201397,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 6729
    },
    {
      "epoch": 0.0673,
      "grad_norm": 0.93797912273744,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 6730
    },
    {
      "epoch": 0.06731,
      "grad_norm": 0.9765511050435496,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 6731
    },
    {
      "epoch": 0.06732,
      "grad_norm": 1.1004645713502215,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 6732
    },
    {
      "epoch": 0.06733,
      "grad_norm": 1.1027248682447823,
      "learning_rate": 0.003,
      "loss": 4.1197,
      "step": 6733
    },
    {
      "epoch": 0.06734,
      "grad_norm": 1.1253735749654317,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 6734
    },
    {
      "epoch": 0.06735,
      "grad_norm": 1.003542734520633,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 6735
    },
    {
      "epoch": 0.06736,
      "grad_norm": 1.1429551938720557,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 6736
    },
    {
      "epoch": 0.06737,
      "grad_norm": 1.0609352949018371,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 6737
    },
    {
      "epoch": 0.06738,
      "grad_norm": 1.0412380492368407,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 6738
    },
    {
      "epoch": 0.06739,
      "grad_norm": 1.225522061118244,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 6739
    },
    {
      "epoch": 0.0674,
      "grad_norm": 0.9920747285040594,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 6740
    },
    {
      "epoch": 0.06741,
      "grad_norm": 1.3728636340948883,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6741
    },
    {
      "epoch": 0.06742,
      "grad_norm": 1.12361874184517,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 6742
    },
    {
      "epoch": 0.06743,
      "grad_norm": 0.9616698612197171,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 6743
    },
    {
      "epoch": 0.06744,
      "grad_norm": 1.1122702070965411,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 6744
    },
    {
      "epoch": 0.06745,
      "grad_norm": 1.1170307488637157,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 6745
    },
    {
      "epoch": 0.06746,
      "grad_norm": 1.014704964150931,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 6746
    },
    {
      "epoch": 0.06747,
      "grad_norm": 1.0082376235883914,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 6747
    },
    {
      "epoch": 0.06748,
      "grad_norm": 1.0285387580619565,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 6748
    },
    {
      "epoch": 0.06749,
      "grad_norm": 0.9949987658288557,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 6749
    },
    {
      "epoch": 0.0675,
      "grad_norm": 1.012959502751053,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 6750
    },
    {
      "epoch": 0.06751,
      "grad_norm": 1.159863000962202,
      "learning_rate": 0.003,
      "loss": 4.1354,
      "step": 6751
    },
    {
      "epoch": 0.06752,
      "grad_norm": 1.158507578022585,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 6752
    },
    {
      "epoch": 0.06753,
      "grad_norm": 1.0857842005130671,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 6753
    },
    {
      "epoch": 0.06754,
      "grad_norm": 1.1455822073063449,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 6754
    },
    {
      "epoch": 0.06755,
      "grad_norm": 0.932580990979775,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 6755
    },
    {
      "epoch": 0.06756,
      "grad_norm": 0.945017592655439,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 6756
    },
    {
      "epoch": 0.06757,
      "grad_norm": 1.1087441244810934,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 6757
    },
    {
      "epoch": 0.06758,
      "grad_norm": 1.2032017160137123,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6758
    },
    {
      "epoch": 0.06759,
      "grad_norm": 0.9725904113161775,
      "learning_rate": 0.003,
      "loss": 4.1414,
      "step": 6759
    },
    {
      "epoch": 0.0676,
      "grad_norm": 0.9994768531593359,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 6760
    },
    {
      "epoch": 0.06761,
      "grad_norm": 1.2158744246583477,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 6761
    },
    {
      "epoch": 0.06762,
      "grad_norm": 0.9948770438066381,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 6762
    },
    {
      "epoch": 0.06763,
      "grad_norm": 1.0569961624633628,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 6763
    },
    {
      "epoch": 0.06764,
      "grad_norm": 1.013959466655787,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 6764
    },
    {
      "epoch": 0.06765,
      "grad_norm": 1.1364024067522305,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 6765
    },
    {
      "epoch": 0.06766,
      "grad_norm": 0.8459015884205136,
      "learning_rate": 0.003,
      "loss": 4.122,
      "step": 6766
    },
    {
      "epoch": 0.06767,
      "grad_norm": 0.880794645124822,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 6767
    },
    {
      "epoch": 0.06768,
      "grad_norm": 0.9020257743979438,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 6768
    },
    {
      "epoch": 0.06769,
      "grad_norm": 0.957067840155779,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 6769
    },
    {
      "epoch": 0.0677,
      "grad_norm": 0.9312965378254958,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 6770
    },
    {
      "epoch": 0.06771,
      "grad_norm": 0.9824294156270907,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 6771
    },
    {
      "epoch": 0.06772,
      "grad_norm": 1.1173456344519563,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 6772
    },
    {
      "epoch": 0.06773,
      "grad_norm": 0.9102193256903605,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 6773
    },
    {
      "epoch": 0.06774,
      "grad_norm": 0.9547527714430646,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 6774
    },
    {
      "epoch": 0.06775,
      "grad_norm": 1.0781058147542015,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 6775
    },
    {
      "epoch": 0.06776,
      "grad_norm": 1.6499486306229654,
      "learning_rate": 0.003,
      "loss": 4.153,
      "step": 6776
    },
    {
      "epoch": 0.06777,
      "grad_norm": 0.8340051804173416,
      "learning_rate": 0.003,
      "loss": 4.1254,
      "step": 6777
    },
    {
      "epoch": 0.06778,
      "grad_norm": 0.9281740251373968,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 6778
    },
    {
      "epoch": 0.06779,
      "grad_norm": 1.007311758088431,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 6779
    },
    {
      "epoch": 0.0678,
      "grad_norm": 1.0343368874975791,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 6780
    },
    {
      "epoch": 0.06781,
      "grad_norm": 1.0257339097012195,
      "learning_rate": 0.003,
      "loss": 4.1447,
      "step": 6781
    },
    {
      "epoch": 0.06782,
      "grad_norm": 1.208826935719483,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 6782
    },
    {
      "epoch": 0.06783,
      "grad_norm": 1.1183146143319012,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 6783
    },
    {
      "epoch": 0.06784,
      "grad_norm": 1.0684998687741116,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 6784
    },
    {
      "epoch": 0.06785,
      "grad_norm": 1.095620038881207,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 6785
    },
    {
      "epoch": 0.06786,
      "grad_norm": 1.0056898765693647,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 6786
    },
    {
      "epoch": 0.06787,
      "grad_norm": 1.4297140113997957,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 6787
    },
    {
      "epoch": 0.06788,
      "grad_norm": 0.790153682740165,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 6788
    },
    {
      "epoch": 0.06789,
      "grad_norm": 0.7738912403286867,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 6789
    },
    {
      "epoch": 0.0679,
      "grad_norm": 0.9134316450396154,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 6790
    },
    {
      "epoch": 0.06791,
      "grad_norm": 1.095403623939103,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 6791
    },
    {
      "epoch": 0.06792,
      "grad_norm": 1.0057315719319384,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 6792
    },
    {
      "epoch": 0.06793,
      "grad_norm": 1.0520802836117387,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 6793
    },
    {
      "epoch": 0.06794,
      "grad_norm": 0.930030498565471,
      "learning_rate": 0.003,
      "loss": 4.1306,
      "step": 6794
    },
    {
      "epoch": 0.06795,
      "grad_norm": 1.224689098436433,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 6795
    },
    {
      "epoch": 0.06796,
      "grad_norm": 1.296575828943012,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 6796
    },
    {
      "epoch": 0.06797,
      "grad_norm": 0.9404333531599239,
      "learning_rate": 0.003,
      "loss": 4.1372,
      "step": 6797
    },
    {
      "epoch": 0.06798,
      "grad_norm": 0.9774637017162385,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 6798
    },
    {
      "epoch": 0.06799,
      "grad_norm": 1.3189103861369607,
      "learning_rate": 0.003,
      "loss": 4.1445,
      "step": 6799
    },
    {
      "epoch": 0.068,
      "grad_norm": 0.8760990280464664,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 6800
    },
    {
      "epoch": 0.06801,
      "grad_norm": 0.7762558071690656,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 6801
    },
    {
      "epoch": 0.06802,
      "grad_norm": 0.8561625366506563,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 6802
    },
    {
      "epoch": 0.06803,
      "grad_norm": 0.9139469300515602,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 6803
    },
    {
      "epoch": 0.06804,
      "grad_norm": 1.1348166936740396,
      "learning_rate": 0.003,
      "loss": 4.127,
      "step": 6804
    },
    {
      "epoch": 0.06805,
      "grad_norm": 1.1939376321332151,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 6805
    },
    {
      "epoch": 0.06806,
      "grad_norm": 0.9515480548410903,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 6806
    },
    {
      "epoch": 0.06807,
      "grad_norm": 0.9471722400961542,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 6807
    },
    {
      "epoch": 0.06808,
      "grad_norm": 0.916585569019858,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 6808
    },
    {
      "epoch": 0.06809,
      "grad_norm": 1.2113066481378751,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 6809
    },
    {
      "epoch": 0.0681,
      "grad_norm": 1.340856802998418,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 6810
    },
    {
      "epoch": 0.06811,
      "grad_norm": 0.9885027786684883,
      "learning_rate": 0.003,
      "loss": 4.1374,
      "step": 6811
    },
    {
      "epoch": 0.06812,
      "grad_norm": 1.0267551949527556,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 6812
    },
    {
      "epoch": 0.06813,
      "grad_norm": 1.040795196495314,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 6813
    },
    {
      "epoch": 0.06814,
      "grad_norm": 1.11293418398358,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 6814
    },
    {
      "epoch": 0.06815,
      "grad_norm": 1.1607149753899817,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 6815
    },
    {
      "epoch": 0.06816,
      "grad_norm": 0.8713926376226725,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 6816
    },
    {
      "epoch": 0.06817,
      "grad_norm": 0.9081597850780296,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 6817
    },
    {
      "epoch": 0.06818,
      "grad_norm": 0.9942673729666544,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 6818
    },
    {
      "epoch": 0.06819,
      "grad_norm": 1.0537961386880241,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 6819
    },
    {
      "epoch": 0.0682,
      "grad_norm": 1.0451146888171359,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 6820
    },
    {
      "epoch": 0.06821,
      "grad_norm": 1.1139335819462208,
      "learning_rate": 0.003,
      "loss": 4.1307,
      "step": 6821
    },
    {
      "epoch": 0.06822,
      "grad_norm": 1.054644343046522,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 6822
    },
    {
      "epoch": 0.06823,
      "grad_norm": 1.0144607819110447,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 6823
    },
    {
      "epoch": 0.06824,
      "grad_norm": 0.9958932547970886,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 6824
    },
    {
      "epoch": 0.06825,
      "grad_norm": 0.8766930589389056,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 6825
    },
    {
      "epoch": 0.06826,
      "grad_norm": 0.82277829993784,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 6826
    },
    {
      "epoch": 0.06827,
      "grad_norm": 0.9752386388524714,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 6827
    },
    {
      "epoch": 0.06828,
      "grad_norm": 1.205573894590776,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 6828
    },
    {
      "epoch": 0.06829,
      "grad_norm": 1.133172664265929,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 6829
    },
    {
      "epoch": 0.0683,
      "grad_norm": 0.8238913017656965,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 6830
    },
    {
      "epoch": 0.06831,
      "grad_norm": 0.8316896827214876,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 6831
    },
    {
      "epoch": 0.06832,
      "grad_norm": 0.9269287661184776,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 6832
    },
    {
      "epoch": 0.06833,
      "grad_norm": 0.9989134257290364,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 6833
    },
    {
      "epoch": 0.06834,
      "grad_norm": 1.1107856107305376,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 6834
    },
    {
      "epoch": 0.06835,
      "grad_norm": 1.0125690092416209,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 6835
    },
    {
      "epoch": 0.06836,
      "grad_norm": 1.2778620275741064,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 6836
    },
    {
      "epoch": 0.06837,
      "grad_norm": 0.993797792799617,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 6837
    },
    {
      "epoch": 0.06838,
      "grad_norm": 1.385862043374062,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 6838
    },
    {
      "epoch": 0.06839,
      "grad_norm": 0.8693787226168019,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 6839
    },
    {
      "epoch": 0.0684,
      "grad_norm": 0.8191901365828969,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 6840
    },
    {
      "epoch": 0.06841,
      "grad_norm": 0.8891671375692035,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 6841
    },
    {
      "epoch": 0.06842,
      "grad_norm": 1.0307688402129975,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 6842
    },
    {
      "epoch": 0.06843,
      "grad_norm": 1.0868160126053539,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 6843
    },
    {
      "epoch": 0.06844,
      "grad_norm": 1.1395915635803298,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 6844
    },
    {
      "epoch": 0.06845,
      "grad_norm": 1.2053792036841005,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 6845
    },
    {
      "epoch": 0.06846,
      "grad_norm": 0.9394084816348595,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 6846
    },
    {
      "epoch": 0.06847,
      "grad_norm": 0.9659910146473426,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 6847
    },
    {
      "epoch": 0.06848,
      "grad_norm": 1.3652228201962051,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 6848
    },
    {
      "epoch": 0.06849,
      "grad_norm": 0.8944786288593367,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 6849
    },
    {
      "epoch": 0.0685,
      "grad_norm": 0.8468146869585674,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 6850
    },
    {
      "epoch": 0.06851,
      "grad_norm": 0.9087271188351737,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 6851
    },
    {
      "epoch": 0.06852,
      "grad_norm": 1.0334027372289312,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 6852
    },
    {
      "epoch": 0.06853,
      "grad_norm": 0.902983888059127,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 6853
    },
    {
      "epoch": 0.06854,
      "grad_norm": 1.1605358694940422,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 6854
    },
    {
      "epoch": 0.06855,
      "grad_norm": 1.2624753429091504,
      "learning_rate": 0.003,
      "loss": 4.1411,
      "step": 6855
    },
    {
      "epoch": 0.06856,
      "grad_norm": 1.118915168359842,
      "learning_rate": 0.003,
      "loss": 4.1344,
      "step": 6856
    },
    {
      "epoch": 0.06857,
      "grad_norm": 0.967318710251076,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6857
    },
    {
      "epoch": 0.06858,
      "grad_norm": 0.8993491747442931,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 6858
    },
    {
      "epoch": 0.06859,
      "grad_norm": 0.8640520875840221,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 6859
    },
    {
      "epoch": 0.0686,
      "grad_norm": 1.078288558641258,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 6860
    },
    {
      "epoch": 0.06861,
      "grad_norm": 1.179816478483578,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 6861
    },
    {
      "epoch": 0.06862,
      "grad_norm": 0.9056185959377018,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 6862
    },
    {
      "epoch": 0.06863,
      "grad_norm": 0.9165282656767371,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 6863
    },
    {
      "epoch": 0.06864,
      "grad_norm": 1.1201407060467088,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 6864
    },
    {
      "epoch": 0.06865,
      "grad_norm": 1.0179258305510663,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 6865
    },
    {
      "epoch": 0.06866,
      "grad_norm": 1.2117982840621284,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 6866
    },
    {
      "epoch": 0.06867,
      "grad_norm": 1.145110761860685,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 6867
    },
    {
      "epoch": 0.06868,
      "grad_norm": 0.8964379313752789,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 6868
    },
    {
      "epoch": 0.06869,
      "grad_norm": 0.8278694268577412,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 6869
    },
    {
      "epoch": 0.0687,
      "grad_norm": 0.9210440687808206,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 6870
    },
    {
      "epoch": 0.06871,
      "grad_norm": 1.2931682466657848,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 6871
    },
    {
      "epoch": 0.06872,
      "grad_norm": 1.158244309207232,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 6872
    },
    {
      "epoch": 0.06873,
      "grad_norm": 1.0509391434511997,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 6873
    },
    {
      "epoch": 0.06874,
      "grad_norm": 1.0160683857215573,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 6874
    },
    {
      "epoch": 0.06875,
      "grad_norm": 1.070894253002053,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 6875
    },
    {
      "epoch": 0.06876,
      "grad_norm": 1.0559885742243682,
      "learning_rate": 0.003,
      "loss": 4.1237,
      "step": 6876
    },
    {
      "epoch": 0.06877,
      "grad_norm": 0.9568759782863472,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 6877
    },
    {
      "epoch": 0.06878,
      "grad_norm": 1.1647575826059193,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 6878
    },
    {
      "epoch": 0.06879,
      "grad_norm": 1.0530843336006634,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 6879
    },
    {
      "epoch": 0.0688,
      "grad_norm": 1.0399591258677314,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 6880
    },
    {
      "epoch": 0.06881,
      "grad_norm": 0.989276355858342,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 6881
    },
    {
      "epoch": 0.06882,
      "grad_norm": 1.2920882835812373,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 6882
    },
    {
      "epoch": 0.06883,
      "grad_norm": 1.1043413882784838,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 6883
    },
    {
      "epoch": 0.06884,
      "grad_norm": 1.0613281788454871,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 6884
    },
    {
      "epoch": 0.06885,
      "grad_norm": 0.96781655650821,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 6885
    },
    {
      "epoch": 0.06886,
      "grad_norm": 0.9517962324408646,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 6886
    },
    {
      "epoch": 0.06887,
      "grad_norm": 0.9638770692400819,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 6887
    },
    {
      "epoch": 0.06888,
      "grad_norm": 1.0795473222518885,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 6888
    },
    {
      "epoch": 0.06889,
      "grad_norm": 0.959615080421356,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 6889
    },
    {
      "epoch": 0.0689,
      "grad_norm": 1.000200309338473,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 6890
    },
    {
      "epoch": 0.06891,
      "grad_norm": 1.107853464297452,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 6891
    },
    {
      "epoch": 0.06892,
      "grad_norm": 1.09996425163966,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 6892
    },
    {
      "epoch": 0.06893,
      "grad_norm": 1.1680931931384533,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 6893
    },
    {
      "epoch": 0.06894,
      "grad_norm": 1.105626112595867,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 6894
    },
    {
      "epoch": 0.06895,
      "grad_norm": 0.8725390460274325,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 6895
    },
    {
      "epoch": 0.06896,
      "grad_norm": 1.0781792472155975,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 6896
    },
    {
      "epoch": 0.06897,
      "grad_norm": 1.411658300042236,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 6897
    },
    {
      "epoch": 0.06898,
      "grad_norm": 0.9732387067386347,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 6898
    },
    {
      "epoch": 0.06899,
      "grad_norm": 1.0275999231438366,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 6899
    },
    {
      "epoch": 0.069,
      "grad_norm": 1.0294308999954391,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 6900
    },
    {
      "epoch": 0.06901,
      "grad_norm": 1.1909528295828076,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 6901
    },
    {
      "epoch": 0.06902,
      "grad_norm": 1.014749745517845,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 6902
    },
    {
      "epoch": 0.06903,
      "grad_norm": 1.0651050361740424,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 6903
    },
    {
      "epoch": 0.06904,
      "grad_norm": 1.1039173799433109,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 6904
    },
    {
      "epoch": 0.06905,
      "grad_norm": 0.9239962540742471,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 6905
    },
    {
      "epoch": 0.06906,
      "grad_norm": 0.8654819902503579,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 6906
    },
    {
      "epoch": 0.06907,
      "grad_norm": 1.1296324777588087,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 6907
    },
    {
      "epoch": 0.06908,
      "grad_norm": 1.3859704456042667,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 6908
    },
    {
      "epoch": 0.06909,
      "grad_norm": 1.0521744111723947,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 6909
    },
    {
      "epoch": 0.0691,
      "grad_norm": 1.0884172288485674,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 6910
    },
    {
      "epoch": 0.06911,
      "grad_norm": 0.9789284124718082,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 6911
    },
    {
      "epoch": 0.06912,
      "grad_norm": 0.9437089236297371,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 6912
    },
    {
      "epoch": 0.06913,
      "grad_norm": 0.8815007693721976,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 6913
    },
    {
      "epoch": 0.06914,
      "grad_norm": 0.9983756175361831,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 6914
    },
    {
      "epoch": 0.06915,
      "grad_norm": 1.233420747523598,
      "learning_rate": 0.003,
      "loss": 4.1369,
      "step": 6915
    },
    {
      "epoch": 0.06916,
      "grad_norm": 1.0634074261162114,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 6916
    },
    {
      "epoch": 0.06917,
      "grad_norm": 1.0640603780382807,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 6917
    },
    {
      "epoch": 0.06918,
      "grad_norm": 1.071505216595764,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 6918
    },
    {
      "epoch": 0.06919,
      "grad_norm": 0.9525463685819003,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 6919
    },
    {
      "epoch": 0.0692,
      "grad_norm": 1.0292478999895873,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 6920
    },
    {
      "epoch": 0.06921,
      "grad_norm": 1.0529101386571624,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 6921
    },
    {
      "epoch": 0.06922,
      "grad_norm": 1.0651675683551554,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 6922
    },
    {
      "epoch": 0.06923,
      "grad_norm": 1.1004871894989456,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 6923
    },
    {
      "epoch": 0.06924,
      "grad_norm": 1.2691682406714033,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 6924
    },
    {
      "epoch": 0.06925,
      "grad_norm": 1.0417482481029872,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 6925
    },
    {
      "epoch": 0.06926,
      "grad_norm": 1.1830131179045373,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 6926
    },
    {
      "epoch": 0.06927,
      "grad_norm": 1.0664599500199052,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 6927
    },
    {
      "epoch": 0.06928,
      "grad_norm": 1.017895846116543,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 6928
    },
    {
      "epoch": 0.06929,
      "grad_norm": 1.2322119521200885,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 6929
    },
    {
      "epoch": 0.0693,
      "grad_norm": 0.9444007543363695,
      "learning_rate": 0.003,
      "loss": 4.1498,
      "step": 6930
    },
    {
      "epoch": 0.06931,
      "grad_norm": 1.2022274249029867,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 6931
    },
    {
      "epoch": 0.06932,
      "grad_norm": 1.148339118744329,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 6932
    },
    {
      "epoch": 0.06933,
      "grad_norm": 1.0316199156026564,
      "learning_rate": 0.003,
      "loss": 4.132,
      "step": 6933
    },
    {
      "epoch": 0.06934,
      "grad_norm": 0.9521700971631617,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 6934
    },
    {
      "epoch": 0.06935,
      "grad_norm": 1.0650733326681008,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 6935
    },
    {
      "epoch": 0.06936,
      "grad_norm": 1.0622590447464284,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 6936
    },
    {
      "epoch": 0.06937,
      "grad_norm": 1.0213424894082164,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6937
    },
    {
      "epoch": 0.06938,
      "grad_norm": 1.0634441753043125,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 6938
    },
    {
      "epoch": 0.06939,
      "grad_norm": 0.9122903874612669,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 6939
    },
    {
      "epoch": 0.0694,
      "grad_norm": 1.1312835792631537,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 6940
    },
    {
      "epoch": 0.06941,
      "grad_norm": 1.0379316048960034,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6941
    },
    {
      "epoch": 0.06942,
      "grad_norm": 0.9970308628077427,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 6942
    },
    {
      "epoch": 0.06943,
      "grad_norm": 0.9814957047921263,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 6943
    },
    {
      "epoch": 0.06944,
      "grad_norm": 1.059683349285963,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 6944
    },
    {
      "epoch": 0.06945,
      "grad_norm": 1.0340425314303883,
      "learning_rate": 0.003,
      "loss": 4.1328,
      "step": 6945
    },
    {
      "epoch": 0.06946,
      "grad_norm": 1.0976200591310759,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 6946
    },
    {
      "epoch": 0.06947,
      "grad_norm": 1.1069722316961914,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 6947
    },
    {
      "epoch": 0.06948,
      "grad_norm": 0.9412592759167051,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 6948
    },
    {
      "epoch": 0.06949,
      "grad_norm": 0.9635427895046388,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 6949
    },
    {
      "epoch": 0.0695,
      "grad_norm": 1.1696562924083407,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 6950
    },
    {
      "epoch": 0.06951,
      "grad_norm": 0.9122948415660266,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 6951
    },
    {
      "epoch": 0.06952,
      "grad_norm": 1.0086842274617382,
      "learning_rate": 0.003,
      "loss": 4.1305,
      "step": 6952
    },
    {
      "epoch": 0.06953,
      "grad_norm": 0.9788527064917946,
      "learning_rate": 0.003,
      "loss": 4.1258,
      "step": 6953
    },
    {
      "epoch": 0.06954,
      "grad_norm": 1.2559346458301088,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 6954
    },
    {
      "epoch": 0.06955,
      "grad_norm": 0.8368392604009263,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 6955
    },
    {
      "epoch": 0.06956,
      "grad_norm": 0.9878198015577839,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 6956
    },
    {
      "epoch": 0.06957,
      "grad_norm": 1.1449000449568048,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 6957
    },
    {
      "epoch": 0.06958,
      "grad_norm": 1.1872333547049227,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 6958
    },
    {
      "epoch": 0.06959,
      "grad_norm": 1.064048588165346,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 6959
    },
    {
      "epoch": 0.0696,
      "grad_norm": 1.039713959773937,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 6960
    },
    {
      "epoch": 0.06961,
      "grad_norm": 1.0996296813655457,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 6961
    },
    {
      "epoch": 0.06962,
      "grad_norm": 0.9879371143151359,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 6962
    },
    {
      "epoch": 0.06963,
      "grad_norm": 1.0549404515531287,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 6963
    },
    {
      "epoch": 0.06964,
      "grad_norm": 1.1314314837164443,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 6964
    },
    {
      "epoch": 0.06965,
      "grad_norm": 1.1674676649395428,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 6965
    },
    {
      "epoch": 0.06966,
      "grad_norm": 1.0504852564702463,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 6966
    },
    {
      "epoch": 0.06967,
      "grad_norm": 1.150124143433524,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 6967
    },
    {
      "epoch": 0.06968,
      "grad_norm": 1.0016009335375253,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 6968
    },
    {
      "epoch": 0.06969,
      "grad_norm": 1.2023901390365654,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 6969
    },
    {
      "epoch": 0.0697,
      "grad_norm": 0.8664049863091234,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 6970
    },
    {
      "epoch": 0.06971,
      "grad_norm": 1.0124884250644552,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 6971
    },
    {
      "epoch": 0.06972,
      "grad_norm": 1.2760143566845368,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 6972
    },
    {
      "epoch": 0.06973,
      "grad_norm": 1.0138958170391523,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 6973
    },
    {
      "epoch": 0.06974,
      "grad_norm": 1.1582433174589497,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 6974
    },
    {
      "epoch": 0.06975,
      "grad_norm": 1.1427796390190856,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 6975
    },
    {
      "epoch": 0.06976,
      "grad_norm": 0.9310217784301746,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 6976
    },
    {
      "epoch": 0.06977,
      "grad_norm": 0.9058473639895622,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 6977
    },
    {
      "epoch": 0.06978,
      "grad_norm": 1.0711966126178156,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 6978
    },
    {
      "epoch": 0.06979,
      "grad_norm": 1.0970262192518259,
      "learning_rate": 0.003,
      "loss": 4.1281,
      "step": 6979
    },
    {
      "epoch": 0.0698,
      "grad_norm": 1.0412974314007526,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 6980
    },
    {
      "epoch": 0.06981,
      "grad_norm": 0.9795157568742333,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 6981
    },
    {
      "epoch": 0.06982,
      "grad_norm": 1.0944682868487863,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 6982
    },
    {
      "epoch": 0.06983,
      "grad_norm": 0.9694518653743476,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 6983
    },
    {
      "epoch": 0.06984,
      "grad_norm": 1.2844571684315234,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 6984
    },
    {
      "epoch": 0.06985,
      "grad_norm": 1.0332117175116347,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 6985
    },
    {
      "epoch": 0.06986,
      "grad_norm": 1.0819077584971564,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 6986
    },
    {
      "epoch": 0.06987,
      "grad_norm": 0.9731206337011348,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 6987
    },
    {
      "epoch": 0.06988,
      "grad_norm": 1.0718738925220694,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 6988
    },
    {
      "epoch": 0.06989,
      "grad_norm": 1.054505002503669,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 6989
    },
    {
      "epoch": 0.0699,
      "grad_norm": 1.049446778506991,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 6990
    },
    {
      "epoch": 0.06991,
      "grad_norm": 1.0281964154708396,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 6991
    },
    {
      "epoch": 0.06992,
      "grad_norm": 1.1546779439763237,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 6992
    },
    {
      "epoch": 0.06993,
      "grad_norm": 0.9314030046479128,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 6993
    },
    {
      "epoch": 0.06994,
      "grad_norm": 0.9562143137910827,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 6994
    },
    {
      "epoch": 0.06995,
      "grad_norm": 1.1679910334460406,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 6995
    },
    {
      "epoch": 0.06996,
      "grad_norm": 0.9969645718957871,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 6996
    },
    {
      "epoch": 0.06997,
      "grad_norm": 1.1879263627500822,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 6997
    },
    {
      "epoch": 0.06998,
      "grad_norm": 1.0073414720644633,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 6998
    },
    {
      "epoch": 0.06999,
      "grad_norm": 1.044274131329711,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 6999
    },
    {
      "epoch": 0.07,
      "grad_norm": 1.0064889519673954,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 7000
    },
    {
      "epoch": 0.07001,
      "grad_norm": 0.9375889716051797,
      "learning_rate": 0.003,
      "loss": 4.149,
      "step": 7001
    },
    {
      "epoch": 0.07002,
      "grad_norm": 0.9684683452814823,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 7002
    },
    {
      "epoch": 0.07003,
      "grad_norm": 1.1198927333018025,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 7003
    },
    {
      "epoch": 0.07004,
      "grad_norm": 0.8203608521817181,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 7004
    },
    {
      "epoch": 0.07005,
      "grad_norm": 0.9540874717299233,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 7005
    },
    {
      "epoch": 0.07006,
      "grad_norm": 0.9464816735202635,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 7006
    },
    {
      "epoch": 0.07007,
      "grad_norm": 0.941064497178461,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 7007
    },
    {
      "epoch": 0.07008,
      "grad_norm": 1.1484153762506384,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 7008
    },
    {
      "epoch": 0.07009,
      "grad_norm": 1.425852706227398,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 7009
    },
    {
      "epoch": 0.0701,
      "grad_norm": 1.1008237282072466,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 7010
    },
    {
      "epoch": 0.07011,
      "grad_norm": 1.030644753513,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 7011
    },
    {
      "epoch": 0.07012,
      "grad_norm": 1.0687016058276446,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 7012
    },
    {
      "epoch": 0.07013,
      "grad_norm": 1.1341288027770202,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 7013
    },
    {
      "epoch": 0.07014,
      "grad_norm": 1.0040444860045297,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 7014
    },
    {
      "epoch": 0.07015,
      "grad_norm": 0.9932353648755569,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 7015
    },
    {
      "epoch": 0.07016,
      "grad_norm": 1.2880395082608038,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 7016
    },
    {
      "epoch": 0.07017,
      "grad_norm": 0.7682702654491351,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 7017
    },
    {
      "epoch": 0.07018,
      "grad_norm": 0.810727853810661,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 7018
    },
    {
      "epoch": 0.07019,
      "grad_norm": 0.965846833420985,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 7019
    },
    {
      "epoch": 0.0702,
      "grad_norm": 1.4179828887882526,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 7020
    },
    {
      "epoch": 0.07021,
      "grad_norm": 1.0180276871047684,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 7021
    },
    {
      "epoch": 0.07022,
      "grad_norm": 1.1160919397810931,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 7022
    },
    {
      "epoch": 0.07023,
      "grad_norm": 0.9242970527362745,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 7023
    },
    {
      "epoch": 0.07024,
      "grad_norm": 0.9246329587526372,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 7024
    },
    {
      "epoch": 0.07025,
      "grad_norm": 1.0838078965811182,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 7025
    },
    {
      "epoch": 0.07026,
      "grad_norm": 1.1206485645739708,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 7026
    },
    {
      "epoch": 0.07027,
      "grad_norm": 1.136885864091667,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 7027
    },
    {
      "epoch": 0.07028,
      "grad_norm": 1.1013879817070413,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7028
    },
    {
      "epoch": 0.07029,
      "grad_norm": 1.0906829990632632,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 7029
    },
    {
      "epoch": 0.0703,
      "grad_norm": 1.180643431396029,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 7030
    },
    {
      "epoch": 0.07031,
      "grad_norm": 1.1456048218509032,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 7031
    },
    {
      "epoch": 0.07032,
      "grad_norm": 1.154341685812463,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 7032
    },
    {
      "epoch": 0.07033,
      "grad_norm": 0.7739985174184032,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 7033
    },
    {
      "epoch": 0.07034,
      "grad_norm": 0.6448433625987843,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 7034
    },
    {
      "epoch": 0.07035,
      "grad_norm": 0.904325986660508,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 7035
    },
    {
      "epoch": 0.07036,
      "grad_norm": 1.2393718269495677,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 7036
    },
    {
      "epoch": 0.07037,
      "grad_norm": 1.277593084466098,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 7037
    },
    {
      "epoch": 0.07038,
      "grad_norm": 0.8061574634404284,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 7038
    },
    {
      "epoch": 0.07039,
      "grad_norm": 0.8656267328817133,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 7039
    },
    {
      "epoch": 0.0704,
      "grad_norm": 1.0016475050937732,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 7040
    },
    {
      "epoch": 0.07041,
      "grad_norm": 1.1456059826411173,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 7041
    },
    {
      "epoch": 0.07042,
      "grad_norm": 1.0851732002617531,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 7042
    },
    {
      "epoch": 0.07043,
      "grad_norm": 1.0945008161058172,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 7043
    },
    {
      "epoch": 0.07044,
      "grad_norm": 1.0110807128605013,
      "learning_rate": 0.003,
      "loss": 4.12,
      "step": 7044
    },
    {
      "epoch": 0.07045,
      "grad_norm": 1.1320858898966155,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 7045
    },
    {
      "epoch": 0.07046,
      "grad_norm": 0.9871821231222185,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 7046
    },
    {
      "epoch": 0.07047,
      "grad_norm": 1.165418907729337,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 7047
    },
    {
      "epoch": 0.07048,
      "grad_norm": 1.0948822008051076,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 7048
    },
    {
      "epoch": 0.07049,
      "grad_norm": 1.2705996592970472,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 7049
    },
    {
      "epoch": 0.0705,
      "grad_norm": 0.8758701708456724,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 7050
    },
    {
      "epoch": 0.07051,
      "grad_norm": 0.8750281089164476,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 7051
    },
    {
      "epoch": 0.07052,
      "grad_norm": 1.0447997235101967,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 7052
    },
    {
      "epoch": 0.07053,
      "grad_norm": 1.1098900162205543,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 7053
    },
    {
      "epoch": 0.07054,
      "grad_norm": 0.9031483797124182,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 7054
    },
    {
      "epoch": 0.07055,
      "grad_norm": 0.8793147098000734,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7055
    },
    {
      "epoch": 0.07056,
      "grad_norm": 1.1095062750777271,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 7056
    },
    {
      "epoch": 0.07057,
      "grad_norm": 1.1618977353387767,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 7057
    },
    {
      "epoch": 0.07058,
      "grad_norm": 1.0527234862708261,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 7058
    },
    {
      "epoch": 0.07059,
      "grad_norm": 1.1568117066507144,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7059
    },
    {
      "epoch": 0.0706,
      "grad_norm": 1.1415615066464118,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 7060
    },
    {
      "epoch": 0.07061,
      "grad_norm": 1.055923444343412,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 7061
    },
    {
      "epoch": 0.07062,
      "grad_norm": 1.0170765646038162,
      "learning_rate": 0.003,
      "loss": 4.1346,
      "step": 7062
    },
    {
      "epoch": 0.07063,
      "grad_norm": 1.1842698666417368,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 7063
    },
    {
      "epoch": 0.07064,
      "grad_norm": 1.2178372221711813,
      "learning_rate": 0.003,
      "loss": 4.1252,
      "step": 7064
    },
    {
      "epoch": 0.07065,
      "grad_norm": 0.9601391763214442,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 7065
    },
    {
      "epoch": 0.07066,
      "grad_norm": 0.916815953425334,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 7066
    },
    {
      "epoch": 0.07067,
      "grad_norm": 1.186217043377779,
      "learning_rate": 0.003,
      "loss": 4.1507,
      "step": 7067
    },
    {
      "epoch": 0.07068,
      "grad_norm": 0.9796528568320318,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 7068
    },
    {
      "epoch": 0.07069,
      "grad_norm": 0.8983177972252203,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 7069
    },
    {
      "epoch": 0.0707,
      "grad_norm": 0.7769178250315103,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 7070
    },
    {
      "epoch": 0.07071,
      "grad_norm": 0.906617130093332,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 7071
    },
    {
      "epoch": 0.07072,
      "grad_norm": 1.1358405076574007,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 7072
    },
    {
      "epoch": 0.07073,
      "grad_norm": 0.9412288770953748,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 7073
    },
    {
      "epoch": 0.07074,
      "grad_norm": 1.0725538617168138,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7074
    },
    {
      "epoch": 0.07075,
      "grad_norm": 1.1672154966038255,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 7075
    },
    {
      "epoch": 0.07076,
      "grad_norm": 0.7062151739229583,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 7076
    },
    {
      "epoch": 0.07077,
      "grad_norm": 0.7739957741542802,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 7077
    },
    {
      "epoch": 0.07078,
      "grad_norm": 1.0600410398425977,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 7078
    },
    {
      "epoch": 0.07079,
      "grad_norm": 1.3492326363705884,
      "learning_rate": 0.003,
      "loss": 4.1311,
      "step": 7079
    },
    {
      "epoch": 0.0708,
      "grad_norm": 0.9657503725086062,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 7080
    },
    {
      "epoch": 0.07081,
      "grad_norm": 1.1299019533525023,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 7081
    },
    {
      "epoch": 0.07082,
      "grad_norm": 0.9937833074975806,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 7082
    },
    {
      "epoch": 0.07083,
      "grad_norm": 1.2185933541832479,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 7083
    },
    {
      "epoch": 0.07084,
      "grad_norm": 0.9454456982283054,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 7084
    },
    {
      "epoch": 0.07085,
      "grad_norm": 0.8473361168787964,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 7085
    },
    {
      "epoch": 0.07086,
      "grad_norm": 0.9176320211127105,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 7086
    },
    {
      "epoch": 0.07087,
      "grad_norm": 1.1161177199303243,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 7087
    },
    {
      "epoch": 0.07088,
      "grad_norm": 1.0509489980561992,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 7088
    },
    {
      "epoch": 0.07089,
      "grad_norm": 0.9913619741647909,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7089
    },
    {
      "epoch": 0.0709,
      "grad_norm": 1.1620589469680658,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 7090
    },
    {
      "epoch": 0.07091,
      "grad_norm": 0.9945148846700074,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 7091
    },
    {
      "epoch": 0.07092,
      "grad_norm": 1.2536700163254593,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 7092
    },
    {
      "epoch": 0.07093,
      "grad_norm": 1.354622823433653,
      "learning_rate": 0.003,
      "loss": 4.125,
      "step": 7093
    },
    {
      "epoch": 0.07094,
      "grad_norm": 1.0630146455351173,
      "learning_rate": 0.003,
      "loss": 4.1384,
      "step": 7094
    },
    {
      "epoch": 0.07095,
      "grad_norm": 0.9454764813025995,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 7095
    },
    {
      "epoch": 0.07096,
      "grad_norm": 1.014738932196474,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 7096
    },
    {
      "epoch": 0.07097,
      "grad_norm": 1.1938770168715513,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 7097
    },
    {
      "epoch": 0.07098,
      "grad_norm": 0.8847286671786706,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 7098
    },
    {
      "epoch": 0.07099,
      "grad_norm": 1.043185377273873,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 7099
    },
    {
      "epoch": 0.071,
      "grad_norm": 1.1488413629431675,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 7100
    },
    {
      "epoch": 0.07101,
      "grad_norm": 1.060125066460162,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 7101
    },
    {
      "epoch": 0.07102,
      "grad_norm": 1.043423213524699,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 7102
    },
    {
      "epoch": 0.07103,
      "grad_norm": 1.1487827868033067,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 7103
    },
    {
      "epoch": 0.07104,
      "grad_norm": 0.906308246298158,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7104
    },
    {
      "epoch": 0.07105,
      "grad_norm": 1.0869757018932018,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 7105
    },
    {
      "epoch": 0.07106,
      "grad_norm": 0.994534751066206,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 7106
    },
    {
      "epoch": 0.07107,
      "grad_norm": 1.1286644088879794,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 7107
    },
    {
      "epoch": 0.07108,
      "grad_norm": 1.238356098237964,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 7108
    },
    {
      "epoch": 0.07109,
      "grad_norm": 0.813617584956547,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 7109
    },
    {
      "epoch": 0.0711,
      "grad_norm": 1.0042212442414669,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 7110
    },
    {
      "epoch": 0.07111,
      "grad_norm": 1.182160032235518,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 7111
    },
    {
      "epoch": 0.07112,
      "grad_norm": 1.2179586602810781,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 7112
    },
    {
      "epoch": 0.07113,
      "grad_norm": 1.1629649917625506,
      "learning_rate": 0.003,
      "loss": 4.1389,
      "step": 7113
    },
    {
      "epoch": 0.07114,
      "grad_norm": 1.062488521920579,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 7114
    },
    {
      "epoch": 0.07115,
      "grad_norm": 1.0870573312988112,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 7115
    },
    {
      "epoch": 0.07116,
      "grad_norm": 1.0520477518578835,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 7116
    },
    {
      "epoch": 0.07117,
      "grad_norm": 1.0392810415558837,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 7117
    },
    {
      "epoch": 0.07118,
      "grad_norm": 1.002861445960153,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 7118
    },
    {
      "epoch": 0.07119,
      "grad_norm": 1.0589727147603938,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 7119
    },
    {
      "epoch": 0.0712,
      "grad_norm": 1.2060768839825937,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 7120
    },
    {
      "epoch": 0.07121,
      "grad_norm": 1.0484074848536764,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 7121
    },
    {
      "epoch": 0.07122,
      "grad_norm": 1.1680927354417303,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 7122
    },
    {
      "epoch": 0.07123,
      "grad_norm": 1.0283240641322164,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 7123
    },
    {
      "epoch": 0.07124,
      "grad_norm": 1.0363051726979002,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 7124
    },
    {
      "epoch": 0.07125,
      "grad_norm": 1.1375243622127051,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 7125
    },
    {
      "epoch": 0.07126,
      "grad_norm": 1.1681796706587095,
      "learning_rate": 0.003,
      "loss": 4.1313,
      "step": 7126
    },
    {
      "epoch": 0.07127,
      "grad_norm": 1.0438987028881606,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 7127
    },
    {
      "epoch": 0.07128,
      "grad_norm": 1.0929096590435705,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 7128
    },
    {
      "epoch": 0.07129,
      "grad_norm": 1.074862563144551,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 7129
    },
    {
      "epoch": 0.0713,
      "grad_norm": 1.0366606289855014,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 7130
    },
    {
      "epoch": 0.07131,
      "grad_norm": 1.0083999678728495,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 7131
    },
    {
      "epoch": 0.07132,
      "grad_norm": 1.1570389544595974,
      "learning_rate": 0.003,
      "loss": 4.1291,
      "step": 7132
    },
    {
      "epoch": 0.07133,
      "grad_norm": 1.0373700523560319,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 7133
    },
    {
      "epoch": 0.07134,
      "grad_norm": 1.1150502540574474,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 7134
    },
    {
      "epoch": 0.07135,
      "grad_norm": 1.0459023320389105,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 7135
    },
    {
      "epoch": 0.07136,
      "grad_norm": 1.0145196590306376,
      "learning_rate": 0.003,
      "loss": 4.1404,
      "step": 7136
    },
    {
      "epoch": 0.07137,
      "grad_norm": 1.25764777060964,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 7137
    },
    {
      "epoch": 0.07138,
      "grad_norm": 0.9231210864041297,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 7138
    },
    {
      "epoch": 0.07139,
      "grad_norm": 0.9792328685329048,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 7139
    },
    {
      "epoch": 0.0714,
      "grad_norm": 1.0188971768817803,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 7140
    },
    {
      "epoch": 0.07141,
      "grad_norm": 1.1228754806364862,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 7141
    },
    {
      "epoch": 0.07142,
      "grad_norm": 1.1921395910674482,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 7142
    },
    {
      "epoch": 0.07143,
      "grad_norm": 1.2166066068555654,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 7143
    },
    {
      "epoch": 0.07144,
      "grad_norm": 1.1851929558296612,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 7144
    },
    {
      "epoch": 0.07145,
      "grad_norm": 1.0160455666249097,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 7145
    },
    {
      "epoch": 0.07146,
      "grad_norm": 1.2365692135288116,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 7146
    },
    {
      "epoch": 0.07147,
      "grad_norm": 0.9464163524127707,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 7147
    },
    {
      "epoch": 0.07148,
      "grad_norm": 0.9187524935486385,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 7148
    },
    {
      "epoch": 0.07149,
      "grad_norm": 0.9915622145108923,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 7149
    },
    {
      "epoch": 0.0715,
      "grad_norm": 1.24972865830939,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 7150
    },
    {
      "epoch": 0.07151,
      "grad_norm": 1.0498265673158704,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 7151
    },
    {
      "epoch": 0.07152,
      "grad_norm": 1.1525404433626172,
      "learning_rate": 0.003,
      "loss": 4.129,
      "step": 7152
    },
    {
      "epoch": 0.07153,
      "grad_norm": 1.1094991540332446,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 7153
    },
    {
      "epoch": 0.07154,
      "grad_norm": 1.0631614580252882,
      "learning_rate": 0.003,
      "loss": 4.1438,
      "step": 7154
    },
    {
      "epoch": 0.07155,
      "grad_norm": 1.2822551903595085,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 7155
    },
    {
      "epoch": 0.07156,
      "grad_norm": 0.8602155152627339,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 7156
    },
    {
      "epoch": 0.07157,
      "grad_norm": 0.914412750635205,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 7157
    },
    {
      "epoch": 0.07158,
      "grad_norm": 1.3500367025031255,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 7158
    },
    {
      "epoch": 0.07159,
      "grad_norm": 1.0131822935746835,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 7159
    },
    {
      "epoch": 0.0716,
      "grad_norm": 1.107157491662557,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 7160
    },
    {
      "epoch": 0.07161,
      "grad_norm": 1.0609422302924651,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 7161
    },
    {
      "epoch": 0.07162,
      "grad_norm": 1.0617956432065383,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 7162
    },
    {
      "epoch": 0.07163,
      "grad_norm": 0.9961491455303284,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 7163
    },
    {
      "epoch": 0.07164,
      "grad_norm": 1.0469051381193664,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 7164
    },
    {
      "epoch": 0.07165,
      "grad_norm": 1.1237775895482525,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 7165
    },
    {
      "epoch": 0.07166,
      "grad_norm": 1.2279690263793916,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 7166
    },
    {
      "epoch": 0.07167,
      "grad_norm": 1.023424584094055,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 7167
    },
    {
      "epoch": 0.07168,
      "grad_norm": 1.2707315933813994,
      "learning_rate": 0.003,
      "loss": 4.1359,
      "step": 7168
    },
    {
      "epoch": 0.07169,
      "grad_norm": 0.9172282319574873,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 7169
    },
    {
      "epoch": 0.0717,
      "grad_norm": 0.9083594703987031,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 7170
    },
    {
      "epoch": 0.07171,
      "grad_norm": 1.0486138594611163,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 7171
    },
    {
      "epoch": 0.07172,
      "grad_norm": 0.9700158502392761,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 7172
    },
    {
      "epoch": 0.07173,
      "grad_norm": 0.9301256536300708,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 7173
    },
    {
      "epoch": 0.07174,
      "grad_norm": 0.8411748081354862,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 7174
    },
    {
      "epoch": 0.07175,
      "grad_norm": 0.978496890781848,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 7175
    },
    {
      "epoch": 0.07176,
      "grad_norm": 1.017883820000192,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7176
    },
    {
      "epoch": 0.07177,
      "grad_norm": 1.0876540425618906,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 7177
    },
    {
      "epoch": 0.07178,
      "grad_norm": 1.2815250814974248,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 7178
    },
    {
      "epoch": 0.07179,
      "grad_norm": 1.0343617750573875,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 7179
    },
    {
      "epoch": 0.0718,
      "grad_norm": 1.122066580167411,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 7180
    },
    {
      "epoch": 0.07181,
      "grad_norm": 0.9895968388675276,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 7181
    },
    {
      "epoch": 0.07182,
      "grad_norm": 1.140460079055109,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 7182
    },
    {
      "epoch": 0.07183,
      "grad_norm": 1.0141873443326739,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 7183
    },
    {
      "epoch": 0.07184,
      "grad_norm": 1.0502527797264405,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 7184
    },
    {
      "epoch": 0.07185,
      "grad_norm": 1.2124587803714009,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 7185
    },
    {
      "epoch": 0.07186,
      "grad_norm": 0.9885002491164363,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 7186
    },
    {
      "epoch": 0.07187,
      "grad_norm": 1.111264868286653,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7187
    },
    {
      "epoch": 0.07188,
      "grad_norm": 1.0145615669399324,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 7188
    },
    {
      "epoch": 0.07189,
      "grad_norm": 1.0217994358701927,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 7189
    },
    {
      "epoch": 0.0719,
      "grad_norm": 1.0413788946815261,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 7190
    },
    {
      "epoch": 0.07191,
      "grad_norm": 1.094167060143487,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 7191
    },
    {
      "epoch": 0.07192,
      "grad_norm": 1.0055555200473454,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 7192
    },
    {
      "epoch": 0.07193,
      "grad_norm": 0.9850273042601376,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7193
    },
    {
      "epoch": 0.07194,
      "grad_norm": 1.0146797688856157,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 7194
    },
    {
      "epoch": 0.07195,
      "grad_norm": 1.1432646128077548,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7195
    },
    {
      "epoch": 0.07196,
      "grad_norm": 1.0481540314236983,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7196
    },
    {
      "epoch": 0.07197,
      "grad_norm": 0.9542580970677246,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 7197
    },
    {
      "epoch": 0.07198,
      "grad_norm": 1.10288485158813,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 7198
    },
    {
      "epoch": 0.07199,
      "grad_norm": 1.1509658058015648,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 7199
    },
    {
      "epoch": 0.072,
      "grad_norm": 1.0793475034183406,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 7200
    },
    {
      "epoch": 0.07201,
      "grad_norm": 0.9556489386143434,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 7201
    },
    {
      "epoch": 0.07202,
      "grad_norm": 0.9809904337905765,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 7202
    },
    {
      "epoch": 0.07203,
      "grad_norm": 1.1707509067401052,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 7203
    },
    {
      "epoch": 0.07204,
      "grad_norm": 1.3323882560669078,
      "learning_rate": 0.003,
      "loss": 4.1331,
      "step": 7204
    },
    {
      "epoch": 0.07205,
      "grad_norm": 1.1334906555538489,
      "learning_rate": 0.003,
      "loss": 4.1271,
      "step": 7205
    },
    {
      "epoch": 0.07206,
      "grad_norm": 1.1729688120105672,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 7206
    },
    {
      "epoch": 0.07207,
      "grad_norm": 0.8357880798460113,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 7207
    },
    {
      "epoch": 0.07208,
      "grad_norm": 1.0253844160329557,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 7208
    },
    {
      "epoch": 0.07209,
      "grad_norm": 1.2699597428691942,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 7209
    },
    {
      "epoch": 0.0721,
      "grad_norm": 0.844492273082808,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 7210
    },
    {
      "epoch": 0.07211,
      "grad_norm": 0.9959930714732329,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 7211
    },
    {
      "epoch": 0.07212,
      "grad_norm": 1.2255608222794574,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 7212
    },
    {
      "epoch": 0.07213,
      "grad_norm": 1.0751280898968962,
      "learning_rate": 0.003,
      "loss": 4.1341,
      "step": 7213
    },
    {
      "epoch": 0.07214,
      "grad_norm": 1.112519303902804,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 7214
    },
    {
      "epoch": 0.07215,
      "grad_norm": 1.0219317668597434,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 7215
    },
    {
      "epoch": 0.07216,
      "grad_norm": 0.9213765728970278,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 7216
    },
    {
      "epoch": 0.07217,
      "grad_norm": 0.8562736879877679,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 7217
    },
    {
      "epoch": 0.07218,
      "grad_norm": 1.0774634512005679,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 7218
    },
    {
      "epoch": 0.07219,
      "grad_norm": 1.1802394760260384,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 7219
    },
    {
      "epoch": 0.0722,
      "grad_norm": 0.919224360560586,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 7220
    },
    {
      "epoch": 0.07221,
      "grad_norm": 0.9309775053843943,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 7221
    },
    {
      "epoch": 0.07222,
      "grad_norm": 1.251585304062438,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 7222
    },
    {
      "epoch": 0.07223,
      "grad_norm": 1.2153975646372994,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 7223
    },
    {
      "epoch": 0.07224,
      "grad_norm": 0.881797645512631,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 7224
    },
    {
      "epoch": 0.07225,
      "grad_norm": 1.0903953232903485,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 7225
    },
    {
      "epoch": 0.07226,
      "grad_norm": 1.3155759554258537,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 7226
    },
    {
      "epoch": 0.07227,
      "grad_norm": 1.0076198414601187,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 7227
    },
    {
      "epoch": 0.07228,
      "grad_norm": 1.1644273856024416,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 7228
    },
    {
      "epoch": 0.07229,
      "grad_norm": 0.9826701191964766,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 7229
    },
    {
      "epoch": 0.0723,
      "grad_norm": 1.0039682774338574,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 7230
    },
    {
      "epoch": 0.07231,
      "grad_norm": 1.3290115071414985,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 7231
    },
    {
      "epoch": 0.07232,
      "grad_norm": 0.8889806292787095,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 7232
    },
    {
      "epoch": 0.07233,
      "grad_norm": 0.9033119332258699,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 7233
    },
    {
      "epoch": 0.07234,
      "grad_norm": 0.9850515096650099,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 7234
    },
    {
      "epoch": 0.07235,
      "grad_norm": 1.0280057066621469,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 7235
    },
    {
      "epoch": 0.07236,
      "grad_norm": 0.8959961876752984,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 7236
    },
    {
      "epoch": 0.07237,
      "grad_norm": 0.9578859446824379,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 7237
    },
    {
      "epoch": 0.07238,
      "grad_norm": 1.0904222667194374,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 7238
    },
    {
      "epoch": 0.07239,
      "grad_norm": 1.1354385440528534,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 7239
    },
    {
      "epoch": 0.0724,
      "grad_norm": 1.1445170478246343,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 7240
    },
    {
      "epoch": 0.07241,
      "grad_norm": 1.330496175036632,
      "learning_rate": 0.003,
      "loss": 4.1283,
      "step": 7241
    },
    {
      "epoch": 0.07242,
      "grad_norm": 1.0029178572683988,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 7242
    },
    {
      "epoch": 0.07243,
      "grad_norm": 1.0419556676047186,
      "learning_rate": 0.003,
      "loss": 4.1167,
      "step": 7243
    },
    {
      "epoch": 0.07244,
      "grad_norm": 1.1405191704713464,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 7244
    },
    {
      "epoch": 0.07245,
      "grad_norm": 1.055957134761977,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7245
    },
    {
      "epoch": 0.07246,
      "grad_norm": 0.9746458798890896,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 7246
    },
    {
      "epoch": 0.07247,
      "grad_norm": 1.2482468143322891,
      "learning_rate": 0.003,
      "loss": 4.1325,
      "step": 7247
    },
    {
      "epoch": 0.07248,
      "grad_norm": 0.8855907919154823,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 7248
    },
    {
      "epoch": 0.07249,
      "grad_norm": 1.0413021832365006,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 7249
    },
    {
      "epoch": 0.0725,
      "grad_norm": 1.1504077090947193,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 7250
    },
    {
      "epoch": 0.07251,
      "grad_norm": 1.509045508344056,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 7251
    },
    {
      "epoch": 0.07252,
      "grad_norm": 0.7932617771234077,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 7252
    },
    {
      "epoch": 0.07253,
      "grad_norm": 0.8358630953519474,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 7253
    },
    {
      "epoch": 0.07254,
      "grad_norm": 1.1222171632357134,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 7254
    },
    {
      "epoch": 0.07255,
      "grad_norm": 1.0804481975183948,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 7255
    },
    {
      "epoch": 0.07256,
      "grad_norm": 1.2281418759865774,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 7256
    },
    {
      "epoch": 0.07257,
      "grad_norm": 0.8609220422938224,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 7257
    },
    {
      "epoch": 0.07258,
      "grad_norm": 0.917114476220674,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 7258
    },
    {
      "epoch": 0.07259,
      "grad_norm": 1.1443689847230054,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 7259
    },
    {
      "epoch": 0.0726,
      "grad_norm": 0.8744085214791258,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 7260
    },
    {
      "epoch": 0.07261,
      "grad_norm": 0.9452003703653208,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 7261
    },
    {
      "epoch": 0.07262,
      "grad_norm": 1.113889337528543,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 7262
    },
    {
      "epoch": 0.07263,
      "grad_norm": 1.1631423603603335,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 7263
    },
    {
      "epoch": 0.07264,
      "grad_norm": 1.0786175525014503,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7264
    },
    {
      "epoch": 0.07265,
      "grad_norm": 1.1580180230536627,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 7265
    },
    {
      "epoch": 0.07266,
      "grad_norm": 0.9070949557708119,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 7266
    },
    {
      "epoch": 0.07267,
      "grad_norm": 1.0924109492734309,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 7267
    },
    {
      "epoch": 0.07268,
      "grad_norm": 1.265085711037148,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 7268
    },
    {
      "epoch": 0.07269,
      "grad_norm": 0.9220577386588262,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 7269
    },
    {
      "epoch": 0.0727,
      "grad_norm": 1.0994973872701153,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 7270
    },
    {
      "epoch": 0.07271,
      "grad_norm": 1.0390055174761674,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 7271
    },
    {
      "epoch": 0.07272,
      "grad_norm": 1.109067023286393,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 7272
    },
    {
      "epoch": 0.07273,
      "grad_norm": 1.190775009770159,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 7273
    },
    {
      "epoch": 0.07274,
      "grad_norm": 0.9643586526737157,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 7274
    },
    {
      "epoch": 0.07275,
      "grad_norm": 0.807588478281446,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 7275
    },
    {
      "epoch": 0.07276,
      "grad_norm": 0.7367459059199548,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 7276
    },
    {
      "epoch": 0.07277,
      "grad_norm": 0.872868798546834,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 7277
    },
    {
      "epoch": 0.07278,
      "grad_norm": 1.021625890463033,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 7278
    },
    {
      "epoch": 0.07279,
      "grad_norm": 1.1561771690179692,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 7279
    },
    {
      "epoch": 0.0728,
      "grad_norm": 0.995174312767239,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 7280
    },
    {
      "epoch": 0.07281,
      "grad_norm": 1.2231458022379895,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 7281
    },
    {
      "epoch": 0.07282,
      "grad_norm": 0.9752915595680061,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 7282
    },
    {
      "epoch": 0.07283,
      "grad_norm": 1.117893543537355,
      "learning_rate": 0.003,
      "loss": 4.1293,
      "step": 7283
    },
    {
      "epoch": 0.07284,
      "grad_norm": 1.0083611434052082,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 7284
    },
    {
      "epoch": 0.07285,
      "grad_norm": 0.9630552431056575,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 7285
    },
    {
      "epoch": 0.07286,
      "grad_norm": 0.8169119244859634,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 7286
    },
    {
      "epoch": 0.07287,
      "grad_norm": 0.903401863942306,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 7287
    },
    {
      "epoch": 0.07288,
      "grad_norm": 1.0869951380389518,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 7288
    },
    {
      "epoch": 0.07289,
      "grad_norm": 1.3801544823334104,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 7289
    },
    {
      "epoch": 0.0729,
      "grad_norm": 0.9555863894636518,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 7290
    },
    {
      "epoch": 0.07291,
      "grad_norm": 0.9538466741745802,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 7291
    },
    {
      "epoch": 0.07292,
      "grad_norm": 0.9359092532053526,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 7292
    },
    {
      "epoch": 0.07293,
      "grad_norm": 1.095958126570705,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7293
    },
    {
      "epoch": 0.07294,
      "grad_norm": 1.2068433544777886,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 7294
    },
    {
      "epoch": 0.07295,
      "grad_norm": 0.9752948921226386,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 7295
    },
    {
      "epoch": 0.07296,
      "grad_norm": 1.0045088941314217,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 7296
    },
    {
      "epoch": 0.07297,
      "grad_norm": 1.2851924940126396,
      "learning_rate": 0.003,
      "loss": 4.1304,
      "step": 7297
    },
    {
      "epoch": 0.07298,
      "grad_norm": 0.8984769908416159,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 7298
    },
    {
      "epoch": 0.07299,
      "grad_norm": 1.0616115245681086,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 7299
    },
    {
      "epoch": 0.073,
      "grad_norm": 1.124975187025094,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 7300
    },
    {
      "epoch": 0.07301,
      "grad_norm": 1.0277803074493952,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 7301
    },
    {
      "epoch": 0.07302,
      "grad_norm": 1.2837150074018109,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 7302
    },
    {
      "epoch": 0.07303,
      "grad_norm": 0.964759536670288,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7303
    },
    {
      "epoch": 0.07304,
      "grad_norm": 1.1222488995629787,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 7304
    },
    {
      "epoch": 0.07305,
      "grad_norm": 1.0053576459104663,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 7305
    },
    {
      "epoch": 0.07306,
      "grad_norm": 0.9644307566496255,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 7306
    },
    {
      "epoch": 0.07307,
      "grad_norm": 1.1082915404106022,
      "learning_rate": 0.003,
      "loss": 4.1324,
      "step": 7307
    },
    {
      "epoch": 0.07308,
      "grad_norm": 0.982019032729292,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 7308
    },
    {
      "epoch": 0.07309,
      "grad_norm": 1.1223828148450474,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 7309
    },
    {
      "epoch": 0.0731,
      "grad_norm": 1.1820726188937547,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 7310
    },
    {
      "epoch": 0.07311,
      "grad_norm": 1.293636953613459,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 7311
    },
    {
      "epoch": 0.07312,
      "grad_norm": 1.1654414594535567,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 7312
    },
    {
      "epoch": 0.07313,
      "grad_norm": 0.955703652754267,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 7313
    },
    {
      "epoch": 0.07314,
      "grad_norm": 0.9882220489290244,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 7314
    },
    {
      "epoch": 0.07315,
      "grad_norm": 1.15014313157189,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7315
    },
    {
      "epoch": 0.07316,
      "grad_norm": 1.1357410212032995,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 7316
    },
    {
      "epoch": 0.07317,
      "grad_norm": 1.0678436839863392,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 7317
    },
    {
      "epoch": 0.07318,
      "grad_norm": 1.0552939298230526,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 7318
    },
    {
      "epoch": 0.07319,
      "grad_norm": 1.0631930674320003,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 7319
    },
    {
      "epoch": 0.0732,
      "grad_norm": 1.378616480794266,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 7320
    },
    {
      "epoch": 0.07321,
      "grad_norm": 0.8515266821180929,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 7321
    },
    {
      "epoch": 0.07322,
      "grad_norm": 0.9617491536309828,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 7322
    },
    {
      "epoch": 0.07323,
      "grad_norm": 1.2026300700448722,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 7323
    },
    {
      "epoch": 0.07324,
      "grad_norm": 1.1582938023197826,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 7324
    },
    {
      "epoch": 0.07325,
      "grad_norm": 0.9248125901403387,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 7325
    },
    {
      "epoch": 0.07326,
      "grad_norm": 1.0036120714649015,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 7326
    },
    {
      "epoch": 0.07327,
      "grad_norm": 1.1752819874476683,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 7327
    },
    {
      "epoch": 0.07328,
      "grad_norm": 1.0844284580206358,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 7328
    },
    {
      "epoch": 0.07329,
      "grad_norm": 1.131174265281523,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7329
    },
    {
      "epoch": 0.0733,
      "grad_norm": 1.0902888266412114,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 7330
    },
    {
      "epoch": 0.07331,
      "grad_norm": 1.0697080836933746,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 7331
    },
    {
      "epoch": 0.07332,
      "grad_norm": 1.1425679752194804,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 7332
    },
    {
      "epoch": 0.07333,
      "grad_norm": 1.0006191688665407,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 7333
    },
    {
      "epoch": 0.07334,
      "grad_norm": 0.9001558750090279,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 7334
    },
    {
      "epoch": 0.07335,
      "grad_norm": 0.9455956599304268,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 7335
    },
    {
      "epoch": 0.07336,
      "grad_norm": 1.0305099426952913,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 7336
    },
    {
      "epoch": 0.07337,
      "grad_norm": 0.9198982480606116,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 7337
    },
    {
      "epoch": 0.07338,
      "grad_norm": 1.1098987695082583,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 7338
    },
    {
      "epoch": 0.07339,
      "grad_norm": 1.1452129437911507,
      "learning_rate": 0.003,
      "loss": 4.1276,
      "step": 7339
    },
    {
      "epoch": 0.0734,
      "grad_norm": 1.0502294889063275,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 7340
    },
    {
      "epoch": 0.07341,
      "grad_norm": 1.1208815982947145,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7341
    },
    {
      "epoch": 0.07342,
      "grad_norm": 1.275789986817216,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 7342
    },
    {
      "epoch": 0.07343,
      "grad_norm": 0.9265911774890242,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 7343
    },
    {
      "epoch": 0.07344,
      "grad_norm": 0.9723085170850532,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 7344
    },
    {
      "epoch": 0.07345,
      "grad_norm": 1.310974637981528,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 7345
    },
    {
      "epoch": 0.07346,
      "grad_norm": 0.9859934935381308,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 7346
    },
    {
      "epoch": 0.07347,
      "grad_norm": 1.311642444465165,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 7347
    },
    {
      "epoch": 0.07348,
      "grad_norm": 1.0755786167180137,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 7348
    },
    {
      "epoch": 0.07349,
      "grad_norm": 1.117632483088747,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 7349
    },
    {
      "epoch": 0.0735,
      "grad_norm": 0.9556614107870417,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 7350
    },
    {
      "epoch": 0.07351,
      "grad_norm": 1.0030831120053332,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 7351
    },
    {
      "epoch": 0.07352,
      "grad_norm": 1.0839532000386127,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 7352
    },
    {
      "epoch": 0.07353,
      "grad_norm": 0.956388917885665,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 7353
    },
    {
      "epoch": 0.07354,
      "grad_norm": 1.2540325368846519,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7354
    },
    {
      "epoch": 0.07355,
      "grad_norm": 0.8476486590374245,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 7355
    },
    {
      "epoch": 0.07356,
      "grad_norm": 0.9072579050921763,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 7356
    },
    {
      "epoch": 0.07357,
      "grad_norm": 1.07606465118927,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 7357
    },
    {
      "epoch": 0.07358,
      "grad_norm": 0.9758249229656148,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 7358
    },
    {
      "epoch": 0.07359,
      "grad_norm": 1.2808917830241577,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 7359
    },
    {
      "epoch": 0.0736,
      "grad_norm": 1.0389550581293132,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 7360
    },
    {
      "epoch": 0.07361,
      "grad_norm": 1.4439062529022064,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 7361
    },
    {
      "epoch": 0.07362,
      "grad_norm": 1.0842141270597971,
      "learning_rate": 0.003,
      "loss": 4.1248,
      "step": 7362
    },
    {
      "epoch": 0.07363,
      "grad_norm": 1.0586360928887677,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 7363
    },
    {
      "epoch": 0.07364,
      "grad_norm": 1.0790877575745619,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 7364
    },
    {
      "epoch": 0.07365,
      "grad_norm": 1.0487723270085267,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 7365
    },
    {
      "epoch": 0.07366,
      "grad_norm": 0.9486821316909797,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 7366
    },
    {
      "epoch": 0.07367,
      "grad_norm": 1.0709893106339785,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 7367
    },
    {
      "epoch": 0.07368,
      "grad_norm": 1.3047096296178842,
      "learning_rate": 0.003,
      "loss": 4.1356,
      "step": 7368
    },
    {
      "epoch": 0.07369,
      "grad_norm": 0.9701406143317088,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 7369
    },
    {
      "epoch": 0.0737,
      "grad_norm": 0.9989447329095311,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 7370
    },
    {
      "epoch": 0.07371,
      "grad_norm": 0.9302946104087475,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 7371
    },
    {
      "epoch": 0.07372,
      "grad_norm": 1.0077914951421132,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 7372
    },
    {
      "epoch": 0.07373,
      "grad_norm": 1.0343422807862372,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 7373
    },
    {
      "epoch": 0.07374,
      "grad_norm": 1.0817869352825151,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 7374
    },
    {
      "epoch": 0.07375,
      "grad_norm": 0.9543927133710544,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 7375
    },
    {
      "epoch": 0.07376,
      "grad_norm": 1.0823157063184048,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 7376
    },
    {
      "epoch": 0.07377,
      "grad_norm": 1.4523320980123982,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 7377
    },
    {
      "epoch": 0.07378,
      "grad_norm": 0.9920344749925358,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 7378
    },
    {
      "epoch": 0.07379,
      "grad_norm": 1.2023630780465542,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7379
    },
    {
      "epoch": 0.0738,
      "grad_norm": 0.979976959424781,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 7380
    },
    {
      "epoch": 0.07381,
      "grad_norm": 1.2122785872569304,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 7381
    },
    {
      "epoch": 0.07382,
      "grad_norm": 1.0711158041090567,
      "learning_rate": 0.003,
      "loss": 4.1363,
      "step": 7382
    },
    {
      "epoch": 0.07383,
      "grad_norm": 1.0968738055753358,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 7383
    },
    {
      "epoch": 0.07384,
      "grad_norm": 1.0122366795759148,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 7384
    },
    {
      "epoch": 0.07385,
      "grad_norm": 1.1344254281864916,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 7385
    },
    {
      "epoch": 0.07386,
      "grad_norm": 0.9528627923235372,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 7386
    },
    {
      "epoch": 0.07387,
      "grad_norm": 1.0340287194364897,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 7387
    },
    {
      "epoch": 0.07388,
      "grad_norm": 1.0661053253287225,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 7388
    },
    {
      "epoch": 0.07389,
      "grad_norm": 1.0266500281332847,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 7389
    },
    {
      "epoch": 0.0739,
      "grad_norm": 1.129668600576453,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 7390
    },
    {
      "epoch": 0.07391,
      "grad_norm": 1.1431567245789562,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 7391
    },
    {
      "epoch": 0.07392,
      "grad_norm": 0.9263556557950041,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 7392
    },
    {
      "epoch": 0.07393,
      "grad_norm": 1.1810603985633787,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 7393
    },
    {
      "epoch": 0.07394,
      "grad_norm": 1.3052057344266057,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 7394
    },
    {
      "epoch": 0.07395,
      "grad_norm": 0.8902451430955156,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 7395
    },
    {
      "epoch": 0.07396,
      "grad_norm": 0.9401136963979129,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 7396
    },
    {
      "epoch": 0.07397,
      "grad_norm": 1.1336701427876885,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 7397
    },
    {
      "epoch": 0.07398,
      "grad_norm": 1.021978406987868,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 7398
    },
    {
      "epoch": 0.07399,
      "grad_norm": 1.4954509140076173,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 7399
    },
    {
      "epoch": 0.074,
      "grad_norm": 1.0211411356410278,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 7400
    },
    {
      "epoch": 0.07401,
      "grad_norm": 1.0269092564499978,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 7401
    },
    {
      "epoch": 0.07402,
      "grad_norm": 1.4125982359822833,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 7402
    },
    {
      "epoch": 0.07403,
      "grad_norm": 0.9177008703221856,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 7403
    },
    {
      "epoch": 0.07404,
      "grad_norm": 1.1319798151303864,
      "learning_rate": 0.003,
      "loss": 4.1469,
      "step": 7404
    },
    {
      "epoch": 0.07405,
      "grad_norm": 0.9497899734385127,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 7405
    },
    {
      "epoch": 0.07406,
      "grad_norm": 0.9312788500314818,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 7406
    },
    {
      "epoch": 0.07407,
      "grad_norm": 1.1720689446037251,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 7407
    },
    {
      "epoch": 0.07408,
      "grad_norm": 1.1799841021387516,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 7408
    },
    {
      "epoch": 0.07409,
      "grad_norm": 0.8985031842337684,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 7409
    },
    {
      "epoch": 0.0741,
      "grad_norm": 1.0335323207407554,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 7410
    },
    {
      "epoch": 0.07411,
      "grad_norm": 1.1752050110355394,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 7411
    },
    {
      "epoch": 0.07412,
      "grad_norm": 0.8792893537815811,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 7412
    },
    {
      "epoch": 0.07413,
      "grad_norm": 0.8217721513844498,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 7413
    },
    {
      "epoch": 0.07414,
      "grad_norm": 1.0066515265982865,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 7414
    },
    {
      "epoch": 0.07415,
      "grad_norm": 1.1016182405167252,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 7415
    },
    {
      "epoch": 0.07416,
      "grad_norm": 1.1590514698613215,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 7416
    },
    {
      "epoch": 0.07417,
      "grad_norm": 1.2339180292510161,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 7417
    },
    {
      "epoch": 0.07418,
      "grad_norm": 0.9874762325328805,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 7418
    },
    {
      "epoch": 0.07419,
      "grad_norm": 0.9246629621622819,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 7419
    },
    {
      "epoch": 0.0742,
      "grad_norm": 1.1184657030620395,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 7420
    },
    {
      "epoch": 0.07421,
      "grad_norm": 1.0677774940176092,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 7421
    },
    {
      "epoch": 0.07422,
      "grad_norm": 1.1836820756336819,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 7422
    },
    {
      "epoch": 0.07423,
      "grad_norm": 0.8743651545923469,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 7423
    },
    {
      "epoch": 0.07424,
      "grad_norm": 0.7825771879450308,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 7424
    },
    {
      "epoch": 0.07425,
      "grad_norm": 0.870565586334107,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 7425
    },
    {
      "epoch": 0.07426,
      "grad_norm": 0.8918705011152487,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 7426
    },
    {
      "epoch": 0.07427,
      "grad_norm": 0.8411163701679026,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 7427
    },
    {
      "epoch": 0.07428,
      "grad_norm": 0.93864995125254,
      "learning_rate": 0.003,
      "loss": 4.1256,
      "step": 7428
    },
    {
      "epoch": 0.07429,
      "grad_norm": 1.375676042770668,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 7429
    },
    {
      "epoch": 0.0743,
      "grad_norm": 1.2901201769037816,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 7430
    },
    {
      "epoch": 0.07431,
      "grad_norm": 1.0568232104600719,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 7431
    },
    {
      "epoch": 0.07432,
      "grad_norm": 1.1699681424250212,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7432
    },
    {
      "epoch": 0.07433,
      "grad_norm": 0.8912336353896766,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 7433
    },
    {
      "epoch": 0.07434,
      "grad_norm": 0.9043967976875844,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 7434
    },
    {
      "epoch": 0.07435,
      "grad_norm": 0.9722703409572532,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7435
    },
    {
      "epoch": 0.07436,
      "grad_norm": 1.2699630591839492,
      "learning_rate": 0.003,
      "loss": 4.1265,
      "step": 7436
    },
    {
      "epoch": 0.07437,
      "grad_norm": 1.2023886828716306,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 7437
    },
    {
      "epoch": 0.07438,
      "grad_norm": 1.1478584610411458,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7438
    },
    {
      "epoch": 0.07439,
      "grad_norm": 0.9559788657368175,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 7439
    },
    {
      "epoch": 0.0744,
      "grad_norm": 0.9360959694218371,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 7440
    },
    {
      "epoch": 0.07441,
      "grad_norm": 1.1994542701856334,
      "learning_rate": 0.003,
      "loss": 4.1363,
      "step": 7441
    },
    {
      "epoch": 0.07442,
      "grad_norm": 1.148317105389101,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 7442
    },
    {
      "epoch": 0.07443,
      "grad_norm": 1.1828609361416955,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 7443
    },
    {
      "epoch": 0.07444,
      "grad_norm": 0.9984270524869665,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 7444
    },
    {
      "epoch": 0.07445,
      "grad_norm": 1.1046109735024745,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 7445
    },
    {
      "epoch": 0.07446,
      "grad_norm": 1.077502936625141,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 7446
    },
    {
      "epoch": 0.07447,
      "grad_norm": 1.3996128849618712,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 7447
    },
    {
      "epoch": 0.07448,
      "grad_norm": 1.0287142278411547,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 7448
    },
    {
      "epoch": 0.07449,
      "grad_norm": 1.2427549674708467,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 7449
    },
    {
      "epoch": 0.0745,
      "grad_norm": 0.9870702348058904,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 7450
    },
    {
      "epoch": 0.07451,
      "grad_norm": 1.0952820057543522,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 7451
    },
    {
      "epoch": 0.07452,
      "grad_norm": 1.0656349431590586,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 7452
    },
    {
      "epoch": 0.07453,
      "grad_norm": 1.0968374507716108,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 7453
    },
    {
      "epoch": 0.07454,
      "grad_norm": 1.1404103114047044,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 7454
    },
    {
      "epoch": 0.07455,
      "grad_norm": 1.0359953314455825,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 7455
    },
    {
      "epoch": 0.07456,
      "grad_norm": 1.1415929983014337,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 7456
    },
    {
      "epoch": 0.07457,
      "grad_norm": 1.1474751867174888,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7457
    },
    {
      "epoch": 0.07458,
      "grad_norm": 0.9827108066913763,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 7458
    },
    {
      "epoch": 0.07459,
      "grad_norm": 1.0934454393079214,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 7459
    },
    {
      "epoch": 0.0746,
      "grad_norm": 0.98268777973652,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 7460
    },
    {
      "epoch": 0.07461,
      "grad_norm": 0.9816039441879064,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 7461
    },
    {
      "epoch": 0.07462,
      "grad_norm": 1.3051378208274698,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 7462
    },
    {
      "epoch": 0.07463,
      "grad_norm": 1.02310463494236,
      "learning_rate": 0.003,
      "loss": 4.1332,
      "step": 7463
    },
    {
      "epoch": 0.07464,
      "grad_norm": 1.2909266493327525,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 7464
    },
    {
      "epoch": 0.07465,
      "grad_norm": 0.9501490683976144,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 7465
    },
    {
      "epoch": 0.07466,
      "grad_norm": 1.088750961064017,
      "learning_rate": 0.003,
      "loss": 4.1219,
      "step": 7466
    },
    {
      "epoch": 0.07467,
      "grad_norm": 1.1372556677278904,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 7467
    },
    {
      "epoch": 0.07468,
      "grad_norm": 1.3878690319885936,
      "learning_rate": 0.003,
      "loss": 4.1147,
      "step": 7468
    },
    {
      "epoch": 0.07469,
      "grad_norm": 0.9635630973090923,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 7469
    },
    {
      "epoch": 0.0747,
      "grad_norm": 1.0927607741202252,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 7470
    },
    {
      "epoch": 0.07471,
      "grad_norm": 0.9589717096614562,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 7471
    },
    {
      "epoch": 0.07472,
      "grad_norm": 1.08022929264285,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 7472
    },
    {
      "epoch": 0.07473,
      "grad_norm": 1.1109856773117461,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 7473
    },
    {
      "epoch": 0.07474,
      "grad_norm": 1.1365304830035643,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 7474
    },
    {
      "epoch": 0.07475,
      "grad_norm": 1.2667706944106274,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 7475
    },
    {
      "epoch": 0.07476,
      "grad_norm": 0.9098797976710105,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 7476
    },
    {
      "epoch": 0.07477,
      "grad_norm": 1.0418500545925362,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 7477
    },
    {
      "epoch": 0.07478,
      "grad_norm": 1.086194005911626,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 7478
    },
    {
      "epoch": 0.07479,
      "grad_norm": 1.152475534521128,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 7479
    },
    {
      "epoch": 0.0748,
      "grad_norm": 1.2078785501955323,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 7480
    },
    {
      "epoch": 0.07481,
      "grad_norm": 0.9625364564038152,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 7481
    },
    {
      "epoch": 0.07482,
      "grad_norm": 1.1403385669598296,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 7482
    },
    {
      "epoch": 0.07483,
      "grad_norm": 1.182656884639201,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 7483
    },
    {
      "epoch": 0.07484,
      "grad_norm": 1.0500570511572587,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 7484
    },
    {
      "epoch": 0.07485,
      "grad_norm": 1.1939538114667945,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7485
    },
    {
      "epoch": 0.07486,
      "grad_norm": 0.8599612813420194,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 7486
    },
    {
      "epoch": 0.07487,
      "grad_norm": 0.906249363877033,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 7487
    },
    {
      "epoch": 0.07488,
      "grad_norm": 0.9120721853548994,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 7488
    },
    {
      "epoch": 0.07489,
      "grad_norm": 1.1756362495916781,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 7489
    },
    {
      "epoch": 0.0749,
      "grad_norm": 1.178043554191812,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 7490
    },
    {
      "epoch": 0.07491,
      "grad_norm": 1.177445877528429,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 7491
    },
    {
      "epoch": 0.07492,
      "grad_norm": 1.2883563550681214,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 7492
    },
    {
      "epoch": 0.07493,
      "grad_norm": 0.9792550641752528,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 7493
    },
    {
      "epoch": 0.07494,
      "grad_norm": 1.192575626109686,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 7494
    },
    {
      "epoch": 0.07495,
      "grad_norm": 1.109696103067543,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 7495
    },
    {
      "epoch": 0.07496,
      "grad_norm": 1.0076672522580505,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 7496
    },
    {
      "epoch": 0.07497,
      "grad_norm": 1.2073380480227682,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 7497
    },
    {
      "epoch": 0.07498,
      "grad_norm": 1.0463371240066046,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 7498
    },
    {
      "epoch": 0.07499,
      "grad_norm": 1.1632287228456402,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 7499
    },
    {
      "epoch": 0.075,
      "grad_norm": 0.9397310085287326,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 7500
    },
    {
      "epoch": 0.07501,
      "grad_norm": 1.020093818883204,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 7501
    },
    {
      "epoch": 0.07502,
      "grad_norm": 1.22722234628339,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 7502
    },
    {
      "epoch": 0.07503,
      "grad_norm": 1.033227705567588,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 7503
    },
    {
      "epoch": 0.07504,
      "grad_norm": 1.171817783999695,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 7504
    },
    {
      "epoch": 0.07505,
      "grad_norm": 1.0270833409022972,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 7505
    },
    {
      "epoch": 0.07506,
      "grad_norm": 1.2198086432393653,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 7506
    },
    {
      "epoch": 0.07507,
      "grad_norm": 0.8648598507253091,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 7507
    },
    {
      "epoch": 0.07508,
      "grad_norm": 0.9930152349407657,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 7508
    },
    {
      "epoch": 0.07509,
      "grad_norm": 1.1790407090479906,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 7509
    },
    {
      "epoch": 0.0751,
      "grad_norm": 0.9191788328705034,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 7510
    },
    {
      "epoch": 0.07511,
      "grad_norm": 1.0740498003601395,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 7511
    },
    {
      "epoch": 0.07512,
      "grad_norm": 0.8772452583013857,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 7512
    },
    {
      "epoch": 0.07513,
      "grad_norm": 0.9911439111577278,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 7513
    },
    {
      "epoch": 0.07514,
      "grad_norm": 1.5156680730273393,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 7514
    },
    {
      "epoch": 0.07515,
      "grad_norm": 0.9253063304788648,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 7515
    },
    {
      "epoch": 0.07516,
      "grad_norm": 1.113149970267038,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 7516
    },
    {
      "epoch": 0.07517,
      "grad_norm": 1.243025113042767,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 7517
    },
    {
      "epoch": 0.07518,
      "grad_norm": 0.8855817369758254,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 7518
    },
    {
      "epoch": 0.07519,
      "grad_norm": 0.9112412797077198,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 7519
    },
    {
      "epoch": 0.0752,
      "grad_norm": 1.116396451974829,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 7520
    },
    {
      "epoch": 0.07521,
      "grad_norm": 1.0324392071556105,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 7521
    },
    {
      "epoch": 0.07522,
      "grad_norm": 1.302244987987702,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 7522
    },
    {
      "epoch": 0.07523,
      "grad_norm": 0.9120906340155054,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 7523
    },
    {
      "epoch": 0.07524,
      "grad_norm": 1.1575722806063664,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 7524
    },
    {
      "epoch": 0.07525,
      "grad_norm": 1.273634189592798,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 7525
    },
    {
      "epoch": 0.07526,
      "grad_norm": 0.9217593936350734,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 7526
    },
    {
      "epoch": 0.07527,
      "grad_norm": 1.063249795293575,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 7527
    },
    {
      "epoch": 0.07528,
      "grad_norm": 1.0381592400998108,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 7528
    },
    {
      "epoch": 0.07529,
      "grad_norm": 1.0503623433090992,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 7529
    },
    {
      "epoch": 0.0753,
      "grad_norm": 1.0575571009050915,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 7530
    },
    {
      "epoch": 0.07531,
      "grad_norm": 1.0284966628634444,
      "learning_rate": 0.003,
      "loss": 4.1221,
      "step": 7531
    },
    {
      "epoch": 0.07532,
      "grad_norm": 1.017348507432173,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 7532
    },
    {
      "epoch": 0.07533,
      "grad_norm": 1.228626600553027,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 7533
    },
    {
      "epoch": 0.07534,
      "grad_norm": 1.1401992033872428,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 7534
    },
    {
      "epoch": 0.07535,
      "grad_norm": 1.1630915222125624,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 7535
    },
    {
      "epoch": 0.07536,
      "grad_norm": 1.012891605331372,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 7536
    },
    {
      "epoch": 0.07537,
      "grad_norm": 1.2029828166327519,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 7537
    },
    {
      "epoch": 0.07538,
      "grad_norm": 1.0177281645657348,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 7538
    },
    {
      "epoch": 0.07539,
      "grad_norm": 1.2772627744588205,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 7539
    },
    {
      "epoch": 0.0754,
      "grad_norm": 0.9294525785897821,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 7540
    },
    {
      "epoch": 0.07541,
      "grad_norm": 1.0828336834647083,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 7541
    },
    {
      "epoch": 0.07542,
      "grad_norm": 1.1780063549119208,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 7542
    },
    {
      "epoch": 0.07543,
      "grad_norm": 1.0749429588406836,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 7543
    },
    {
      "epoch": 0.07544,
      "grad_norm": 1.0841966667755707,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7544
    },
    {
      "epoch": 0.07545,
      "grad_norm": 0.9980448156387751,
      "learning_rate": 0.003,
      "loss": 4.1343,
      "step": 7545
    },
    {
      "epoch": 0.07546,
      "grad_norm": 1.2664949065709301,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 7546
    },
    {
      "epoch": 0.07547,
      "grad_norm": 1.062678674841793,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 7547
    },
    {
      "epoch": 0.07548,
      "grad_norm": 1.5159617863041532,
      "learning_rate": 0.003,
      "loss": 4.1355,
      "step": 7548
    },
    {
      "epoch": 0.07549,
      "grad_norm": 0.9217954430172313,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 7549
    },
    {
      "epoch": 0.0755,
      "grad_norm": 0.9469672042958677,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 7550
    },
    {
      "epoch": 0.07551,
      "grad_norm": 1.1010649743264453,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 7551
    },
    {
      "epoch": 0.07552,
      "grad_norm": 0.979006740783685,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 7552
    },
    {
      "epoch": 0.07553,
      "grad_norm": 1.2202632658315713,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 7553
    },
    {
      "epoch": 0.07554,
      "grad_norm": 1.0141175000304337,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 7554
    },
    {
      "epoch": 0.07555,
      "grad_norm": 1.2381367341566416,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 7555
    },
    {
      "epoch": 0.07556,
      "grad_norm": 1.1130694211087335,
      "learning_rate": 0.003,
      "loss": 4.1263,
      "step": 7556
    },
    {
      "epoch": 0.07557,
      "grad_norm": 1.220415386983482,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 7557
    },
    {
      "epoch": 0.07558,
      "grad_norm": 0.9103727107756383,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 7558
    },
    {
      "epoch": 0.07559,
      "grad_norm": 1.161068539868024,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 7559
    },
    {
      "epoch": 0.0756,
      "grad_norm": 1.2157119525881248,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 7560
    },
    {
      "epoch": 0.07561,
      "grad_norm": 0.8483939083465544,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 7561
    },
    {
      "epoch": 0.07562,
      "grad_norm": 0.8203601465351547,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 7562
    },
    {
      "epoch": 0.07563,
      "grad_norm": 1.0211935844350564,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 7563
    },
    {
      "epoch": 0.07564,
      "grad_norm": 1.243412295176371,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 7564
    },
    {
      "epoch": 0.07565,
      "grad_norm": 0.8572336614367662,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 7565
    },
    {
      "epoch": 0.07566,
      "grad_norm": 0.9904342760557475,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 7566
    },
    {
      "epoch": 0.07567,
      "grad_norm": 1.3580247317393612,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 7567
    },
    {
      "epoch": 0.07568,
      "grad_norm": 0.9605002083972642,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 7568
    },
    {
      "epoch": 0.07569,
      "grad_norm": 1.1090261834271542,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 7569
    },
    {
      "epoch": 0.0757,
      "grad_norm": 1.1287563780825858,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7570
    },
    {
      "epoch": 0.07571,
      "grad_norm": 1.1003145238464291,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 7571
    },
    {
      "epoch": 0.07572,
      "grad_norm": 1.1635794583045487,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 7572
    },
    {
      "epoch": 0.07573,
      "grad_norm": 1.1124514629320816,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 7573
    },
    {
      "epoch": 0.07574,
      "grad_norm": 1.1040878656253004,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 7574
    },
    {
      "epoch": 0.07575,
      "grad_norm": 1.1860641536671122,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 7575
    },
    {
      "epoch": 0.07576,
      "grad_norm": 0.9866929732016607,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 7576
    },
    {
      "epoch": 0.07577,
      "grad_norm": 1.176366123038857,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 7577
    },
    {
      "epoch": 0.07578,
      "grad_norm": 0.9002821233286987,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 7578
    },
    {
      "epoch": 0.07579,
      "grad_norm": 1.0018491936918001,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 7579
    },
    {
      "epoch": 0.0758,
      "grad_norm": 1.1996191726980248,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 7580
    },
    {
      "epoch": 0.07581,
      "grad_norm": 1.0390536601921203,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 7581
    },
    {
      "epoch": 0.07582,
      "grad_norm": 1.3008748412907003,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 7582
    },
    {
      "epoch": 0.07583,
      "grad_norm": 0.9378232162415077,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 7583
    },
    {
      "epoch": 0.07584,
      "grad_norm": 0.9365440192341151,
      "learning_rate": 0.003,
      "loss": 4.138,
      "step": 7584
    },
    {
      "epoch": 0.07585,
      "grad_norm": 1.164036895229275,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 7585
    },
    {
      "epoch": 0.07586,
      "grad_norm": 1.0329782543130193,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 7586
    },
    {
      "epoch": 0.07587,
      "grad_norm": 1.377018066910138,
      "learning_rate": 0.003,
      "loss": 4.1337,
      "step": 7587
    },
    {
      "epoch": 0.07588,
      "grad_norm": 1.051480991233041,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 7588
    },
    {
      "epoch": 0.07589,
      "grad_norm": 1.0376720795682441,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 7589
    },
    {
      "epoch": 0.0759,
      "grad_norm": 1.220277986042294,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 7590
    },
    {
      "epoch": 0.07591,
      "grad_norm": 1.0320383144822298,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 7591
    },
    {
      "epoch": 0.07592,
      "grad_norm": 1.113224947516195,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 7592
    },
    {
      "epoch": 0.07593,
      "grad_norm": 1.0193178300169947,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 7593
    },
    {
      "epoch": 0.07594,
      "grad_norm": 1.0127757135657491,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 7594
    },
    {
      "epoch": 0.07595,
      "grad_norm": 1.2522485470517928,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 7595
    },
    {
      "epoch": 0.07596,
      "grad_norm": 0.8461175020076812,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 7596
    },
    {
      "epoch": 0.07597,
      "grad_norm": 1.0614723159182662,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 7597
    },
    {
      "epoch": 0.07598,
      "grad_norm": 1.2432661985382218,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 7598
    },
    {
      "epoch": 0.07599,
      "grad_norm": 0.9724406876800284,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 7599
    },
    {
      "epoch": 0.076,
      "grad_norm": 1.0294698539571896,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 7600
    },
    {
      "epoch": 0.07601,
      "grad_norm": 1.2075561785390323,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 7601
    },
    {
      "epoch": 0.07602,
      "grad_norm": 0.908367171679946,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 7602
    },
    {
      "epoch": 0.07603,
      "grad_norm": 0.819105434643421,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 7603
    },
    {
      "epoch": 0.07604,
      "grad_norm": 0.940096955111327,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 7604
    },
    {
      "epoch": 0.07605,
      "grad_norm": 1.1869813844923396,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 7605
    },
    {
      "epoch": 0.07606,
      "grad_norm": 1.319999351024039,
      "learning_rate": 0.003,
      "loss": 4.1612,
      "step": 7606
    },
    {
      "epoch": 0.07607,
      "grad_norm": 0.9928524946267742,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 7607
    },
    {
      "epoch": 0.07608,
      "grad_norm": 1.2163879425361146,
      "learning_rate": 0.003,
      "loss": 4.1334,
      "step": 7608
    },
    {
      "epoch": 0.07609,
      "grad_norm": 1.0931474918077035,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 7609
    },
    {
      "epoch": 0.0761,
      "grad_norm": 1.2939128848551429,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 7610
    },
    {
      "epoch": 0.07611,
      "grad_norm": 0.9268786250276437,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 7611
    },
    {
      "epoch": 0.07612,
      "grad_norm": 1.09828222716354,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 7612
    },
    {
      "epoch": 0.07613,
      "grad_norm": 1.1664511437430545,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 7613
    },
    {
      "epoch": 0.07614,
      "grad_norm": 1.0431360064756068,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 7614
    },
    {
      "epoch": 0.07615,
      "grad_norm": 1.4227723083965413,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 7615
    },
    {
      "epoch": 0.07616,
      "grad_norm": 0.8909517959568098,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 7616
    },
    {
      "epoch": 0.07617,
      "grad_norm": 0.8509788287430593,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 7617
    },
    {
      "epoch": 0.07618,
      "grad_norm": 1.1167760380486436,
      "learning_rate": 0.003,
      "loss": 4.1378,
      "step": 7618
    },
    {
      "epoch": 0.07619,
      "grad_norm": 1.11193937966075,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 7619
    },
    {
      "epoch": 0.0762,
      "grad_norm": 1.2309465997063567,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 7620
    },
    {
      "epoch": 0.07621,
      "grad_norm": 0.8811305994875692,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 7621
    },
    {
      "epoch": 0.07622,
      "grad_norm": 0.9686305823838591,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 7622
    },
    {
      "epoch": 0.07623,
      "grad_norm": 0.9243277071622491,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 7623
    },
    {
      "epoch": 0.07624,
      "grad_norm": 1.191977900320886,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 7624
    },
    {
      "epoch": 0.07625,
      "grad_norm": 1.026341609349241,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 7625
    },
    {
      "epoch": 0.07626,
      "grad_norm": 1.711585880657544,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 7626
    },
    {
      "epoch": 0.07627,
      "grad_norm": 1.1367491841820259,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 7627
    },
    {
      "epoch": 0.07628,
      "grad_norm": 0.988369876898564,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 7628
    },
    {
      "epoch": 0.07629,
      "grad_norm": 1.121398232821057,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 7629
    },
    {
      "epoch": 0.0763,
      "grad_norm": 1.0136207435839946,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 7630
    },
    {
      "epoch": 0.07631,
      "grad_norm": 1.4267741493920876,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 7631
    },
    {
      "epoch": 0.07632,
      "grad_norm": 1.1159763620561827,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 7632
    },
    {
      "epoch": 0.07633,
      "grad_norm": 1.3484061208170044,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 7633
    },
    {
      "epoch": 0.07634,
      "grad_norm": 0.9621844147865416,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 7634
    },
    {
      "epoch": 0.07635,
      "grad_norm": 0.9183557192199527,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 7635
    },
    {
      "epoch": 0.07636,
      "grad_norm": 0.9807300765346603,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 7636
    },
    {
      "epoch": 0.07637,
      "grad_norm": 0.987741492699573,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 7637
    },
    {
      "epoch": 0.07638,
      "grad_norm": 1.150598587756748,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 7638
    },
    {
      "epoch": 0.07639,
      "grad_norm": 1.1008913618099967,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 7639
    },
    {
      "epoch": 0.0764,
      "grad_norm": 1.069231691804542,
      "learning_rate": 0.003,
      "loss": 4.1348,
      "step": 7640
    },
    {
      "epoch": 0.07641,
      "grad_norm": 1.0412024223182812,
      "learning_rate": 0.003,
      "loss": 4.1288,
      "step": 7641
    },
    {
      "epoch": 0.07642,
      "grad_norm": 1.042062368982713,
      "learning_rate": 0.003,
      "loss": 4.1208,
      "step": 7642
    },
    {
      "epoch": 0.07643,
      "grad_norm": 1.2951167861590105,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 7643
    },
    {
      "epoch": 0.07644,
      "grad_norm": 1.0584656072218666,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 7644
    },
    {
      "epoch": 0.07645,
      "grad_norm": 1.072877867189409,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 7645
    },
    {
      "epoch": 0.07646,
      "grad_norm": 1.3048350731050382,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 7646
    },
    {
      "epoch": 0.07647,
      "grad_norm": 1.1612332984131792,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 7647
    },
    {
      "epoch": 0.07648,
      "grad_norm": 0.8560991825716354,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 7648
    },
    {
      "epoch": 0.07649,
      "grad_norm": 0.9881761378783187,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 7649
    },
    {
      "epoch": 0.0765,
      "grad_norm": 1.363753027922075,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 7650
    },
    {
      "epoch": 0.07651,
      "grad_norm": 0.8946406952460046,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 7651
    },
    {
      "epoch": 0.07652,
      "grad_norm": 0.9508520934841937,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 7652
    },
    {
      "epoch": 0.07653,
      "grad_norm": 1.1285258062605226,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 7653
    },
    {
      "epoch": 0.07654,
      "grad_norm": 1.066906075262894,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 7654
    },
    {
      "epoch": 0.07655,
      "grad_norm": 1.1447845805737322,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 7655
    },
    {
      "epoch": 0.07656,
      "grad_norm": 1.0026793162461074,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 7656
    },
    {
      "epoch": 0.07657,
      "grad_norm": 1.253277049833007,
      "learning_rate": 0.003,
      "loss": 4.1232,
      "step": 7657
    },
    {
      "epoch": 0.07658,
      "grad_norm": 0.9868834909744868,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 7658
    },
    {
      "epoch": 0.07659,
      "grad_norm": 1.3930524753298466,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 7659
    },
    {
      "epoch": 0.0766,
      "grad_norm": 1.147552881069107,
      "learning_rate": 0.003,
      "loss": 4.1419,
      "step": 7660
    },
    {
      "epoch": 0.07661,
      "grad_norm": 1.097273792838392,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7661
    },
    {
      "epoch": 0.07662,
      "grad_norm": 0.9919267467777996,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 7662
    },
    {
      "epoch": 0.07663,
      "grad_norm": 0.9604902804967356,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 7663
    },
    {
      "epoch": 0.07664,
      "grad_norm": 0.9241313914036474,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 7664
    },
    {
      "epoch": 0.07665,
      "grad_norm": 1.104796598248223,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 7665
    },
    {
      "epoch": 0.07666,
      "grad_norm": 1.3127029491687616,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 7666
    },
    {
      "epoch": 0.07667,
      "grad_norm": 1.2964036322532557,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 7667
    },
    {
      "epoch": 0.07668,
      "grad_norm": 0.9580286300224677,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 7668
    },
    {
      "epoch": 0.07669,
      "grad_norm": 0.9745998727959247,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 7669
    },
    {
      "epoch": 0.0767,
      "grad_norm": 1.0011917702341462,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 7670
    },
    {
      "epoch": 0.07671,
      "grad_norm": 1.0199987802111787,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 7671
    },
    {
      "epoch": 0.07672,
      "grad_norm": 0.9129909978014048,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 7672
    },
    {
      "epoch": 0.07673,
      "grad_norm": 1.0607294077470495,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 7673
    },
    {
      "epoch": 0.07674,
      "grad_norm": 1.0920171975344224,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 7674
    },
    {
      "epoch": 0.07675,
      "grad_norm": 1.0565727558971971,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 7675
    },
    {
      "epoch": 0.07676,
      "grad_norm": 1.2725834956197855,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 7676
    },
    {
      "epoch": 0.07677,
      "grad_norm": 1.2598694460071018,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7677
    },
    {
      "epoch": 0.07678,
      "grad_norm": 1.325358583295913,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 7678
    },
    {
      "epoch": 0.07679,
      "grad_norm": 0.9349674003951666,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 7679
    },
    {
      "epoch": 0.0768,
      "grad_norm": 1.0596741567867978,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 7680
    },
    {
      "epoch": 0.07681,
      "grad_norm": 1.1277581537166605,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 7681
    },
    {
      "epoch": 0.07682,
      "grad_norm": 1.083698074015161,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 7682
    },
    {
      "epoch": 0.07683,
      "grad_norm": 0.9949340311875353,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 7683
    },
    {
      "epoch": 0.07684,
      "grad_norm": 1.053359494756575,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 7684
    },
    {
      "epoch": 0.07685,
      "grad_norm": 1.0752188724225478,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 7685
    },
    {
      "epoch": 0.07686,
      "grad_norm": 0.9963305559685445,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 7686
    },
    {
      "epoch": 0.07687,
      "grad_norm": 1.1200067453730849,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 7687
    },
    {
      "epoch": 0.07688,
      "grad_norm": 1.4005284389753019,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 7688
    },
    {
      "epoch": 0.07689,
      "grad_norm": 0.9511902260435283,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 7689
    },
    {
      "epoch": 0.0769,
      "grad_norm": 1.1187995671857418,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 7690
    },
    {
      "epoch": 0.07691,
      "grad_norm": 1.2231215859628386,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 7691
    },
    {
      "epoch": 0.07692,
      "grad_norm": 0.8299137991876565,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 7692
    },
    {
      "epoch": 0.07693,
      "grad_norm": 0.9082081822910427,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 7693
    },
    {
      "epoch": 0.07694,
      "grad_norm": 1.0916728801306739,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 7694
    },
    {
      "epoch": 0.07695,
      "grad_norm": 0.9838815880287163,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 7695
    },
    {
      "epoch": 0.07696,
      "grad_norm": 1.155813106601287,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 7696
    },
    {
      "epoch": 0.07697,
      "grad_norm": 1.0839130165247974,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 7697
    },
    {
      "epoch": 0.07698,
      "grad_norm": 1.1680104322789127,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 7698
    },
    {
      "epoch": 0.07699,
      "grad_norm": 1.0447890613446116,
      "learning_rate": 0.003,
      "loss": 4.1322,
      "step": 7699
    },
    {
      "epoch": 0.077,
      "grad_norm": 1.0704008306301402,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 7700
    },
    {
      "epoch": 0.07701,
      "grad_norm": 1.202553730312753,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 7701
    },
    {
      "epoch": 0.07702,
      "grad_norm": 1.0984179830351732,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 7702
    },
    {
      "epoch": 0.07703,
      "grad_norm": 1.0470287328541912,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 7703
    },
    {
      "epoch": 0.07704,
      "grad_norm": 1.1097420437759358,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 7704
    },
    {
      "epoch": 0.07705,
      "grad_norm": 1.1144201249659018,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 7705
    },
    {
      "epoch": 0.07706,
      "grad_norm": 1.0217698112069022,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 7706
    },
    {
      "epoch": 0.07707,
      "grad_norm": 1.2109348836998135,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 7707
    },
    {
      "epoch": 0.07708,
      "grad_norm": 1.0553701436633687,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 7708
    },
    {
      "epoch": 0.07709,
      "grad_norm": 1.0487666289699271,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 7709
    },
    {
      "epoch": 0.0771,
      "grad_norm": 1.2192442347910983,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 7710
    },
    {
      "epoch": 0.07711,
      "grad_norm": 0.8886343142082175,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 7711
    },
    {
      "epoch": 0.07712,
      "grad_norm": 1.097685818072634,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 7712
    },
    {
      "epoch": 0.07713,
      "grad_norm": 1.0647709451424068,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 7713
    },
    {
      "epoch": 0.07714,
      "grad_norm": 1.1091271113459429,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 7714
    },
    {
      "epoch": 0.07715,
      "grad_norm": 1.1833117992139233,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 7715
    },
    {
      "epoch": 0.07716,
      "grad_norm": 1.3072350917328062,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 7716
    },
    {
      "epoch": 0.07717,
      "grad_norm": 1.1929125586163642,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 7717
    },
    {
      "epoch": 0.07718,
      "grad_norm": 0.9794043684657727,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 7718
    },
    {
      "epoch": 0.07719,
      "grad_norm": 0.975112457349595,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 7719
    },
    {
      "epoch": 0.0772,
      "grad_norm": 1.1897527208548928,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 7720
    },
    {
      "epoch": 0.07721,
      "grad_norm": 0.9330747049002993,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 7721
    },
    {
      "epoch": 0.07722,
      "grad_norm": 1.1116995201555617,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 7722
    },
    {
      "epoch": 0.07723,
      "grad_norm": 0.9908749389404667,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 7723
    },
    {
      "epoch": 0.07724,
      "grad_norm": 1.3797674628535543,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 7724
    },
    {
      "epoch": 0.07725,
      "grad_norm": 0.8365626531454406,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7725
    },
    {
      "epoch": 0.07726,
      "grad_norm": 0.8890967555331794,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 7726
    },
    {
      "epoch": 0.07727,
      "grad_norm": 1.0153125391727966,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 7727
    },
    {
      "epoch": 0.07728,
      "grad_norm": 1.2781185982208176,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 7728
    },
    {
      "epoch": 0.07729,
      "grad_norm": 1.3348745122018035,
      "learning_rate": 0.003,
      "loss": 4.1172,
      "step": 7729
    },
    {
      "epoch": 0.0773,
      "grad_norm": 1.1346003292608733,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 7730
    },
    {
      "epoch": 0.07731,
      "grad_norm": 1.1382685641315156,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 7731
    },
    {
      "epoch": 0.07732,
      "grad_norm": 0.909477352989243,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 7732
    },
    {
      "epoch": 0.07733,
      "grad_norm": 0.9380438060395494,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 7733
    },
    {
      "epoch": 0.07734,
      "grad_norm": 1.182723768892894,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 7734
    },
    {
      "epoch": 0.07735,
      "grad_norm": 0.956856860551835,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 7735
    },
    {
      "epoch": 0.07736,
      "grad_norm": 1.2503508123110225,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 7736
    },
    {
      "epoch": 0.07737,
      "grad_norm": 0.9753617896147677,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 7737
    },
    {
      "epoch": 0.07738,
      "grad_norm": 1.165817507529701,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 7738
    },
    {
      "epoch": 0.07739,
      "grad_norm": 1.0658659573641052,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7739
    },
    {
      "epoch": 0.0774,
      "grad_norm": 1.3038985858407492,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 7740
    },
    {
      "epoch": 0.07741,
      "grad_norm": 0.8212293829001464,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 7741
    },
    {
      "epoch": 0.07742,
      "grad_norm": 0.8961041116710575,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 7742
    },
    {
      "epoch": 0.07743,
      "grad_norm": 1.078993168136095,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 7743
    },
    {
      "epoch": 0.07744,
      "grad_norm": 1.2392253006965184,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 7744
    },
    {
      "epoch": 0.07745,
      "grad_norm": 1.092921186916631,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 7745
    },
    {
      "epoch": 0.07746,
      "grad_norm": 0.9498762317732079,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7746
    },
    {
      "epoch": 0.07747,
      "grad_norm": 1.0777857631437682,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 7747
    },
    {
      "epoch": 0.07748,
      "grad_norm": 1.2501420325170711,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 7748
    },
    {
      "epoch": 0.07749,
      "grad_norm": 1.1659362799933979,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 7749
    },
    {
      "epoch": 0.0775,
      "grad_norm": 1.0057776955954294,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 7750
    },
    {
      "epoch": 0.07751,
      "grad_norm": 1.0908977077309605,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 7751
    },
    {
      "epoch": 0.07752,
      "grad_norm": 1.0146076688175378,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 7752
    },
    {
      "epoch": 0.07753,
      "grad_norm": 1.363739323470198,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 7753
    },
    {
      "epoch": 0.07754,
      "grad_norm": 0.9532027349687967,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 7754
    },
    {
      "epoch": 0.07755,
      "grad_norm": 1.2117236044034614,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 7755
    },
    {
      "epoch": 0.07756,
      "grad_norm": 0.9695361568369883,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 7756
    },
    {
      "epoch": 0.07757,
      "grad_norm": 0.9161470471426952,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 7757
    },
    {
      "epoch": 0.07758,
      "grad_norm": 1.048139217552709,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 7758
    },
    {
      "epoch": 0.07759,
      "grad_norm": 1.0718445202257145,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 7759
    },
    {
      "epoch": 0.0776,
      "grad_norm": 1.0941281483649858,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 7760
    },
    {
      "epoch": 0.07761,
      "grad_norm": 1.16599825792614,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 7761
    },
    {
      "epoch": 0.07762,
      "grad_norm": 1.1135150780281224,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 7762
    },
    {
      "epoch": 0.07763,
      "grad_norm": 1.2834428160255196,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 7763
    },
    {
      "epoch": 0.07764,
      "grad_norm": 1.0594672168271608,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 7764
    },
    {
      "epoch": 0.07765,
      "grad_norm": 1.1099903530222162,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 7765
    },
    {
      "epoch": 0.07766,
      "grad_norm": 1.1139847548103408,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 7766
    },
    {
      "epoch": 0.07767,
      "grad_norm": 1.0721664614212691,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 7767
    },
    {
      "epoch": 0.07768,
      "grad_norm": 1.1111257160722119,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 7768
    },
    {
      "epoch": 0.07769,
      "grad_norm": 1.3832464631797665,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 7769
    },
    {
      "epoch": 0.0777,
      "grad_norm": 0.843477392576227,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 7770
    },
    {
      "epoch": 0.07771,
      "grad_norm": 0.8510362431061711,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 7771
    },
    {
      "epoch": 0.07772,
      "grad_norm": 0.8897772192499654,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 7772
    },
    {
      "epoch": 0.07773,
      "grad_norm": 1.013378086803394,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 7773
    },
    {
      "epoch": 0.07774,
      "grad_norm": 1.0168273891280863,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 7774
    },
    {
      "epoch": 0.07775,
      "grad_norm": 1.072622212301239,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 7775
    },
    {
      "epoch": 0.07776,
      "grad_norm": 1.0581485073003125,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 7776
    },
    {
      "epoch": 0.07777,
      "grad_norm": 1.1302210145024325,
      "learning_rate": 0.003,
      "loss": 4.1173,
      "step": 7777
    },
    {
      "epoch": 0.07778,
      "grad_norm": 1.024976398839125,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 7778
    },
    {
      "epoch": 0.07779,
      "grad_norm": 1.1717175996834288,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 7779
    },
    {
      "epoch": 0.0778,
      "grad_norm": 1.427357253672805,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 7780
    },
    {
      "epoch": 0.07781,
      "grad_norm": 1.0969858418068819,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 7781
    },
    {
      "epoch": 0.07782,
      "grad_norm": 1.2968713137185408,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 7782
    },
    {
      "epoch": 0.07783,
      "grad_norm": 0.8773486594119719,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 7783
    },
    {
      "epoch": 0.07784,
      "grad_norm": 1.0342548469691255,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 7784
    },
    {
      "epoch": 0.07785,
      "grad_norm": 1.10039126806273,
      "learning_rate": 0.003,
      "loss": 4.1206,
      "step": 7785
    },
    {
      "epoch": 0.07786,
      "grad_norm": 1.0452193011897093,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 7786
    },
    {
      "epoch": 0.07787,
      "grad_norm": 1.1642092124226817,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 7787
    },
    {
      "epoch": 0.07788,
      "grad_norm": 1.1434848747699677,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 7788
    },
    {
      "epoch": 0.07789,
      "grad_norm": 1.257771451993656,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 7789
    },
    {
      "epoch": 0.0779,
      "grad_norm": 1.1643784681384861,
      "learning_rate": 0.003,
      "loss": 4.1285,
      "step": 7790
    },
    {
      "epoch": 0.07791,
      "grad_norm": 0.8236374738012967,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 7791
    },
    {
      "epoch": 0.07792,
      "grad_norm": 1.0332387020792073,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 7792
    },
    {
      "epoch": 0.07793,
      "grad_norm": 1.1512556053819554,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 7793
    },
    {
      "epoch": 0.07794,
      "grad_norm": 1.03917363342737,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 7794
    },
    {
      "epoch": 0.07795,
      "grad_norm": 1.6696662165864793,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 7795
    },
    {
      "epoch": 0.07796,
      "grad_norm": 0.787952991787084,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 7796
    },
    {
      "epoch": 0.07797,
      "grad_norm": 0.8948697180629456,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 7797
    },
    {
      "epoch": 0.07798,
      "grad_norm": 0.9493585597786888,
      "learning_rate": 0.003,
      "loss": 4.1203,
      "step": 7798
    },
    {
      "epoch": 0.07799,
      "grad_norm": 1.2066941416147947,
      "learning_rate": 0.003,
      "loss": 4.1283,
      "step": 7799
    },
    {
      "epoch": 0.078,
      "grad_norm": 1.0241472782325425,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 7800
    },
    {
      "epoch": 0.07801,
      "grad_norm": 1.0033620445871885,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 7801
    },
    {
      "epoch": 0.07802,
      "grad_norm": 1.0483115871730873,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 7802
    },
    {
      "epoch": 0.07803,
      "grad_norm": 1.1661836582075822,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 7803
    },
    {
      "epoch": 0.07804,
      "grad_norm": 1.0045845234300916,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 7804
    },
    {
      "epoch": 0.07805,
      "grad_norm": 1.3719059597176528,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 7805
    },
    {
      "epoch": 0.07806,
      "grad_norm": 1.1373415665521494,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 7806
    },
    {
      "epoch": 0.07807,
      "grad_norm": 1.1540676505275376,
      "learning_rate": 0.003,
      "loss": 4.1231,
      "step": 7807
    },
    {
      "epoch": 0.07808,
      "grad_norm": 1.304033145674968,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 7808
    },
    {
      "epoch": 0.07809,
      "grad_norm": 1.1020823842953529,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7809
    },
    {
      "epoch": 0.0781,
      "grad_norm": 1.081555316618965,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 7810
    },
    {
      "epoch": 0.07811,
      "grad_norm": 0.9962888054522434,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 7811
    },
    {
      "epoch": 0.07812,
      "grad_norm": 1.1186570479651625,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 7812
    },
    {
      "epoch": 0.07813,
      "grad_norm": 1.1090198553450583,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 7813
    },
    {
      "epoch": 0.07814,
      "grad_norm": 1.2848083778524626,
      "learning_rate": 0.003,
      "loss": 4.1436,
      "step": 7814
    },
    {
      "epoch": 0.07815,
      "grad_norm": 0.9594867164410406,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 7815
    },
    {
      "epoch": 0.07816,
      "grad_norm": 1.0723647489392034,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 7816
    },
    {
      "epoch": 0.07817,
      "grad_norm": 1.2086534813788286,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 7817
    },
    {
      "epoch": 0.07818,
      "grad_norm": 1.2308305090547291,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 7818
    },
    {
      "epoch": 0.07819,
      "grad_norm": 1.1260665579293765,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 7819
    },
    {
      "epoch": 0.0782,
      "grad_norm": 1.037032588684538,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 7820
    },
    {
      "epoch": 0.07821,
      "grad_norm": 1.1246879048340699,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 7821
    },
    {
      "epoch": 0.07822,
      "grad_norm": 1.0979965409828816,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 7822
    },
    {
      "epoch": 0.07823,
      "grad_norm": 1.1545268347310245,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 7823
    },
    {
      "epoch": 0.07824,
      "grad_norm": 0.98432541262975,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 7824
    },
    {
      "epoch": 0.07825,
      "grad_norm": 1.256001106118303,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 7825
    },
    {
      "epoch": 0.07826,
      "grad_norm": 1.0371372534263357,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 7826
    },
    {
      "epoch": 0.07827,
      "grad_norm": 1.3685741094922728,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 7827
    },
    {
      "epoch": 0.07828,
      "grad_norm": 1.0775990135733946,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 7828
    },
    {
      "epoch": 0.07829,
      "grad_norm": 1.3362851668698614,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 7829
    },
    {
      "epoch": 0.0783,
      "grad_norm": 0.9305364252486059,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 7830
    },
    {
      "epoch": 0.07831,
      "grad_norm": 0.8515151977423888,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 7831
    },
    {
      "epoch": 0.07832,
      "grad_norm": 0.8728982928691557,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 7832
    },
    {
      "epoch": 0.07833,
      "grad_norm": 0.9928756485225986,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 7833
    },
    {
      "epoch": 0.07834,
      "grad_norm": 1.4351901450584381,
      "learning_rate": 0.003,
      "loss": 4.1542,
      "step": 7834
    },
    {
      "epoch": 0.07835,
      "grad_norm": 0.9793084700860754,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 7835
    },
    {
      "epoch": 0.07836,
      "grad_norm": 1.2828041473172216,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 7836
    },
    {
      "epoch": 0.07837,
      "grad_norm": 1.0306628659997845,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 7837
    },
    {
      "epoch": 0.07838,
      "grad_norm": 1.2201424308363111,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 7838
    },
    {
      "epoch": 0.07839,
      "grad_norm": 0.8727982004597001,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 7839
    },
    {
      "epoch": 0.0784,
      "grad_norm": 0.9518942667143618,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 7840
    },
    {
      "epoch": 0.07841,
      "grad_norm": 1.1002413813695149,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 7841
    },
    {
      "epoch": 0.07842,
      "grad_norm": 1.3289013602682165,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 7842
    },
    {
      "epoch": 0.07843,
      "grad_norm": 1.1995549008047959,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7843
    },
    {
      "epoch": 0.07844,
      "grad_norm": 1.302842725312235,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 7844
    },
    {
      "epoch": 0.07845,
      "grad_norm": 0.8615424064159155,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 7845
    },
    {
      "epoch": 0.07846,
      "grad_norm": 1.1032730283055294,
      "learning_rate": 0.003,
      "loss": 4.1301,
      "step": 7846
    },
    {
      "epoch": 0.07847,
      "grad_norm": 1.1716909284747095,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 7847
    },
    {
      "epoch": 0.07848,
      "grad_norm": 1.0298840210031728,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 7848
    },
    {
      "epoch": 0.07849,
      "grad_norm": 0.9361652769304023,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 7849
    },
    {
      "epoch": 0.0785,
      "grad_norm": 0.9654921254423353,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 7850
    },
    {
      "epoch": 0.07851,
      "grad_norm": 1.3042027618759802,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 7851
    },
    {
      "epoch": 0.07852,
      "grad_norm": 1.2431447724018592,
      "learning_rate": 0.003,
      "loss": 4.1292,
      "step": 7852
    },
    {
      "epoch": 0.07853,
      "grad_norm": 1.083275442045458,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 7853
    },
    {
      "epoch": 0.07854,
      "grad_norm": 0.9580563046958107,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 7854
    },
    {
      "epoch": 0.07855,
      "grad_norm": 0.9661427902084164,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 7855
    },
    {
      "epoch": 0.07856,
      "grad_norm": 1.1404243379470476,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 7856
    },
    {
      "epoch": 0.07857,
      "grad_norm": 1.1278970478970323,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 7857
    },
    {
      "epoch": 0.07858,
      "grad_norm": 1.2400683738024636,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 7858
    },
    {
      "epoch": 0.07859,
      "grad_norm": 1.1367394355297402,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 7859
    },
    {
      "epoch": 0.0786,
      "grad_norm": 1.1458758894227754,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 7860
    },
    {
      "epoch": 0.07861,
      "grad_norm": 1.020642921999076,
      "learning_rate": 0.003,
      "loss": 4.1145,
      "step": 7861
    },
    {
      "epoch": 0.07862,
      "grad_norm": 0.9153329211108876,
      "learning_rate": 0.003,
      "loss": 4.1321,
      "step": 7862
    },
    {
      "epoch": 0.07863,
      "grad_norm": 1.0169957598242134,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 7863
    },
    {
      "epoch": 0.07864,
      "grad_norm": 1.259541799929286,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 7864
    },
    {
      "epoch": 0.07865,
      "grad_norm": 1.053664052392776,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 7865
    },
    {
      "epoch": 0.07866,
      "grad_norm": 1.1541810165983357,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 7866
    },
    {
      "epoch": 0.07867,
      "grad_norm": 1.0834227683128044,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 7867
    },
    {
      "epoch": 0.07868,
      "grad_norm": 1.1142727145292364,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 7868
    },
    {
      "epoch": 0.07869,
      "grad_norm": 1.3213620312341967,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 7869
    },
    {
      "epoch": 0.0787,
      "grad_norm": 0.9543811915443509,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 7870
    },
    {
      "epoch": 0.07871,
      "grad_norm": 1.0556891019646535,
      "learning_rate": 0.003,
      "loss": 4.1321,
      "step": 7871
    },
    {
      "epoch": 0.07872,
      "grad_norm": 1.0423505032259703,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 7872
    },
    {
      "epoch": 0.07873,
      "grad_norm": 1.2630689394264598,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 7873
    },
    {
      "epoch": 0.07874,
      "grad_norm": 1.0744222348469838,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 7874
    },
    {
      "epoch": 0.07875,
      "grad_norm": 1.1590033383841085,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 7875
    },
    {
      "epoch": 0.07876,
      "grad_norm": 1.0641603834704565,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 7876
    },
    {
      "epoch": 0.07877,
      "grad_norm": 1.07771416779964,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 7877
    },
    {
      "epoch": 0.07878,
      "grad_norm": 1.0105372101646315,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7878
    },
    {
      "epoch": 0.07879,
      "grad_norm": 0.964112703343037,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 7879
    },
    {
      "epoch": 0.0788,
      "grad_norm": 1.0161410845205,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 7880
    },
    {
      "epoch": 0.07881,
      "grad_norm": 1.1773807921941188,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 7881
    },
    {
      "epoch": 0.07882,
      "grad_norm": 1.10321775987289,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 7882
    },
    {
      "epoch": 0.07883,
      "grad_norm": 1.369091366939899,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 7883
    },
    {
      "epoch": 0.07884,
      "grad_norm": 0.9760109355931481,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7884
    },
    {
      "epoch": 0.07885,
      "grad_norm": 0.9854215860262345,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 7885
    },
    {
      "epoch": 0.07886,
      "grad_norm": 1.0764050852490537,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 7886
    },
    {
      "epoch": 0.07887,
      "grad_norm": 1.1764071385853805,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 7887
    },
    {
      "epoch": 0.07888,
      "grad_norm": 1.209620298516619,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 7888
    },
    {
      "epoch": 0.07889,
      "grad_norm": 1.0297274496770714,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 7889
    },
    {
      "epoch": 0.0789,
      "grad_norm": 1.1786330089012873,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 7890
    },
    {
      "epoch": 0.07891,
      "grad_norm": 1.0533061689526215,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 7891
    },
    {
      "epoch": 0.07892,
      "grad_norm": 1.1205645980083818,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 7892
    },
    {
      "epoch": 0.07893,
      "grad_norm": 1.0523794156863215,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 7893
    },
    {
      "epoch": 0.07894,
      "grad_norm": 0.9699886855362052,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 7894
    },
    {
      "epoch": 0.07895,
      "grad_norm": 0.9833094035853261,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 7895
    },
    {
      "epoch": 0.07896,
      "grad_norm": 0.9776394904354138,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 7896
    },
    {
      "epoch": 0.07897,
      "grad_norm": 1.1062378440010827,
      "learning_rate": 0.003,
      "loss": 4.1185,
      "step": 7897
    },
    {
      "epoch": 0.07898,
      "grad_norm": 1.0700004254123947,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 7898
    },
    {
      "epoch": 0.07899,
      "grad_norm": 1.293605829425846,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 7899
    },
    {
      "epoch": 0.079,
      "grad_norm": 1.0712959158479753,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 7900
    },
    {
      "epoch": 0.07901,
      "grad_norm": 1.3719650813885331,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 7901
    },
    {
      "epoch": 0.07902,
      "grad_norm": 1.0271054136067208,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 7902
    },
    {
      "epoch": 0.07903,
      "grad_norm": 1.4611371181707402,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 7903
    },
    {
      "epoch": 0.07904,
      "grad_norm": 0.8252296257800624,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 7904
    },
    {
      "epoch": 0.07905,
      "grad_norm": 0.7895305759219005,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 7905
    },
    {
      "epoch": 0.07906,
      "grad_norm": 0.9276855253246521,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 7906
    },
    {
      "epoch": 0.07907,
      "grad_norm": 1.0888746411346901,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 7907
    },
    {
      "epoch": 0.07908,
      "grad_norm": 1.1846497029397596,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7908
    },
    {
      "epoch": 0.07909,
      "grad_norm": 1.3242319782732728,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 7909
    },
    {
      "epoch": 0.0791,
      "grad_norm": 0.9082616315727167,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 7910
    },
    {
      "epoch": 0.07911,
      "grad_norm": 0.977177222280417,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 7911
    },
    {
      "epoch": 0.07912,
      "grad_norm": 1.134278943251006,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 7912
    },
    {
      "epoch": 0.07913,
      "grad_norm": 0.9726471608410041,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 7913
    },
    {
      "epoch": 0.07914,
      "grad_norm": 1.0400459476467385,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 7914
    },
    {
      "epoch": 0.07915,
      "grad_norm": 1.1254579113656433,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 7915
    },
    {
      "epoch": 0.07916,
      "grad_norm": 1.170036571371052,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 7916
    },
    {
      "epoch": 0.07917,
      "grad_norm": 0.9982422946503648,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 7917
    },
    {
      "epoch": 0.07918,
      "grad_norm": 1.334554846263995,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 7918
    },
    {
      "epoch": 0.07919,
      "grad_norm": 0.9593512516129795,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 7919
    },
    {
      "epoch": 0.0792,
      "grad_norm": 1.2945155738424934,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 7920
    },
    {
      "epoch": 0.07921,
      "grad_norm": 0.998743031279866,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 7921
    },
    {
      "epoch": 0.07922,
      "grad_norm": 1.2856678381292854,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 7922
    },
    {
      "epoch": 0.07923,
      "grad_norm": 1.3839771793815328,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 7923
    },
    {
      "epoch": 0.07924,
      "grad_norm": 0.843757956272009,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 7924
    },
    {
      "epoch": 0.07925,
      "grad_norm": 0.8712759778946663,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7925
    },
    {
      "epoch": 0.07926,
      "grad_norm": 0.8792339530446677,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 7926
    },
    {
      "epoch": 0.07927,
      "grad_norm": 0.9425668093513904,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 7927
    },
    {
      "epoch": 0.07928,
      "grad_norm": 1.181163559854013,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 7928
    },
    {
      "epoch": 0.07929,
      "grad_norm": 1.0055246739263888,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 7929
    },
    {
      "epoch": 0.0793,
      "grad_norm": 1.1521126305735454,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 7930
    },
    {
      "epoch": 0.07931,
      "grad_norm": 1.1425888894120744,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 7931
    },
    {
      "epoch": 0.07932,
      "grad_norm": 1.2461666977358548,
      "learning_rate": 0.003,
      "loss": 4.1154,
      "step": 7932
    },
    {
      "epoch": 0.07933,
      "grad_norm": 1.0256002678148357,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 7933
    },
    {
      "epoch": 0.07934,
      "grad_norm": 1.3007738245290619,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 7934
    },
    {
      "epoch": 0.07935,
      "grad_norm": 0.9296174844539437,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 7935
    },
    {
      "epoch": 0.07936,
      "grad_norm": 0.8725870895380266,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 7936
    },
    {
      "epoch": 0.07937,
      "grad_norm": 0.932587169003262,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 7937
    },
    {
      "epoch": 0.07938,
      "grad_norm": 1.0963665524323598,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 7938
    },
    {
      "epoch": 0.07939,
      "grad_norm": 1.1192645927998164,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 7939
    },
    {
      "epoch": 0.0794,
      "grad_norm": 1.0887510653304764,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 7940
    },
    {
      "epoch": 0.07941,
      "grad_norm": 1.568700579038047,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 7941
    },
    {
      "epoch": 0.07942,
      "grad_norm": 0.9367815838429774,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 7942
    },
    {
      "epoch": 0.07943,
      "grad_norm": 0.989902176784745,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 7943
    },
    {
      "epoch": 0.07944,
      "grad_norm": 1.2806804391718558,
      "learning_rate": 0.003,
      "loss": 4.1317,
      "step": 7944
    },
    {
      "epoch": 0.07945,
      "grad_norm": 1.0465168074755917,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 7945
    },
    {
      "epoch": 0.07946,
      "grad_norm": 1.1723891827338213,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 7946
    },
    {
      "epoch": 0.07947,
      "grad_norm": 1.1681881065505169,
      "learning_rate": 0.003,
      "loss": 4.1247,
      "step": 7947
    },
    {
      "epoch": 0.07948,
      "grad_norm": 1.0403361408707281,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 7948
    },
    {
      "epoch": 0.07949,
      "grad_norm": 1.089737847328847,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 7949
    },
    {
      "epoch": 0.0795,
      "grad_norm": 1.0006486459411923,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 7950
    },
    {
      "epoch": 0.07951,
      "grad_norm": 1.0643791943109515,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 7951
    },
    {
      "epoch": 0.07952,
      "grad_norm": 1.1537519015439461,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 7952
    },
    {
      "epoch": 0.07953,
      "grad_norm": 1.183449549789871,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 7953
    },
    {
      "epoch": 0.07954,
      "grad_norm": 0.9709964660164878,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 7954
    },
    {
      "epoch": 0.07955,
      "grad_norm": 1.2278848117081071,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 7955
    },
    {
      "epoch": 0.07956,
      "grad_norm": 1.1583662422335557,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 7956
    },
    {
      "epoch": 0.07957,
      "grad_norm": 1.0170490448671368,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 7957
    },
    {
      "epoch": 0.07958,
      "grad_norm": 1.084771863517993,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 7958
    },
    {
      "epoch": 0.07959,
      "grad_norm": 1.1488835770046757,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 7959
    },
    {
      "epoch": 0.0796,
      "grad_norm": 1.2272254053247194,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 7960
    },
    {
      "epoch": 0.07961,
      "grad_norm": 0.7550769269887291,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 7961
    },
    {
      "epoch": 0.07962,
      "grad_norm": 0.7402534276864938,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7962
    },
    {
      "epoch": 0.07963,
      "grad_norm": 0.8779302221658243,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 7963
    },
    {
      "epoch": 0.07964,
      "grad_norm": 1.0462430118892019,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 7964
    },
    {
      "epoch": 0.07965,
      "grad_norm": 1.1593091162794427,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 7965
    },
    {
      "epoch": 0.07966,
      "grad_norm": 1.0541722292533413,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 7966
    },
    {
      "epoch": 0.07967,
      "grad_norm": 1.1496986651653818,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 7967
    },
    {
      "epoch": 0.07968,
      "grad_norm": 1.2051439905100614,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 7968
    },
    {
      "epoch": 0.07969,
      "grad_norm": 1.166558887648512,
      "learning_rate": 0.003,
      "loss": 4.1204,
      "step": 7969
    },
    {
      "epoch": 0.0797,
      "grad_norm": 0.9177683584105356,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 7970
    },
    {
      "epoch": 0.07971,
      "grad_norm": 1.1341545449465498,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 7971
    },
    {
      "epoch": 0.07972,
      "grad_norm": 1.5081463937449056,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7972
    },
    {
      "epoch": 0.07973,
      "grad_norm": 0.8474584989165531,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7973
    },
    {
      "epoch": 0.07974,
      "grad_norm": 1.1289338429631244,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 7974
    },
    {
      "epoch": 0.07975,
      "grad_norm": 1.0951365579102945,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 7975
    },
    {
      "epoch": 0.07976,
      "grad_norm": 1.202331783937366,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 7976
    },
    {
      "epoch": 0.07977,
      "grad_norm": 1.129368184698674,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 7977
    },
    {
      "epoch": 0.07978,
      "grad_norm": 1.1036298314531698,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 7978
    },
    {
      "epoch": 0.07979,
      "grad_norm": 1.0740326562297844,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 7979
    },
    {
      "epoch": 0.0798,
      "grad_norm": 1.2013980109141247,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 7980
    },
    {
      "epoch": 0.07981,
      "grad_norm": 1.2324688029721227,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 7981
    },
    {
      "epoch": 0.07982,
      "grad_norm": 1.102179948446623,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 7982
    },
    {
      "epoch": 0.07983,
      "grad_norm": 1.0997229370918842,
      "learning_rate": 0.003,
      "loss": 4.126,
      "step": 7983
    },
    {
      "epoch": 0.07984,
      "grad_norm": 1.1774300218492646,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 7984
    },
    {
      "epoch": 0.07985,
      "grad_norm": 1.0396930643543236,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 7985
    },
    {
      "epoch": 0.07986,
      "grad_norm": 0.9072544573320482,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 7986
    },
    {
      "epoch": 0.07987,
      "grad_norm": 1.014408265411552,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 7987
    },
    {
      "epoch": 0.07988,
      "grad_norm": 1.0940592348211349,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 7988
    },
    {
      "epoch": 0.07989,
      "grad_norm": 1.5166715738459051,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 7989
    },
    {
      "epoch": 0.0799,
      "grad_norm": 1.1113923958563618,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 7990
    },
    {
      "epoch": 0.07991,
      "grad_norm": 1.1217722300895914,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 7991
    },
    {
      "epoch": 0.07992,
      "grad_norm": 0.9491402602543485,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 7992
    },
    {
      "epoch": 0.07993,
      "grad_norm": 0.955914940013595,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 7993
    },
    {
      "epoch": 0.07994,
      "grad_norm": 0.9095882982710126,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 7994
    },
    {
      "epoch": 0.07995,
      "grad_norm": 1.1501864531666672,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 7995
    },
    {
      "epoch": 0.07996,
      "grad_norm": 1.2288505894745418,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 7996
    },
    {
      "epoch": 0.07997,
      "grad_norm": 1.0079549128442122,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 7997
    },
    {
      "epoch": 0.07998,
      "grad_norm": 1.2919012167956998,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 7998
    },
    {
      "epoch": 0.07999,
      "grad_norm": 0.7692306732693436,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 7999
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.7658749344062428,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 8000
    },
    {
      "epoch": 0.08001,
      "grad_norm": 1.1172290952445811,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 8001
    },
    {
      "epoch": 0.08002,
      "grad_norm": 1.2957889022004523,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 8002
    },
    {
      "epoch": 0.08003,
      "grad_norm": 1.0457934117693226,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 8003
    },
    {
      "epoch": 0.08004,
      "grad_norm": 1.0305139871514264,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 8004
    },
    {
      "epoch": 0.08005,
      "grad_norm": 1.1419018400508258,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 8005
    },
    {
      "epoch": 0.08006,
      "grad_norm": 1.0896797731228673,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 8006
    },
    {
      "epoch": 0.08007,
      "grad_norm": 1.228265310250077,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 8007
    },
    {
      "epoch": 0.08008,
      "grad_norm": 1.1176908197364457,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 8008
    },
    {
      "epoch": 0.08009,
      "grad_norm": 1.2643872060654957,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 8009
    },
    {
      "epoch": 0.0801,
      "grad_norm": 1.0093075739010895,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 8010
    },
    {
      "epoch": 0.08011,
      "grad_norm": 1.2086973478631764,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 8011
    },
    {
      "epoch": 0.08012,
      "grad_norm": 1.1284095226139361,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 8012
    },
    {
      "epoch": 0.08013,
      "grad_norm": 1.0806491574385775,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 8013
    },
    {
      "epoch": 0.08014,
      "grad_norm": 1.0499299018409107,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 8014
    },
    {
      "epoch": 0.08015,
      "grad_norm": 1.1832836384348369,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 8015
    },
    {
      "epoch": 0.08016,
      "grad_norm": 1.2639744517182545,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 8016
    },
    {
      "epoch": 0.08017,
      "grad_norm": 1.1523916089493353,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 8017
    },
    {
      "epoch": 0.08018,
      "grad_norm": 1.184155355677051,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8018
    },
    {
      "epoch": 0.08019,
      "grad_norm": 0.8903633933008729,
      "learning_rate": 0.003,
      "loss": 4.1168,
      "step": 8019
    },
    {
      "epoch": 0.0802,
      "grad_norm": 0.9520940918042209,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 8020
    },
    {
      "epoch": 0.08021,
      "grad_norm": 1.1295461972729675,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 8021
    },
    {
      "epoch": 0.08022,
      "grad_norm": 1.002556264564961,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 8022
    },
    {
      "epoch": 0.08023,
      "grad_norm": 1.1786108591329556,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 8023
    },
    {
      "epoch": 0.08024,
      "grad_norm": 0.9555442073542494,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 8024
    },
    {
      "epoch": 0.08025,
      "grad_norm": 1.2962171433165834,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 8025
    },
    {
      "epoch": 0.08026,
      "grad_norm": 0.9010195783376608,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 8026
    },
    {
      "epoch": 0.08027,
      "grad_norm": 1.0238470434941584,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 8027
    },
    {
      "epoch": 0.08028,
      "grad_norm": 0.9548697731707966,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 8028
    },
    {
      "epoch": 0.08029,
      "grad_norm": 1.0023059334224902,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 8029
    },
    {
      "epoch": 0.0803,
      "grad_norm": 1.252440818965578,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 8030
    },
    {
      "epoch": 0.08031,
      "grad_norm": 1.068095590321888,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 8031
    },
    {
      "epoch": 0.08032,
      "grad_norm": 1.3578842748881137,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 8032
    },
    {
      "epoch": 0.08033,
      "grad_norm": 0.9060347362052149,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 8033
    },
    {
      "epoch": 0.08034,
      "grad_norm": 1.0537972743762867,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 8034
    },
    {
      "epoch": 0.08035,
      "grad_norm": 1.319790888337074,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 8035
    },
    {
      "epoch": 0.08036,
      "grad_norm": 0.9259950251223968,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 8036
    },
    {
      "epoch": 0.08037,
      "grad_norm": 0.9375805778403902,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 8037
    },
    {
      "epoch": 0.08038,
      "grad_norm": 1.0239195144037003,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8038
    },
    {
      "epoch": 0.08039,
      "grad_norm": 1.3195500348248446,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 8039
    },
    {
      "epoch": 0.0804,
      "grad_norm": 1.1082428212704758,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 8040
    },
    {
      "epoch": 0.08041,
      "grad_norm": 1.1596745215156437,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 8041
    },
    {
      "epoch": 0.08042,
      "grad_norm": 1.064287573359169,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 8042
    },
    {
      "epoch": 0.08043,
      "grad_norm": 1.1696931645456978,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 8043
    },
    {
      "epoch": 0.08044,
      "grad_norm": 1.0352811531836574,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 8044
    },
    {
      "epoch": 0.08045,
      "grad_norm": 1.2260629551905426,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 8045
    },
    {
      "epoch": 0.08046,
      "grad_norm": 1.2395334777899678,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 8046
    },
    {
      "epoch": 0.08047,
      "grad_norm": 1.1355116433579298,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 8047
    },
    {
      "epoch": 0.08048,
      "grad_norm": 1.0459530983468805,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 8048
    },
    {
      "epoch": 0.08049,
      "grad_norm": 0.9552089247433029,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 8049
    },
    {
      "epoch": 0.0805,
      "grad_norm": 1.076195243985042,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 8050
    },
    {
      "epoch": 0.08051,
      "grad_norm": 1.1851350313945763,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 8051
    },
    {
      "epoch": 0.08052,
      "grad_norm": 0.8808885981989337,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 8052
    },
    {
      "epoch": 0.08053,
      "grad_norm": 0.8774625163986162,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 8053
    },
    {
      "epoch": 0.08054,
      "grad_norm": 1.0416380425987897,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 8054
    },
    {
      "epoch": 0.08055,
      "grad_norm": 1.171907766302584,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 8055
    },
    {
      "epoch": 0.08056,
      "grad_norm": 0.9849653204085805,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 8056
    },
    {
      "epoch": 0.08057,
      "grad_norm": 0.9694296024333688,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 8057
    },
    {
      "epoch": 0.08058,
      "grad_norm": 1.2661816817540663,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 8058
    },
    {
      "epoch": 0.08059,
      "grad_norm": 1.091215180973451,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 8059
    },
    {
      "epoch": 0.0806,
      "grad_norm": 1.0117982176263614,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 8060
    },
    {
      "epoch": 0.08061,
      "grad_norm": 0.9611026577563779,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 8061
    },
    {
      "epoch": 0.08062,
      "grad_norm": 1.1817220754515882,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 8062
    },
    {
      "epoch": 0.08063,
      "grad_norm": 1.2318418776684246,
      "learning_rate": 0.003,
      "loss": 4.1387,
      "step": 8063
    },
    {
      "epoch": 0.08064,
      "grad_norm": 1.1485414527276185,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 8064
    },
    {
      "epoch": 0.08065,
      "grad_norm": 1.1378965393033746,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 8065
    },
    {
      "epoch": 0.08066,
      "grad_norm": 1.1922787895771274,
      "learning_rate": 0.003,
      "loss": 4.1211,
      "step": 8066
    },
    {
      "epoch": 0.08067,
      "grad_norm": 1.153807083794397,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 8067
    },
    {
      "epoch": 0.08068,
      "grad_norm": 1.027030031849749,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 8068
    },
    {
      "epoch": 0.08069,
      "grad_norm": 1.2677666258950757,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 8069
    },
    {
      "epoch": 0.0807,
      "grad_norm": 1.1788831903370685,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 8070
    },
    {
      "epoch": 0.08071,
      "grad_norm": 1.1349772426555031,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 8071
    },
    {
      "epoch": 0.08072,
      "grad_norm": 1.2643432586832963,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 8072
    },
    {
      "epoch": 0.08073,
      "grad_norm": 1.004717571451655,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 8073
    },
    {
      "epoch": 0.08074,
      "grad_norm": 1.0281952059306105,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 8074
    },
    {
      "epoch": 0.08075,
      "grad_norm": 1.1302334862263013,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 8075
    },
    {
      "epoch": 0.08076,
      "grad_norm": 0.8849239138901812,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 8076
    },
    {
      "epoch": 0.08077,
      "grad_norm": 1.0904929838277595,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 8077
    },
    {
      "epoch": 0.08078,
      "grad_norm": 1.2580142950738245,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 8078
    },
    {
      "epoch": 0.08079,
      "grad_norm": 1.0647573839248337,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 8079
    },
    {
      "epoch": 0.0808,
      "grad_norm": 1.009377860450751,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 8080
    },
    {
      "epoch": 0.08081,
      "grad_norm": 1.1462226057928848,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 8081
    },
    {
      "epoch": 0.08082,
      "grad_norm": 0.8533433204287125,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 8082
    },
    {
      "epoch": 0.08083,
      "grad_norm": 0.9405341530283867,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 8083
    },
    {
      "epoch": 0.08084,
      "grad_norm": 1.1885955017782868,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 8084
    },
    {
      "epoch": 0.08085,
      "grad_norm": 1.059349902006872,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 8085
    },
    {
      "epoch": 0.08086,
      "grad_norm": 1.1246370076606642,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 8086
    },
    {
      "epoch": 0.08087,
      "grad_norm": 0.8911927530693307,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 8087
    },
    {
      "epoch": 0.08088,
      "grad_norm": 1.1142808988853259,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 8088
    },
    {
      "epoch": 0.08089,
      "grad_norm": 1.211243629124428,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 8089
    },
    {
      "epoch": 0.0809,
      "grad_norm": 1.1971598603451767,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 8090
    },
    {
      "epoch": 0.08091,
      "grad_norm": 1.2219998111646206,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 8091
    },
    {
      "epoch": 0.08092,
      "grad_norm": 1.0863330722445514,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 8092
    },
    {
      "epoch": 0.08093,
      "grad_norm": 1.1734069502989566,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 8093
    },
    {
      "epoch": 0.08094,
      "grad_norm": 0.9707437526356893,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 8094
    },
    {
      "epoch": 0.08095,
      "grad_norm": 1.2867032457790912,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 8095
    },
    {
      "epoch": 0.08096,
      "grad_norm": 1.0927893033822331,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 8096
    },
    {
      "epoch": 0.08097,
      "grad_norm": 1.1439771673315244,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 8097
    },
    {
      "epoch": 0.08098,
      "grad_norm": 1.196138267953642,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 8098
    },
    {
      "epoch": 0.08099,
      "grad_norm": 1.0160152850154716,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 8099
    },
    {
      "epoch": 0.081,
      "grad_norm": 1.0887542330321427,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 8100
    },
    {
      "epoch": 0.08101,
      "grad_norm": 1.1829287676768256,
      "learning_rate": 0.003,
      "loss": 4.1222,
      "step": 8101
    },
    {
      "epoch": 0.08102,
      "grad_norm": 1.140589086395366,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 8102
    },
    {
      "epoch": 0.08103,
      "grad_norm": 1.1358979019750919,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8103
    },
    {
      "epoch": 0.08104,
      "grad_norm": 1.1288134341200402,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 8104
    },
    {
      "epoch": 0.08105,
      "grad_norm": 1.0135805089098007,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 8105
    },
    {
      "epoch": 0.08106,
      "grad_norm": 1.2455233908534715,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 8106
    },
    {
      "epoch": 0.08107,
      "grad_norm": 1.0325069605488597,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 8107
    },
    {
      "epoch": 0.08108,
      "grad_norm": 1.0751154807719285,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 8108
    },
    {
      "epoch": 0.08109,
      "grad_norm": 1.025582036833368,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 8109
    },
    {
      "epoch": 0.0811,
      "grad_norm": 1.1308151638204902,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 8110
    },
    {
      "epoch": 0.08111,
      "grad_norm": 1.1259700518707663,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 8111
    },
    {
      "epoch": 0.08112,
      "grad_norm": 1.0161036704370565,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 8112
    },
    {
      "epoch": 0.08113,
      "grad_norm": 1.0188779126166405,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 8113
    },
    {
      "epoch": 0.08114,
      "grad_norm": 0.9793445412752302,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 8114
    },
    {
      "epoch": 0.08115,
      "grad_norm": 1.0652029124997249,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 8115
    },
    {
      "epoch": 0.08116,
      "grad_norm": 1.5790703567249575,
      "learning_rate": 0.003,
      "loss": 4.1277,
      "step": 8116
    },
    {
      "epoch": 0.08117,
      "grad_norm": 1.3300782853173372,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 8117
    },
    {
      "epoch": 0.08118,
      "grad_norm": 1.0830907844791484,
      "learning_rate": 0.003,
      "loss": 4.1059,
      "step": 8118
    },
    {
      "epoch": 0.08119,
      "grad_norm": 1.0533574564377777,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 8119
    },
    {
      "epoch": 0.0812,
      "grad_norm": 1.0429356977745372,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 8120
    },
    {
      "epoch": 0.08121,
      "grad_norm": 1.0610900493162627,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 8121
    },
    {
      "epoch": 0.08122,
      "grad_norm": 1.0524119493247375,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 8122
    },
    {
      "epoch": 0.08123,
      "grad_norm": 1.054031758211702,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 8123
    },
    {
      "epoch": 0.08124,
      "grad_norm": 1.2911908500296434,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 8124
    },
    {
      "epoch": 0.08125,
      "grad_norm": 1.0027166818569144,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 8125
    },
    {
      "epoch": 0.08126,
      "grad_norm": 1.2287258658702658,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 8126
    },
    {
      "epoch": 0.08127,
      "grad_norm": 1.2494299585369655,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 8127
    },
    {
      "epoch": 0.08128,
      "grad_norm": 1.0401015237932247,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 8128
    },
    {
      "epoch": 0.08129,
      "grad_norm": 1.1620438566222164,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 8129
    },
    {
      "epoch": 0.0813,
      "grad_norm": 1.1249593770127122,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 8130
    },
    {
      "epoch": 0.08131,
      "grad_norm": 1.1903713747016034,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 8131
    },
    {
      "epoch": 0.08132,
      "grad_norm": 1.127606083040239,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8132
    },
    {
      "epoch": 0.08133,
      "grad_norm": 1.0591042809058853,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 8133
    },
    {
      "epoch": 0.08134,
      "grad_norm": 1.0352971364527501,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 8134
    },
    {
      "epoch": 0.08135,
      "grad_norm": 1.0953299118626243,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 8135
    },
    {
      "epoch": 0.08136,
      "grad_norm": 1.082062792719415,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 8136
    },
    {
      "epoch": 0.08137,
      "grad_norm": 1.024569523425406,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 8137
    },
    {
      "epoch": 0.08138,
      "grad_norm": 1.0882241989313444,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 8138
    },
    {
      "epoch": 0.08139,
      "grad_norm": 1.0250913668500887,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 8139
    },
    {
      "epoch": 0.0814,
      "grad_norm": 1.1805058404248698,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 8140
    },
    {
      "epoch": 0.08141,
      "grad_norm": 1.0554436593079368,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 8141
    },
    {
      "epoch": 0.08142,
      "grad_norm": 1.1633473045475347,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 8142
    },
    {
      "epoch": 0.08143,
      "grad_norm": 1.0891327651830105,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 8143
    },
    {
      "epoch": 0.08144,
      "grad_norm": 1.2506038115523703,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 8144
    },
    {
      "epoch": 0.08145,
      "grad_norm": 1.0856039619112743,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 8145
    },
    {
      "epoch": 0.08146,
      "grad_norm": 1.1661061378191588,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 8146
    },
    {
      "epoch": 0.08147,
      "grad_norm": 1.1101233976675375,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 8147
    },
    {
      "epoch": 0.08148,
      "grad_norm": 0.8889453883522072,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 8148
    },
    {
      "epoch": 0.08149,
      "grad_norm": 0.9830514476631846,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 8149
    },
    {
      "epoch": 0.0815,
      "grad_norm": 1.1043176784528583,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 8150
    },
    {
      "epoch": 0.08151,
      "grad_norm": 1.1479592567463672,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 8151
    },
    {
      "epoch": 0.08152,
      "grad_norm": 1.4269496832332111,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 8152
    },
    {
      "epoch": 0.08153,
      "grad_norm": 0.9417973264848024,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 8153
    },
    {
      "epoch": 0.08154,
      "grad_norm": 1.1174301212473075,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 8154
    },
    {
      "epoch": 0.08155,
      "grad_norm": 1.1514382782073405,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 8155
    },
    {
      "epoch": 0.08156,
      "grad_norm": 1.2437840352931884,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 8156
    },
    {
      "epoch": 0.08157,
      "grad_norm": 1.24374265378239,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 8157
    },
    {
      "epoch": 0.08158,
      "grad_norm": 1.2391145299742439,
      "learning_rate": 0.003,
      "loss": 4.1289,
      "step": 8158
    },
    {
      "epoch": 0.08159,
      "grad_norm": 0.9209362202403073,
      "learning_rate": 0.003,
      "loss": 4.1224,
      "step": 8159
    },
    {
      "epoch": 0.0816,
      "grad_norm": 0.9496193056630835,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 8160
    },
    {
      "epoch": 0.08161,
      "grad_norm": 1.1810669050285874,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 8161
    },
    {
      "epoch": 0.08162,
      "grad_norm": 0.8943628515512069,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 8162
    },
    {
      "epoch": 0.08163,
      "grad_norm": 1.2794592979642219,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 8163
    },
    {
      "epoch": 0.08164,
      "grad_norm": 0.9636518073823616,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 8164
    },
    {
      "epoch": 0.08165,
      "grad_norm": 1.08459249431884,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 8165
    },
    {
      "epoch": 0.08166,
      "grad_norm": 1.0603325319395356,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 8166
    },
    {
      "epoch": 0.08167,
      "grad_norm": 1.1850345601678152,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8167
    },
    {
      "epoch": 0.08168,
      "grad_norm": 0.95521911177411,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 8168
    },
    {
      "epoch": 0.08169,
      "grad_norm": 1.22286947393468,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 8169
    },
    {
      "epoch": 0.0817,
      "grad_norm": 1.0781374703616826,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 8170
    },
    {
      "epoch": 0.08171,
      "grad_norm": 1.1894843647102566,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 8171
    },
    {
      "epoch": 0.08172,
      "grad_norm": 1.0536990256821024,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 8172
    },
    {
      "epoch": 0.08173,
      "grad_norm": 1.2178651572012762,
      "learning_rate": 0.003,
      "loss": 4.1314,
      "step": 8173
    },
    {
      "epoch": 0.08174,
      "grad_norm": 1.0060322968259616,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 8174
    },
    {
      "epoch": 0.08175,
      "grad_norm": 1.1986954728048305,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 8175
    },
    {
      "epoch": 0.08176,
      "grad_norm": 0.9545643256470665,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 8176
    },
    {
      "epoch": 0.08177,
      "grad_norm": 1.0761729611403241,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 8177
    },
    {
      "epoch": 0.08178,
      "grad_norm": 1.0563026676258491,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 8178
    },
    {
      "epoch": 0.08179,
      "grad_norm": 1.2148124269546543,
      "learning_rate": 0.003,
      "loss": 4.1227,
      "step": 8179
    },
    {
      "epoch": 0.0818,
      "grad_norm": 1.169362110071581,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 8180
    },
    {
      "epoch": 0.08181,
      "grad_norm": 1.0955307239949632,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 8181
    },
    {
      "epoch": 0.08182,
      "grad_norm": 1.1144317578395646,
      "learning_rate": 0.003,
      "loss": 4.1287,
      "step": 8182
    },
    {
      "epoch": 0.08183,
      "grad_norm": 1.0295955999206605,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 8183
    },
    {
      "epoch": 0.08184,
      "grad_norm": 1.0947095795077642,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 8184
    },
    {
      "epoch": 0.08185,
      "grad_norm": 1.1371091387650618,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 8185
    },
    {
      "epoch": 0.08186,
      "grad_norm": 1.2471904492463743,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 8186
    },
    {
      "epoch": 0.08187,
      "grad_norm": 1.1930155717879962,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 8187
    },
    {
      "epoch": 0.08188,
      "grad_norm": 1.0342424236497922,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 8188
    },
    {
      "epoch": 0.08189,
      "grad_norm": 1.0352329127755682,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 8189
    },
    {
      "epoch": 0.0819,
      "grad_norm": 1.1527070200939569,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 8190
    },
    {
      "epoch": 0.08191,
      "grad_norm": 0.8792668377328183,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 8191
    },
    {
      "epoch": 0.08192,
      "grad_norm": 1.0467393413650283,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 8192
    },
    {
      "epoch": 0.08193,
      "grad_norm": 1.2051924602645199,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 8193
    },
    {
      "epoch": 0.08194,
      "grad_norm": 0.9287872876699927,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 8194
    },
    {
      "epoch": 0.08195,
      "grad_norm": 1.1076662233195484,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 8195
    },
    {
      "epoch": 0.08196,
      "grad_norm": 0.9526697950640942,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 8196
    },
    {
      "epoch": 0.08197,
      "grad_norm": 1.0534752143661108,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 8197
    },
    {
      "epoch": 0.08198,
      "grad_norm": 1.385183829879785,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 8198
    },
    {
      "epoch": 0.08199,
      "grad_norm": 1.0493902630605674,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 8199
    },
    {
      "epoch": 0.082,
      "grad_norm": 1.1705668502686706,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 8200
    },
    {
      "epoch": 0.08201,
      "grad_norm": 1.0066907003222516,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 8201
    },
    {
      "epoch": 0.08202,
      "grad_norm": 1.2820389328790676,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 8202
    },
    {
      "epoch": 0.08203,
      "grad_norm": 0.8350201264359747,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 8203
    },
    {
      "epoch": 0.08204,
      "grad_norm": 0.9463964782125038,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 8204
    },
    {
      "epoch": 0.08205,
      "grad_norm": 1.2945714728843314,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 8205
    },
    {
      "epoch": 0.08206,
      "grad_norm": 0.9038682308269772,
      "learning_rate": 0.003,
      "loss": 4.1187,
      "step": 8206
    },
    {
      "epoch": 0.08207,
      "grad_norm": 1.1933488063660513,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 8207
    },
    {
      "epoch": 0.08208,
      "grad_norm": 1.0853521570058549,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 8208
    },
    {
      "epoch": 0.08209,
      "grad_norm": 1.1921605079877917,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 8209
    },
    {
      "epoch": 0.0821,
      "grad_norm": 1.0851985553856465,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 8210
    },
    {
      "epoch": 0.08211,
      "grad_norm": 1.181977847238575,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8211
    },
    {
      "epoch": 0.08212,
      "grad_norm": 1.1493226942987702,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 8212
    },
    {
      "epoch": 0.08213,
      "grad_norm": 1.046684484682388,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 8213
    },
    {
      "epoch": 0.08214,
      "grad_norm": 1.250975258065935,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 8214
    },
    {
      "epoch": 0.08215,
      "grad_norm": 1.154855618057501,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 8215
    },
    {
      "epoch": 0.08216,
      "grad_norm": 1.0257914000202106,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 8216
    },
    {
      "epoch": 0.08217,
      "grad_norm": 1.0800945344509303,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 8217
    },
    {
      "epoch": 0.08218,
      "grad_norm": 1.1371989960222333,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 8218
    },
    {
      "epoch": 0.08219,
      "grad_norm": 1.1092426389119456,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 8219
    },
    {
      "epoch": 0.0822,
      "grad_norm": 1.2715561919775418,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 8220
    },
    {
      "epoch": 0.08221,
      "grad_norm": 1.0407722220048563,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 8221
    },
    {
      "epoch": 0.08222,
      "grad_norm": 1.3022172548610993,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 8222
    },
    {
      "epoch": 0.08223,
      "grad_norm": 0.8922838680442248,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 8223
    },
    {
      "epoch": 0.08224,
      "grad_norm": 0.8581824078692487,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 8224
    },
    {
      "epoch": 0.08225,
      "grad_norm": 0.8472746859295258,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 8225
    },
    {
      "epoch": 0.08226,
      "grad_norm": 0.9428073992848998,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 8226
    },
    {
      "epoch": 0.08227,
      "grad_norm": 1.2120835674691295,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 8227
    },
    {
      "epoch": 0.08228,
      "grad_norm": 0.9976142438793715,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 8228
    },
    {
      "epoch": 0.08229,
      "grad_norm": 1.449903058079096,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 8229
    },
    {
      "epoch": 0.0823,
      "grad_norm": 0.9384378582006289,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 8230
    },
    {
      "epoch": 0.08231,
      "grad_norm": 1.0676173907140094,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 8231
    },
    {
      "epoch": 0.08232,
      "grad_norm": 1.3410926543201007,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 8232
    },
    {
      "epoch": 0.08233,
      "grad_norm": 1.0127914202688533,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 8233
    },
    {
      "epoch": 0.08234,
      "grad_norm": 1.160477985307097,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 8234
    },
    {
      "epoch": 0.08235,
      "grad_norm": 1.0668413961583754,
      "learning_rate": 0.003,
      "loss": 4.1278,
      "step": 8235
    },
    {
      "epoch": 0.08236,
      "grad_norm": 0.9901916985746556,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 8236
    },
    {
      "epoch": 0.08237,
      "grad_norm": 1.0644853370451144,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 8237
    },
    {
      "epoch": 0.08238,
      "grad_norm": 1.004834581027316,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 8238
    },
    {
      "epoch": 0.08239,
      "grad_norm": 1.0868937448101426,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 8239
    },
    {
      "epoch": 0.0824,
      "grad_norm": 1.2795995013293346,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 8240
    },
    {
      "epoch": 0.08241,
      "grad_norm": 1.0885372674354117,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 8241
    },
    {
      "epoch": 0.08242,
      "grad_norm": 1.3352614702888232,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 8242
    },
    {
      "epoch": 0.08243,
      "grad_norm": 0.9699411728441731,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 8243
    },
    {
      "epoch": 0.08244,
      "grad_norm": 1.0933561697473708,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 8244
    },
    {
      "epoch": 0.08245,
      "grad_norm": 1.338593294596529,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 8245
    },
    {
      "epoch": 0.08246,
      "grad_norm": 0.9397562612350752,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 8246
    },
    {
      "epoch": 0.08247,
      "grad_norm": 1.0486818288187878,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 8247
    },
    {
      "epoch": 0.08248,
      "grad_norm": 1.1040734775180387,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 8248
    },
    {
      "epoch": 0.08249,
      "grad_norm": 1.0083132980867908,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 8249
    },
    {
      "epoch": 0.0825,
      "grad_norm": 1.1974518119321447,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 8250
    },
    {
      "epoch": 0.08251,
      "grad_norm": 0.9667133442070936,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 8251
    },
    {
      "epoch": 0.08252,
      "grad_norm": 1.1201280872751502,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 8252
    },
    {
      "epoch": 0.08253,
      "grad_norm": 0.998409555504509,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 8253
    },
    {
      "epoch": 0.08254,
      "grad_norm": 1.3018028543957068,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 8254
    },
    {
      "epoch": 0.08255,
      "grad_norm": 0.9111483127940284,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 8255
    },
    {
      "epoch": 0.08256,
      "grad_norm": 1.0988385606276057,
      "learning_rate": 0.003,
      "loss": 4.1132,
      "step": 8256
    },
    {
      "epoch": 0.08257,
      "grad_norm": 1.3331206787781442,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 8257
    },
    {
      "epoch": 0.08258,
      "grad_norm": 1.118558216630687,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 8258
    },
    {
      "epoch": 0.08259,
      "grad_norm": 1.2391360739497914,
      "learning_rate": 0.003,
      "loss": 4.1284,
      "step": 8259
    },
    {
      "epoch": 0.0826,
      "grad_norm": 1.2260663334599986,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 8260
    },
    {
      "epoch": 0.08261,
      "grad_norm": 1.0507114341081063,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 8261
    },
    {
      "epoch": 0.08262,
      "grad_norm": 1.2994656575599821,
      "learning_rate": 0.003,
      "loss": 4.1309,
      "step": 8262
    },
    {
      "epoch": 0.08263,
      "grad_norm": 0.9294316792288047,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 8263
    },
    {
      "epoch": 0.08264,
      "grad_norm": 1.0817062665366421,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 8264
    },
    {
      "epoch": 0.08265,
      "grad_norm": 1.3031114166024542,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 8265
    },
    {
      "epoch": 0.08266,
      "grad_norm": 0.9187895794684968,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 8266
    },
    {
      "epoch": 0.08267,
      "grad_norm": 1.084320300187528,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 8267
    },
    {
      "epoch": 0.08268,
      "grad_norm": 1.3152711121489948,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 8268
    },
    {
      "epoch": 0.08269,
      "grad_norm": 1.0774842752138507,
      "learning_rate": 0.003,
      "loss": 4.1153,
      "step": 8269
    },
    {
      "epoch": 0.0827,
      "grad_norm": 1.1132630336583187,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 8270
    },
    {
      "epoch": 0.08271,
      "grad_norm": 0.9988446582678208,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 8271
    },
    {
      "epoch": 0.08272,
      "grad_norm": 1.1318532368830336,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 8272
    },
    {
      "epoch": 0.08273,
      "grad_norm": 1.2166172102778954,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 8273
    },
    {
      "epoch": 0.08274,
      "grad_norm": 0.9006690473275145,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 8274
    },
    {
      "epoch": 0.08275,
      "grad_norm": 1.2927983262436373,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 8275
    },
    {
      "epoch": 0.08276,
      "grad_norm": 1.2815263368790923,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 8276
    },
    {
      "epoch": 0.08277,
      "grad_norm": 0.8505292867566394,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 8277
    },
    {
      "epoch": 0.08278,
      "grad_norm": 1.047826620944776,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 8278
    },
    {
      "epoch": 0.08279,
      "grad_norm": 1.2347360947843489,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 8279
    },
    {
      "epoch": 0.0828,
      "grad_norm": 0.9255694974902187,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 8280
    },
    {
      "epoch": 0.08281,
      "grad_norm": 0.8671297701457938,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 8281
    },
    {
      "epoch": 0.08282,
      "grad_norm": 1.005419393978364,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 8282
    },
    {
      "epoch": 0.08283,
      "grad_norm": 1.1740422382225844,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 8283
    },
    {
      "epoch": 0.08284,
      "grad_norm": 1.0602566479523916,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 8284
    },
    {
      "epoch": 0.08285,
      "grad_norm": 1.281256167393328,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 8285
    },
    {
      "epoch": 0.08286,
      "grad_norm": 1.2698218556683902,
      "learning_rate": 0.003,
      "loss": 4.119,
      "step": 8286
    },
    {
      "epoch": 0.08287,
      "grad_norm": 1.0905009067792837,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 8287
    },
    {
      "epoch": 0.08288,
      "grad_norm": 1.2351695328697756,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 8288
    },
    {
      "epoch": 0.08289,
      "grad_norm": 0.9062126664137129,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 8289
    },
    {
      "epoch": 0.0829,
      "grad_norm": 0.8229690552504854,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 8290
    },
    {
      "epoch": 0.08291,
      "grad_norm": 1.035719318529572,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 8291
    },
    {
      "epoch": 0.08292,
      "grad_norm": 1.321975709341898,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 8292
    },
    {
      "epoch": 0.08293,
      "grad_norm": 0.956397612925301,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 8293
    },
    {
      "epoch": 0.08294,
      "grad_norm": 1.1005920349573157,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 8294
    },
    {
      "epoch": 0.08295,
      "grad_norm": 1.1827646432424894,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8295
    },
    {
      "epoch": 0.08296,
      "grad_norm": 0.9893580937327027,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 8296
    },
    {
      "epoch": 0.08297,
      "grad_norm": 1.0584769255248867,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 8297
    },
    {
      "epoch": 0.08298,
      "grad_norm": 1.1520347568565248,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 8298
    },
    {
      "epoch": 0.08299,
      "grad_norm": 1.5127165191333787,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 8299
    },
    {
      "epoch": 0.083,
      "grad_norm": 0.988716971696361,
      "learning_rate": 0.003,
      "loss": 4.1257,
      "step": 8300
    },
    {
      "epoch": 0.08301,
      "grad_norm": 1.2189370746107486,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 8301
    },
    {
      "epoch": 0.08302,
      "grad_norm": 1.1627940955744562,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 8302
    },
    {
      "epoch": 0.08303,
      "grad_norm": 1.0548746777232683,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 8303
    },
    {
      "epoch": 0.08304,
      "grad_norm": 1.1160211910286122,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 8304
    },
    {
      "epoch": 0.08305,
      "grad_norm": 1.2052489612764943,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 8305
    },
    {
      "epoch": 0.08306,
      "grad_norm": 1.0824749055776,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 8306
    },
    {
      "epoch": 0.08307,
      "grad_norm": 1.3467892345516064,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 8307
    },
    {
      "epoch": 0.08308,
      "grad_norm": 1.1553201736475212,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 8308
    },
    {
      "epoch": 0.08309,
      "grad_norm": 1.1190267883548892,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 8309
    },
    {
      "epoch": 0.0831,
      "grad_norm": 1.1176202436275529,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 8310
    },
    {
      "epoch": 0.08311,
      "grad_norm": 0.8361916910739546,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 8311
    },
    {
      "epoch": 0.08312,
      "grad_norm": 1.0224578245449776,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 8312
    },
    {
      "epoch": 0.08313,
      "grad_norm": 1.2877054818229416,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 8313
    },
    {
      "epoch": 0.08314,
      "grad_norm": 1.2226376630948521,
      "learning_rate": 0.003,
      "loss": 4.11,
      "step": 8314
    },
    {
      "epoch": 0.08315,
      "grad_norm": 1.1227844150001849,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 8315
    },
    {
      "epoch": 0.08316,
      "grad_norm": 1.0714770724372067,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 8316
    },
    {
      "epoch": 0.08317,
      "grad_norm": 1.156610721764529,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 8317
    },
    {
      "epoch": 0.08318,
      "grad_norm": 1.0610246046060168,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 8318
    },
    {
      "epoch": 0.08319,
      "grad_norm": 1.0429084261396977,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 8319
    },
    {
      "epoch": 0.0832,
      "grad_norm": 1.0243220134183482,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 8320
    },
    {
      "epoch": 0.08321,
      "grad_norm": 1.1448330627177232,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 8321
    },
    {
      "epoch": 0.08322,
      "grad_norm": 1.1972256232351257,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 8322
    },
    {
      "epoch": 0.08323,
      "grad_norm": 1.156455898352581,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 8323
    },
    {
      "epoch": 0.08324,
      "grad_norm": 1.1787235098823745,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8324
    },
    {
      "epoch": 0.08325,
      "grad_norm": 1.1828573586679785,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 8325
    },
    {
      "epoch": 0.08326,
      "grad_norm": 1.0054340916036382,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 8326
    },
    {
      "epoch": 0.08327,
      "grad_norm": 1.2407155491184743,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 8327
    },
    {
      "epoch": 0.08328,
      "grad_norm": 1.0406068984959647,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 8328
    },
    {
      "epoch": 0.08329,
      "grad_norm": 1.1046752234336579,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 8329
    },
    {
      "epoch": 0.0833,
      "grad_norm": 1.074379949234231,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8330
    },
    {
      "epoch": 0.08331,
      "grad_norm": 1.3380178938643743,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 8331
    },
    {
      "epoch": 0.08332,
      "grad_norm": 1.0510812462170238,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 8332
    },
    {
      "epoch": 0.08333,
      "grad_norm": 1.1840932096423546,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 8333
    },
    {
      "epoch": 0.08334,
      "grad_norm": 1.0785750284205469,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 8334
    },
    {
      "epoch": 0.08335,
      "grad_norm": 1.1947059923861136,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 8335
    },
    {
      "epoch": 0.08336,
      "grad_norm": 1.0556584760576144,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 8336
    },
    {
      "epoch": 0.08337,
      "grad_norm": 1.072592749257867,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 8337
    },
    {
      "epoch": 0.08338,
      "grad_norm": 0.972461982559215,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 8338
    },
    {
      "epoch": 0.08339,
      "grad_norm": 1.0573142171642655,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 8339
    },
    {
      "epoch": 0.0834,
      "grad_norm": 1.0328161551983648,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 8340
    },
    {
      "epoch": 0.08341,
      "grad_norm": 1.0749745185516024,
      "learning_rate": 0.003,
      "loss": 4.1246,
      "step": 8341
    },
    {
      "epoch": 0.08342,
      "grad_norm": 1.1598258763657079,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 8342
    },
    {
      "epoch": 0.08343,
      "grad_norm": 1.200654151812054,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 8343
    },
    {
      "epoch": 0.08344,
      "grad_norm": 1.074601761075456,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 8344
    },
    {
      "epoch": 0.08345,
      "grad_norm": 1.2407518752084736,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 8345
    },
    {
      "epoch": 0.08346,
      "grad_norm": 0.8878117997949128,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 8346
    },
    {
      "epoch": 0.08347,
      "grad_norm": 1.0334765028748847,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 8347
    },
    {
      "epoch": 0.08348,
      "grad_norm": 0.9992128280318741,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 8348
    },
    {
      "epoch": 0.08349,
      "grad_norm": 1.063209503148335,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 8349
    },
    {
      "epoch": 0.0835,
      "grad_norm": 1.1905818892426057,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 8350
    },
    {
      "epoch": 0.08351,
      "grad_norm": 1.1772176254153641,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 8351
    },
    {
      "epoch": 0.08352,
      "grad_norm": 1.2115245247856177,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 8352
    },
    {
      "epoch": 0.08353,
      "grad_norm": 1.0366342198498524,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 8353
    },
    {
      "epoch": 0.08354,
      "grad_norm": 1.3335689200164156,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 8354
    },
    {
      "epoch": 0.08355,
      "grad_norm": 0.9642656094519331,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 8355
    },
    {
      "epoch": 0.08356,
      "grad_norm": 1.1411059061577467,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 8356
    },
    {
      "epoch": 0.08357,
      "grad_norm": 0.9934662625503926,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 8357
    },
    {
      "epoch": 0.08358,
      "grad_norm": 1.1371302192567387,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 8358
    },
    {
      "epoch": 0.08359,
      "grad_norm": 0.9740680031348249,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 8359
    },
    {
      "epoch": 0.0836,
      "grad_norm": 1.1419609591681779,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 8360
    },
    {
      "epoch": 0.08361,
      "grad_norm": 1.0646469303885506,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 8361
    },
    {
      "epoch": 0.08362,
      "grad_norm": 1.1094407683581557,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 8362
    },
    {
      "epoch": 0.08363,
      "grad_norm": 1.1295628536998819,
      "learning_rate": 0.003,
      "loss": 4.1266,
      "step": 8363
    },
    {
      "epoch": 0.08364,
      "grad_norm": 1.2583129130125206,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 8364
    },
    {
      "epoch": 0.08365,
      "grad_norm": 1.1878894378097133,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 8365
    },
    {
      "epoch": 0.08366,
      "grad_norm": 1.3686033802340978,
      "learning_rate": 0.003,
      "loss": 4.1216,
      "step": 8366
    },
    {
      "epoch": 0.08367,
      "grad_norm": 0.9011404948936148,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 8367
    },
    {
      "epoch": 0.08368,
      "grad_norm": 1.0882608213924376,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 8368
    },
    {
      "epoch": 0.08369,
      "grad_norm": 1.3599006033461152,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 8369
    },
    {
      "epoch": 0.0837,
      "grad_norm": 0.7503193168469512,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 8370
    },
    {
      "epoch": 0.08371,
      "grad_norm": 0.9831335397603639,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 8371
    },
    {
      "epoch": 0.08372,
      "grad_norm": 1.4830324351803355,
      "learning_rate": 0.003,
      "loss": 4.1279,
      "step": 8372
    },
    {
      "epoch": 0.08373,
      "grad_norm": 0.7578377445625349,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 8373
    },
    {
      "epoch": 0.08374,
      "grad_norm": 1.0871032375475826,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 8374
    },
    {
      "epoch": 0.08375,
      "grad_norm": 1.02193912735729,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 8375
    },
    {
      "epoch": 0.08376,
      "grad_norm": 1.1806307187497338,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 8376
    },
    {
      "epoch": 0.08377,
      "grad_norm": 1.0233421107165972,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 8377
    },
    {
      "epoch": 0.08378,
      "grad_norm": 1.0515534170348302,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 8378
    },
    {
      "epoch": 0.08379,
      "grad_norm": 1.3247691739476297,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 8379
    },
    {
      "epoch": 0.0838,
      "grad_norm": 0.9259173435236405,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 8380
    },
    {
      "epoch": 0.08381,
      "grad_norm": 0.9616554946157291,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 8381
    },
    {
      "epoch": 0.08382,
      "grad_norm": 1.038118730756911,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 8382
    },
    {
      "epoch": 0.08383,
      "grad_norm": 1.2117364000112238,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 8383
    },
    {
      "epoch": 0.08384,
      "grad_norm": 0.9896634834173473,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 8384
    },
    {
      "epoch": 0.08385,
      "grad_norm": 1.2713083862055092,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 8385
    },
    {
      "epoch": 0.08386,
      "grad_norm": 1.040476983684724,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 8386
    },
    {
      "epoch": 0.08387,
      "grad_norm": 1.2968269677913182,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 8387
    },
    {
      "epoch": 0.08388,
      "grad_norm": 0.9493766094384419,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 8388
    },
    {
      "epoch": 0.08389,
      "grad_norm": 0.9193393415229424,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 8389
    },
    {
      "epoch": 0.0839,
      "grad_norm": 1.2288668412945116,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 8390
    },
    {
      "epoch": 0.08391,
      "grad_norm": 1.3874236422255661,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 8391
    },
    {
      "epoch": 0.08392,
      "grad_norm": 1.107145105182999,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 8392
    },
    {
      "epoch": 0.08393,
      "grad_norm": 1.0476788926336602,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 8393
    },
    {
      "epoch": 0.08394,
      "grad_norm": 1.0534947679685096,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 8394
    },
    {
      "epoch": 0.08395,
      "grad_norm": 1.1208698213755766,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 8395
    },
    {
      "epoch": 0.08396,
      "grad_norm": 0.928086307132362,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 8396
    },
    {
      "epoch": 0.08397,
      "grad_norm": 1.1521765849653989,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 8397
    },
    {
      "epoch": 0.08398,
      "grad_norm": 1.3972790523786438,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 8398
    },
    {
      "epoch": 0.08399,
      "grad_norm": 1.086059690207596,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 8399
    },
    {
      "epoch": 0.084,
      "grad_norm": 1.0596488413781482,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 8400
    },
    {
      "epoch": 0.08401,
      "grad_norm": 1.0555266948649673,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 8401
    },
    {
      "epoch": 0.08402,
      "grad_norm": 1.1373781516408072,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 8402
    },
    {
      "epoch": 0.08403,
      "grad_norm": 0.966784512965234,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 8403
    },
    {
      "epoch": 0.08404,
      "grad_norm": 1.3105083482065643,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 8404
    },
    {
      "epoch": 0.08405,
      "grad_norm": 1.1525915739023551,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 8405
    },
    {
      "epoch": 0.08406,
      "grad_norm": 1.0591140746101446,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 8406
    },
    {
      "epoch": 0.08407,
      "grad_norm": 1.2144407868343654,
      "learning_rate": 0.003,
      "loss": 4.1449,
      "step": 8407
    },
    {
      "epoch": 0.08408,
      "grad_norm": 1.1819589739488847,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 8408
    },
    {
      "epoch": 0.08409,
      "grad_norm": 1.0441350913888352,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 8409
    },
    {
      "epoch": 0.0841,
      "grad_norm": 1.1383642745480276,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 8410
    },
    {
      "epoch": 0.08411,
      "grad_norm": 1.1073161454597245,
      "learning_rate": 0.003,
      "loss": 4.1294,
      "step": 8411
    },
    {
      "epoch": 0.08412,
      "grad_norm": 1.0480383171994219,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 8412
    },
    {
      "epoch": 0.08413,
      "grad_norm": 1.1492294487682813,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 8413
    },
    {
      "epoch": 0.08414,
      "grad_norm": 1.0210002105972802,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 8414
    },
    {
      "epoch": 0.08415,
      "grad_norm": 1.0465311769849186,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 8415
    },
    {
      "epoch": 0.08416,
      "grad_norm": 1.0796543627431838,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 8416
    },
    {
      "epoch": 0.08417,
      "grad_norm": 1.079430987633751,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 8417
    },
    {
      "epoch": 0.08418,
      "grad_norm": 1.0105681425463648,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 8418
    },
    {
      "epoch": 0.08419,
      "grad_norm": 1.247355704527725,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 8419
    },
    {
      "epoch": 0.0842,
      "grad_norm": 1.2105784595860498,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 8420
    },
    {
      "epoch": 0.08421,
      "grad_norm": 1.1492243371213764,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 8421
    },
    {
      "epoch": 0.08422,
      "grad_norm": 0.9577188639693539,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 8422
    },
    {
      "epoch": 0.08423,
      "grad_norm": 1.034036484869677,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 8423
    },
    {
      "epoch": 0.08424,
      "grad_norm": 1.1242868596920017,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 8424
    },
    {
      "epoch": 0.08425,
      "grad_norm": 0.8441325876866451,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 8425
    },
    {
      "epoch": 0.08426,
      "grad_norm": 0.9813337516253374,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 8426
    },
    {
      "epoch": 0.08427,
      "grad_norm": 1.2605532328539706,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 8427
    },
    {
      "epoch": 0.08428,
      "grad_norm": 0.9704205073413669,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 8428
    },
    {
      "epoch": 0.08429,
      "grad_norm": 1.2140012352375567,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 8429
    },
    {
      "epoch": 0.0843,
      "grad_norm": 1.3884303580055675,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 8430
    },
    {
      "epoch": 0.08431,
      "grad_norm": 1.142875897516256,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 8431
    },
    {
      "epoch": 0.08432,
      "grad_norm": 1.1142933348631512,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 8432
    },
    {
      "epoch": 0.08433,
      "grad_norm": 1.1373903002999843,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 8433
    },
    {
      "epoch": 0.08434,
      "grad_norm": 1.1107740861581872,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 8434
    },
    {
      "epoch": 0.08435,
      "grad_norm": 1.2124601254077134,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8435
    },
    {
      "epoch": 0.08436,
      "grad_norm": 1.0006866274530002,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 8436
    },
    {
      "epoch": 0.08437,
      "grad_norm": 1.1576379043773006,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 8437
    },
    {
      "epoch": 0.08438,
      "grad_norm": 1.0862541258017986,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 8438
    },
    {
      "epoch": 0.08439,
      "grad_norm": 0.904884805175504,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 8439
    },
    {
      "epoch": 0.0844,
      "grad_norm": 1.1078688851927403,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 8440
    },
    {
      "epoch": 0.08441,
      "grad_norm": 1.1844909779663761,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 8441
    },
    {
      "epoch": 0.08442,
      "grad_norm": 1.2262528613979544,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 8442
    },
    {
      "epoch": 0.08443,
      "grad_norm": 1.0879224552984719,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 8443
    },
    {
      "epoch": 0.08444,
      "grad_norm": 1.0908579778214895,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 8444
    },
    {
      "epoch": 0.08445,
      "grad_norm": 1.134907437950465,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 8445
    },
    {
      "epoch": 0.08446,
      "grad_norm": 1.0581120143027056,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 8446
    },
    {
      "epoch": 0.08447,
      "grad_norm": 1.1471818659392075,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 8447
    },
    {
      "epoch": 0.08448,
      "grad_norm": 1.0030963036118246,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 8448
    },
    {
      "epoch": 0.08449,
      "grad_norm": 1.0449812545173407,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 8449
    },
    {
      "epoch": 0.0845,
      "grad_norm": 0.9500089816000273,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 8450
    },
    {
      "epoch": 0.08451,
      "grad_norm": 1.0300366626148232,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 8451
    },
    {
      "epoch": 0.08452,
      "grad_norm": 1.0714746920985359,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 8452
    },
    {
      "epoch": 0.08453,
      "grad_norm": 1.094370289054861,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 8453
    },
    {
      "epoch": 0.08454,
      "grad_norm": 1.1120630928897233,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 8454
    },
    {
      "epoch": 0.08455,
      "grad_norm": 1.2560544276208887,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 8455
    },
    {
      "epoch": 0.08456,
      "grad_norm": 0.9752461801922623,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 8456
    },
    {
      "epoch": 0.08457,
      "grad_norm": 1.27526851227219,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 8457
    },
    {
      "epoch": 0.08458,
      "grad_norm": 1.3055821184674892,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8458
    },
    {
      "epoch": 0.08459,
      "grad_norm": 1.0932295290544094,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 8459
    },
    {
      "epoch": 0.0846,
      "grad_norm": 1.1813495224526782,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 8460
    },
    {
      "epoch": 0.08461,
      "grad_norm": 1.0721351403498962,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 8461
    },
    {
      "epoch": 0.08462,
      "grad_norm": 1.3346700222485708,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 8462
    },
    {
      "epoch": 0.08463,
      "grad_norm": 0.9392142093785416,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 8463
    },
    {
      "epoch": 0.08464,
      "grad_norm": 1.102351898512901,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 8464
    },
    {
      "epoch": 0.08465,
      "grad_norm": 1.0020672839019136,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 8465
    },
    {
      "epoch": 0.08466,
      "grad_norm": 1.2416686816572848,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 8466
    },
    {
      "epoch": 0.08467,
      "grad_norm": 1.011177944354335,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 8467
    },
    {
      "epoch": 0.08468,
      "grad_norm": 0.9939470606527164,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 8468
    },
    {
      "epoch": 0.08469,
      "grad_norm": 1.1517379990943246,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 8469
    },
    {
      "epoch": 0.0847,
      "grad_norm": 1.2134216312298631,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 8470
    },
    {
      "epoch": 0.08471,
      "grad_norm": 1.0402951838186536,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 8471
    },
    {
      "epoch": 0.08472,
      "grad_norm": 1.1880832032771211,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 8472
    },
    {
      "epoch": 0.08473,
      "grad_norm": 1.1386393088356936,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 8473
    },
    {
      "epoch": 0.08474,
      "grad_norm": 1.0623433425885904,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 8474
    },
    {
      "epoch": 0.08475,
      "grad_norm": 1.4615195992095813,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 8475
    },
    {
      "epoch": 0.08476,
      "grad_norm": 1.400034147068075,
      "learning_rate": 0.003,
      "loss": 4.1193,
      "step": 8476
    },
    {
      "epoch": 0.08477,
      "grad_norm": 0.9222788274944164,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 8477
    },
    {
      "epoch": 0.08478,
      "grad_norm": 0.9721124921620972,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 8478
    },
    {
      "epoch": 0.08479,
      "grad_norm": 1.12880457289452,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 8479
    },
    {
      "epoch": 0.0848,
      "grad_norm": 1.0391225480274222,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 8480
    },
    {
      "epoch": 0.08481,
      "grad_norm": 1.188111048658957,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 8481
    },
    {
      "epoch": 0.08482,
      "grad_norm": 1.2272677578307725,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 8482
    },
    {
      "epoch": 0.08483,
      "grad_norm": 0.9551332686819325,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 8483
    },
    {
      "epoch": 0.08484,
      "grad_norm": 1.5024989588505933,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 8484
    },
    {
      "epoch": 0.08485,
      "grad_norm": 1.1577932362053887,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 8485
    },
    {
      "epoch": 0.08486,
      "grad_norm": 1.0658532503557656,
      "learning_rate": 0.003,
      "loss": 4.1217,
      "step": 8486
    },
    {
      "epoch": 0.08487,
      "grad_norm": 1.2131550444413173,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 8487
    },
    {
      "epoch": 0.08488,
      "grad_norm": 1.1266967628313658,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 8488
    },
    {
      "epoch": 0.08489,
      "grad_norm": 1.1425330146082786,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 8489
    },
    {
      "epoch": 0.0849,
      "grad_norm": 1.0411187983617134,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 8490
    },
    {
      "epoch": 0.08491,
      "grad_norm": 1.2636596972864644,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 8491
    },
    {
      "epoch": 0.08492,
      "grad_norm": 0.9418480617171554,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 8492
    },
    {
      "epoch": 0.08493,
      "grad_norm": 1.0704335834033358,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 8493
    },
    {
      "epoch": 0.08494,
      "grad_norm": 0.9203629097780305,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 8494
    },
    {
      "epoch": 0.08495,
      "grad_norm": 1.1526449050049299,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 8495
    },
    {
      "epoch": 0.08496,
      "grad_norm": 1.263784431170235,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 8496
    },
    {
      "epoch": 0.08497,
      "grad_norm": 1.0711188981690378,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 8497
    },
    {
      "epoch": 0.08498,
      "grad_norm": 1.247652954241833,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 8498
    },
    {
      "epoch": 0.08499,
      "grad_norm": 1.0480915872768224,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 8499
    },
    {
      "epoch": 0.085,
      "grad_norm": 0.9860067174717102,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 8500
    },
    {
      "epoch": 0.08501,
      "grad_norm": 1.2116292642399038,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 8501
    },
    {
      "epoch": 0.08502,
      "grad_norm": 0.9240148489827791,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 8502
    },
    {
      "epoch": 0.08503,
      "grad_norm": 1.075740001423965,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 8503
    },
    {
      "epoch": 0.08504,
      "grad_norm": 1.2241979651093111,
      "learning_rate": 0.003,
      "loss": 4.1259,
      "step": 8504
    },
    {
      "epoch": 0.08505,
      "grad_norm": 1.1189328001416305,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 8505
    },
    {
      "epoch": 0.08506,
      "grad_norm": 1.2782740508359858,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 8506
    },
    {
      "epoch": 0.08507,
      "grad_norm": 0.9869939621726643,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 8507
    },
    {
      "epoch": 0.08508,
      "grad_norm": 1.3233689266261979,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 8508
    },
    {
      "epoch": 0.08509,
      "grad_norm": 1.1313632101488496,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 8509
    },
    {
      "epoch": 0.0851,
      "grad_norm": 1.1719684297456017,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 8510
    },
    {
      "epoch": 0.08511,
      "grad_norm": 1.029569375700711,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 8511
    },
    {
      "epoch": 0.08512,
      "grad_norm": 1.0768752140455045,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 8512
    },
    {
      "epoch": 0.08513,
      "grad_norm": 1.0949952948920811,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 8513
    },
    {
      "epoch": 0.08514,
      "grad_norm": 1.0899619602404949,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 8514
    },
    {
      "epoch": 0.08515,
      "grad_norm": 0.9411338192057704,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 8515
    },
    {
      "epoch": 0.08516,
      "grad_norm": 1.0725560537444447,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 8516
    },
    {
      "epoch": 0.08517,
      "grad_norm": 1.1508250226748429,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 8517
    },
    {
      "epoch": 0.08518,
      "grad_norm": 1.2110527808200953,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 8518
    },
    {
      "epoch": 0.08519,
      "grad_norm": 1.1151939229482213,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 8519
    },
    {
      "epoch": 0.0852,
      "grad_norm": 1.058765428917515,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 8520
    },
    {
      "epoch": 0.08521,
      "grad_norm": 1.562898533414411,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 8521
    },
    {
      "epoch": 0.08522,
      "grad_norm": 0.9343614192489725,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 8522
    },
    {
      "epoch": 0.08523,
      "grad_norm": 0.9687273425374774,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 8523
    },
    {
      "epoch": 0.08524,
      "grad_norm": 1.1935185487859767,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 8524
    },
    {
      "epoch": 0.08525,
      "grad_norm": 1.1707008624166617,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 8525
    },
    {
      "epoch": 0.08526,
      "grad_norm": 1.140262331690332,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 8526
    },
    {
      "epoch": 0.08527,
      "grad_norm": 1.1910316786163704,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 8527
    },
    {
      "epoch": 0.08528,
      "grad_norm": 1.25069179782291,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 8528
    },
    {
      "epoch": 0.08529,
      "grad_norm": 0.8931136112866865,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 8529
    },
    {
      "epoch": 0.0853,
      "grad_norm": 1.098918408213405,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 8530
    },
    {
      "epoch": 0.08531,
      "grad_norm": 1.2764274322869384,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 8531
    },
    {
      "epoch": 0.08532,
      "grad_norm": 1.085018277020142,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 8532
    },
    {
      "epoch": 0.08533,
      "grad_norm": 1.4334361641261024,
      "learning_rate": 0.003,
      "loss": 4.1118,
      "step": 8533
    },
    {
      "epoch": 0.08534,
      "grad_norm": 1.004055489226347,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 8534
    },
    {
      "epoch": 0.08535,
      "grad_norm": 1.1653627072213197,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 8535
    },
    {
      "epoch": 0.08536,
      "grad_norm": 1.0516909042494484,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 8536
    },
    {
      "epoch": 0.08537,
      "grad_norm": 1.1601019959999184,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 8537
    },
    {
      "epoch": 0.08538,
      "grad_norm": 1.2230647833271613,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 8538
    },
    {
      "epoch": 0.08539,
      "grad_norm": 1.3530142917773134,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 8539
    },
    {
      "epoch": 0.0854,
      "grad_norm": 1.1128122417965394,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 8540
    },
    {
      "epoch": 0.08541,
      "grad_norm": 1.0675207047672388,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 8541
    },
    {
      "epoch": 0.08542,
      "grad_norm": 1.0336315662780386,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 8542
    },
    {
      "epoch": 0.08543,
      "grad_norm": 1.2081270118051715,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 8543
    },
    {
      "epoch": 0.08544,
      "grad_norm": 0.9074808211335781,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 8544
    },
    {
      "epoch": 0.08545,
      "grad_norm": 1.0203074076002299,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 8545
    },
    {
      "epoch": 0.08546,
      "grad_norm": 1.3480134319252597,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 8546
    },
    {
      "epoch": 0.08547,
      "grad_norm": 0.8532216900844057,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 8547
    },
    {
      "epoch": 0.08548,
      "grad_norm": 1.0598267597981388,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 8548
    },
    {
      "epoch": 0.08549,
      "grad_norm": 1.1890143144588734,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 8549
    },
    {
      "epoch": 0.0855,
      "grad_norm": 1.0305051389885018,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 8550
    },
    {
      "epoch": 0.08551,
      "grad_norm": 1.2021160867834288,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 8551
    },
    {
      "epoch": 0.08552,
      "grad_norm": 1.229391199817145,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8552
    },
    {
      "epoch": 0.08553,
      "grad_norm": 1.2277388086687184,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 8553
    },
    {
      "epoch": 0.08554,
      "grad_norm": 1.0009999269129783,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 8554
    },
    {
      "epoch": 0.08555,
      "grad_norm": 1.3281536314404834,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 8555
    },
    {
      "epoch": 0.08556,
      "grad_norm": 1.0941894161427708,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 8556
    },
    {
      "epoch": 0.08557,
      "grad_norm": 1.0074987192557203,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 8557
    },
    {
      "epoch": 0.08558,
      "grad_norm": 1.3120755419752181,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 8558
    },
    {
      "epoch": 0.08559,
      "grad_norm": 1.112855109141421,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 8559
    },
    {
      "epoch": 0.0856,
      "grad_norm": 1.1802008883688693,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8560
    },
    {
      "epoch": 0.08561,
      "grad_norm": 1.1435400866015262,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 8561
    },
    {
      "epoch": 0.08562,
      "grad_norm": 1.2157748046066845,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 8562
    },
    {
      "epoch": 0.08563,
      "grad_norm": 0.9184521377121538,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 8563
    },
    {
      "epoch": 0.08564,
      "grad_norm": 1.3069415671600118,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 8564
    },
    {
      "epoch": 0.08565,
      "grad_norm": 1.2906376644935875,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 8565
    },
    {
      "epoch": 0.08566,
      "grad_norm": 0.8897753639609908,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 8566
    },
    {
      "epoch": 0.08567,
      "grad_norm": 0.9167516320833566,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 8567
    },
    {
      "epoch": 0.08568,
      "grad_norm": 0.9734453972672918,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 8568
    },
    {
      "epoch": 0.08569,
      "grad_norm": 1.1471493076406067,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 8569
    },
    {
      "epoch": 0.0857,
      "grad_norm": 1.0305087020424901,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 8570
    },
    {
      "epoch": 0.08571,
      "grad_norm": 1.1530264666685826,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 8571
    },
    {
      "epoch": 0.08572,
      "grad_norm": 1.072811478724214,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 8572
    },
    {
      "epoch": 0.08573,
      "grad_norm": 1.0010697460369764,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 8573
    },
    {
      "epoch": 0.08574,
      "grad_norm": 1.3554961872841174,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 8574
    },
    {
      "epoch": 0.08575,
      "grad_norm": 1.0594679145189,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 8575
    },
    {
      "epoch": 0.08576,
      "grad_norm": 1.531733599884536,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 8576
    },
    {
      "epoch": 0.08577,
      "grad_norm": 0.946013162845909,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 8577
    },
    {
      "epoch": 0.08578,
      "grad_norm": 0.9822723663412125,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 8578
    },
    {
      "epoch": 0.08579,
      "grad_norm": 1.264222600454884,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 8579
    },
    {
      "epoch": 0.0858,
      "grad_norm": 1.2028419299274247,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 8580
    },
    {
      "epoch": 0.08581,
      "grad_norm": 1.166999098358251,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 8581
    },
    {
      "epoch": 0.08582,
      "grad_norm": 1.1946130776246302,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 8582
    },
    {
      "epoch": 0.08583,
      "grad_norm": 0.8275926145019445,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 8583
    },
    {
      "epoch": 0.08584,
      "grad_norm": 0.9601828393882058,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 8584
    },
    {
      "epoch": 0.08585,
      "grad_norm": 1.1418282893849723,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 8585
    },
    {
      "epoch": 0.08586,
      "grad_norm": 0.9692499003978388,
      "learning_rate": 0.003,
      "loss": 4.1235,
      "step": 8586
    },
    {
      "epoch": 0.08587,
      "grad_norm": 1.3248373211325242,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 8587
    },
    {
      "epoch": 0.08588,
      "grad_norm": 0.9942213910299912,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 8588
    },
    {
      "epoch": 0.08589,
      "grad_norm": 1.280584293268826,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 8589
    },
    {
      "epoch": 0.0859,
      "grad_norm": 0.8301437868115007,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 8590
    },
    {
      "epoch": 0.08591,
      "grad_norm": 0.9369694398863401,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 8591
    },
    {
      "epoch": 0.08592,
      "grad_norm": 1.275721161123058,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 8592
    },
    {
      "epoch": 0.08593,
      "grad_norm": 0.8955252636026287,
      "learning_rate": 0.003,
      "loss": 4.1308,
      "step": 8593
    },
    {
      "epoch": 0.08594,
      "grad_norm": 1.4011732140313142,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 8594
    },
    {
      "epoch": 0.08595,
      "grad_norm": 1.0600791952772353,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 8595
    },
    {
      "epoch": 0.08596,
      "grad_norm": 1.267568542822607,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 8596
    },
    {
      "epoch": 0.08597,
      "grad_norm": 1.0586434977398884,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 8597
    },
    {
      "epoch": 0.08598,
      "grad_norm": 1.27561385748398,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 8598
    },
    {
      "epoch": 0.08599,
      "grad_norm": 1.1065342806433314,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 8599
    },
    {
      "epoch": 0.086,
      "grad_norm": 1.1348977213487659,
      "learning_rate": 0.003,
      "loss": 4.1282,
      "step": 8600
    },
    {
      "epoch": 0.08601,
      "grad_norm": 1.176279738052218,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 8601
    },
    {
      "epoch": 0.08602,
      "grad_norm": 1.1064233817022224,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 8602
    },
    {
      "epoch": 0.08603,
      "grad_norm": 1.0688250082333606,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 8603
    },
    {
      "epoch": 0.08604,
      "grad_norm": 0.9619884458380277,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 8604
    },
    {
      "epoch": 0.08605,
      "grad_norm": 1.1539583918871326,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 8605
    },
    {
      "epoch": 0.08606,
      "grad_norm": 1.3009621210548399,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 8606
    },
    {
      "epoch": 0.08607,
      "grad_norm": 0.9519847108591609,
      "learning_rate": 0.003,
      "loss": 4.1196,
      "step": 8607
    },
    {
      "epoch": 0.08608,
      "grad_norm": 1.1165576004738105,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 8608
    },
    {
      "epoch": 0.08609,
      "grad_norm": 1.2359873790117464,
      "learning_rate": 0.003,
      "loss": 4.1239,
      "step": 8609
    },
    {
      "epoch": 0.0861,
      "grad_norm": 0.8527902351660679,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 8610
    },
    {
      "epoch": 0.08611,
      "grad_norm": 1.0660073775504049,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 8611
    },
    {
      "epoch": 0.08612,
      "grad_norm": 1.1340056451512006,
      "learning_rate": 0.003,
      "loss": 4.1349,
      "step": 8612
    },
    {
      "epoch": 0.08613,
      "grad_norm": 1.3268150264154202,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 8613
    },
    {
      "epoch": 0.08614,
      "grad_norm": 1.1093722096483616,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 8614
    },
    {
      "epoch": 0.08615,
      "grad_norm": 1.3664781719705619,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 8615
    },
    {
      "epoch": 0.08616,
      "grad_norm": 0.9384106102804197,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 8616
    },
    {
      "epoch": 0.08617,
      "grad_norm": 1.0492336110550373,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 8617
    },
    {
      "epoch": 0.08618,
      "grad_norm": 1.1146964560183108,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 8618
    },
    {
      "epoch": 0.08619,
      "grad_norm": 1.194030607125967,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 8619
    },
    {
      "epoch": 0.0862,
      "grad_norm": 1.2682058300146288,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 8620
    },
    {
      "epoch": 0.08621,
      "grad_norm": 1.0546915391496692,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 8621
    },
    {
      "epoch": 0.08622,
      "grad_norm": 1.201336209536038,
      "learning_rate": 0.003,
      "loss": 4.1162,
      "step": 8622
    },
    {
      "epoch": 0.08623,
      "grad_norm": 0.8286267707907774,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 8623
    },
    {
      "epoch": 0.08624,
      "grad_norm": 0.8050569535552256,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 8624
    },
    {
      "epoch": 0.08625,
      "grad_norm": 1.0090223121995023,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 8625
    },
    {
      "epoch": 0.08626,
      "grad_norm": 1.3836049551194456,
      "learning_rate": 0.003,
      "loss": 4.1241,
      "step": 8626
    },
    {
      "epoch": 0.08627,
      "grad_norm": 0.9396004260224532,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 8627
    },
    {
      "epoch": 0.08628,
      "grad_norm": 1.0940158992898368,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 8628
    },
    {
      "epoch": 0.08629,
      "grad_norm": 1.3217515207758788,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 8629
    },
    {
      "epoch": 0.0863,
      "grad_norm": 1.0401740449406907,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 8630
    },
    {
      "epoch": 0.08631,
      "grad_norm": 1.1695727742458422,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 8631
    },
    {
      "epoch": 0.08632,
      "grad_norm": 0.9840368958621316,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 8632
    },
    {
      "epoch": 0.08633,
      "grad_norm": 1.2699264390946416,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 8633
    },
    {
      "epoch": 0.08634,
      "grad_norm": 0.954835252306559,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 8634
    },
    {
      "epoch": 0.08635,
      "grad_norm": 1.0949764020553094,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8635
    },
    {
      "epoch": 0.08636,
      "grad_norm": 1.2803777327613919,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 8636
    },
    {
      "epoch": 0.08637,
      "grad_norm": 1.0805026136910605,
      "learning_rate": 0.003,
      "loss": 4.1243,
      "step": 8637
    },
    {
      "epoch": 0.08638,
      "grad_norm": 1.1432278312866255,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 8638
    },
    {
      "epoch": 0.08639,
      "grad_norm": 1.1025532522142478,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 8639
    },
    {
      "epoch": 0.0864,
      "grad_norm": 0.9490378890886939,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 8640
    },
    {
      "epoch": 0.08641,
      "grad_norm": 1.285895447896166,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 8641
    },
    {
      "epoch": 0.08642,
      "grad_norm": 1.3217026018289355,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 8642
    },
    {
      "epoch": 0.08643,
      "grad_norm": 1.1095193491690498,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 8643
    },
    {
      "epoch": 0.08644,
      "grad_norm": 1.0954213548252971,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 8644
    },
    {
      "epoch": 0.08645,
      "grad_norm": 1.0836314486145702,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 8645
    },
    {
      "epoch": 0.08646,
      "grad_norm": 1.2812668159767462,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 8646
    },
    {
      "epoch": 0.08647,
      "grad_norm": 1.1030847393878342,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 8647
    },
    {
      "epoch": 0.08648,
      "grad_norm": 1.111034140124417,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 8648
    },
    {
      "epoch": 0.08649,
      "grad_norm": 1.1538481841911161,
      "learning_rate": 0.003,
      "loss": 4.121,
      "step": 8649
    },
    {
      "epoch": 0.0865,
      "grad_norm": 1.0152874441487634,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 8650
    },
    {
      "epoch": 0.08651,
      "grad_norm": 1.0994427850562234,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 8651
    },
    {
      "epoch": 0.08652,
      "grad_norm": 1.127861382204831,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 8652
    },
    {
      "epoch": 0.08653,
      "grad_norm": 1.07505411159091,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 8653
    },
    {
      "epoch": 0.08654,
      "grad_norm": 1.3496208467560102,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 8654
    },
    {
      "epoch": 0.08655,
      "grad_norm": 1.01647617649097,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 8655
    },
    {
      "epoch": 0.08656,
      "grad_norm": 1.4209597404124044,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 8656
    },
    {
      "epoch": 0.08657,
      "grad_norm": 0.9690796077947841,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 8657
    },
    {
      "epoch": 0.08658,
      "grad_norm": 1.217443551789007,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 8658
    },
    {
      "epoch": 0.08659,
      "grad_norm": 0.9222153095355206,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 8659
    },
    {
      "epoch": 0.0866,
      "grad_norm": 1.042457381694646,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 8660
    },
    {
      "epoch": 0.08661,
      "grad_norm": 1.28321325520703,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 8661
    },
    {
      "epoch": 0.08662,
      "grad_norm": 0.9921356540740303,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 8662
    },
    {
      "epoch": 0.08663,
      "grad_norm": 1.1816308894921619,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 8663
    },
    {
      "epoch": 0.08664,
      "grad_norm": 0.9604118665551328,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 8664
    },
    {
      "epoch": 0.08665,
      "grad_norm": 1.103577915598327,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 8665
    },
    {
      "epoch": 0.08666,
      "grad_norm": 1.0912951429700728,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 8666
    },
    {
      "epoch": 0.08667,
      "grad_norm": 1.089798083822531,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 8667
    },
    {
      "epoch": 0.08668,
      "grad_norm": 1.0154192284190815,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 8668
    },
    {
      "epoch": 0.08669,
      "grad_norm": 1.1674462530445848,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 8669
    },
    {
      "epoch": 0.0867,
      "grad_norm": 1.2856109028709737,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 8670
    },
    {
      "epoch": 0.08671,
      "grad_norm": 1.0904458700486965,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 8671
    },
    {
      "epoch": 0.08672,
      "grad_norm": 1.4024483060796498,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 8672
    },
    {
      "epoch": 0.08673,
      "grad_norm": 0.8669257920104226,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 8673
    },
    {
      "epoch": 0.08674,
      "grad_norm": 0.8388966263742093,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 8674
    },
    {
      "epoch": 0.08675,
      "grad_norm": 0.9635039333587543,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 8675
    },
    {
      "epoch": 0.08676,
      "grad_norm": 1.2344963320644802,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 8676
    },
    {
      "epoch": 0.08677,
      "grad_norm": 1.0387887353279868,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 8677
    },
    {
      "epoch": 0.08678,
      "grad_norm": 1.1527070962116546,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 8678
    },
    {
      "epoch": 0.08679,
      "grad_norm": 1.1029487438754473,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 8679
    },
    {
      "epoch": 0.0868,
      "grad_norm": 1.3124639540897858,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 8680
    },
    {
      "epoch": 0.08681,
      "grad_norm": 1.2437793839980147,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 8681
    },
    {
      "epoch": 0.08682,
      "grad_norm": 1.0540929823400549,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 8682
    },
    {
      "epoch": 0.08683,
      "grad_norm": 1.2790371263369233,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 8683
    },
    {
      "epoch": 0.08684,
      "grad_norm": 0.8657522896104256,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 8684
    },
    {
      "epoch": 0.08685,
      "grad_norm": 1.0517928037944657,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 8685
    },
    {
      "epoch": 0.08686,
      "grad_norm": 1.1161496633955428,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 8686
    },
    {
      "epoch": 0.08687,
      "grad_norm": 1.0797615023578369,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 8687
    },
    {
      "epoch": 0.08688,
      "grad_norm": 1.2204131791670518,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 8688
    },
    {
      "epoch": 0.08689,
      "grad_norm": 1.1023472021408198,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 8689
    },
    {
      "epoch": 0.0869,
      "grad_norm": 1.2096005457249266,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 8690
    },
    {
      "epoch": 0.08691,
      "grad_norm": 1.0159491010398511,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 8691
    },
    {
      "epoch": 0.08692,
      "grad_norm": 0.9501982626279768,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 8692
    },
    {
      "epoch": 0.08693,
      "grad_norm": 1.1527647539457258,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 8693
    },
    {
      "epoch": 0.08694,
      "grad_norm": 1.070928960423378,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 8694
    },
    {
      "epoch": 0.08695,
      "grad_norm": 1.207001157436488,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 8695
    },
    {
      "epoch": 0.08696,
      "grad_norm": 1.053492800660972,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 8696
    },
    {
      "epoch": 0.08697,
      "grad_norm": 1.2985291128404224,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 8697
    },
    {
      "epoch": 0.08698,
      "grad_norm": 1.0631378422403852,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 8698
    },
    {
      "epoch": 0.08699,
      "grad_norm": 1.1823377411491516,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 8699
    },
    {
      "epoch": 0.087,
      "grad_norm": 1.0378537893217477,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 8700
    },
    {
      "epoch": 0.08701,
      "grad_norm": 1.2051775798271545,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 8701
    },
    {
      "epoch": 0.08702,
      "grad_norm": 1.034446479405826,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 8702
    },
    {
      "epoch": 0.08703,
      "grad_norm": 0.9988920281954514,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 8703
    },
    {
      "epoch": 0.08704,
      "grad_norm": 1.1646013905661423,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 8704
    },
    {
      "epoch": 0.08705,
      "grad_norm": 1.0961253024762871,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 8705
    },
    {
      "epoch": 0.08706,
      "grad_norm": 1.1967169806769928,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 8706
    },
    {
      "epoch": 0.08707,
      "grad_norm": 0.9474678629718546,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 8707
    },
    {
      "epoch": 0.08708,
      "grad_norm": 1.1516705610260332,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 8708
    },
    {
      "epoch": 0.08709,
      "grad_norm": 1.2887917026425864,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 8709
    },
    {
      "epoch": 0.0871,
      "grad_norm": 1.049402400833251,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 8710
    },
    {
      "epoch": 0.08711,
      "grad_norm": 1.267428077933279,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 8711
    },
    {
      "epoch": 0.08712,
      "grad_norm": 0.9108996994268164,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 8712
    },
    {
      "epoch": 0.08713,
      "grad_norm": 1.129357093503024,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 8713
    },
    {
      "epoch": 0.08714,
      "grad_norm": 1.1837985448712545,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 8714
    },
    {
      "epoch": 0.08715,
      "grad_norm": 1.0341252713206417,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 8715
    },
    {
      "epoch": 0.08716,
      "grad_norm": 1.195648131905206,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 8716
    },
    {
      "epoch": 0.08717,
      "grad_norm": 1.100628894474599,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 8717
    },
    {
      "epoch": 0.08718,
      "grad_norm": 1.2704156956713528,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 8718
    },
    {
      "epoch": 0.08719,
      "grad_norm": 1.0463156811456091,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 8719
    },
    {
      "epoch": 0.0872,
      "grad_norm": 1.1258377527829428,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 8720
    },
    {
      "epoch": 0.08721,
      "grad_norm": 0.9919059576280482,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 8721
    },
    {
      "epoch": 0.08722,
      "grad_norm": 1.1228247316933804,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 8722
    },
    {
      "epoch": 0.08723,
      "grad_norm": 1.2656029984955952,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 8723
    },
    {
      "epoch": 0.08724,
      "grad_norm": 1.2555608900266475,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 8724
    },
    {
      "epoch": 0.08725,
      "grad_norm": 1.0587890320142601,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 8725
    },
    {
      "epoch": 0.08726,
      "grad_norm": 1.1684635337571947,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 8726
    },
    {
      "epoch": 0.08727,
      "grad_norm": 1.0747000877168544,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 8727
    },
    {
      "epoch": 0.08728,
      "grad_norm": 1.0900120073540935,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 8728
    },
    {
      "epoch": 0.08729,
      "grad_norm": 1.045934652730821,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 8729
    },
    {
      "epoch": 0.0873,
      "grad_norm": 1.2476511295842083,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 8730
    },
    {
      "epoch": 0.08731,
      "grad_norm": 1.1697153837448115,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 8731
    },
    {
      "epoch": 0.08732,
      "grad_norm": 1.1538724837794856,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 8732
    },
    {
      "epoch": 0.08733,
      "grad_norm": 1.027194286522456,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 8733
    },
    {
      "epoch": 0.08734,
      "grad_norm": 1.22602238055874,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 8734
    },
    {
      "epoch": 0.08735,
      "grad_norm": 1.1763552197930867,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 8735
    },
    {
      "epoch": 0.08736,
      "grad_norm": 1.0792760623837168,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 8736
    },
    {
      "epoch": 0.08737,
      "grad_norm": 1.1921416599279708,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 8737
    },
    {
      "epoch": 0.08738,
      "grad_norm": 1.1452855639758666,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 8738
    },
    {
      "epoch": 0.08739,
      "grad_norm": 1.0439716447507283,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 8739
    },
    {
      "epoch": 0.0874,
      "grad_norm": 1.0420639689753401,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 8740
    },
    {
      "epoch": 0.08741,
      "grad_norm": 1.0712505938261823,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 8741
    },
    {
      "epoch": 0.08742,
      "grad_norm": 1.0422480953499131,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 8742
    },
    {
      "epoch": 0.08743,
      "grad_norm": 1.1752533479503575,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 8743
    },
    {
      "epoch": 0.08744,
      "grad_norm": 1.0901393452167087,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 8744
    },
    {
      "epoch": 0.08745,
      "grad_norm": 1.2208848202535578,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 8745
    },
    {
      "epoch": 0.08746,
      "grad_norm": 1.0770248033885392,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 8746
    },
    {
      "epoch": 0.08747,
      "grad_norm": 1.354397110036037,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 8747
    },
    {
      "epoch": 0.08748,
      "grad_norm": 1.0312216209666822,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 8748
    },
    {
      "epoch": 0.08749,
      "grad_norm": 1.1323882557666598,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 8749
    },
    {
      "epoch": 0.0875,
      "grad_norm": 1.0381850401908939,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 8750
    },
    {
      "epoch": 0.08751,
      "grad_norm": 1.2178940429665637,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 8751
    },
    {
      "epoch": 0.08752,
      "grad_norm": 0.8993679957101482,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 8752
    },
    {
      "epoch": 0.08753,
      "grad_norm": 1.0370682749447258,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 8753
    },
    {
      "epoch": 0.08754,
      "grad_norm": 1.2042388022732353,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 8754
    },
    {
      "epoch": 0.08755,
      "grad_norm": 1.1184706165321316,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 8755
    },
    {
      "epoch": 0.08756,
      "grad_norm": 1.1561679243035003,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 8756
    },
    {
      "epoch": 0.08757,
      "grad_norm": 1.024513208226419,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 8757
    },
    {
      "epoch": 0.08758,
      "grad_norm": 1.1184437711252289,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 8758
    },
    {
      "epoch": 0.08759,
      "grad_norm": 1.177181255250541,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 8759
    },
    {
      "epoch": 0.0876,
      "grad_norm": 1.0936241217545724,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 8760
    },
    {
      "epoch": 0.08761,
      "grad_norm": 1.252279806451399,
      "learning_rate": 0.003,
      "loss": 4.1255,
      "step": 8761
    },
    {
      "epoch": 0.08762,
      "grad_norm": 1.2123177273975052,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 8762
    },
    {
      "epoch": 0.08763,
      "grad_norm": 1.0369156484787974,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 8763
    },
    {
      "epoch": 0.08764,
      "grad_norm": 1.0335684893203534,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 8764
    },
    {
      "epoch": 0.08765,
      "grad_norm": 1.1945644551620531,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 8765
    },
    {
      "epoch": 0.08766,
      "grad_norm": 0.9950694287958622,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 8766
    },
    {
      "epoch": 0.08767,
      "grad_norm": 1.371292755326346,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 8767
    },
    {
      "epoch": 0.08768,
      "grad_norm": 0.7677810209753791,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 8768
    },
    {
      "epoch": 0.08769,
      "grad_norm": 0.7426354441341513,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 8769
    },
    {
      "epoch": 0.0877,
      "grad_norm": 0.8480980442986015,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 8770
    },
    {
      "epoch": 0.08771,
      "grad_norm": 1.2161480975231598,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 8771
    },
    {
      "epoch": 0.08772,
      "grad_norm": 1.1051348226912412,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 8772
    },
    {
      "epoch": 0.08773,
      "grad_norm": 1.2177584535671466,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 8773
    },
    {
      "epoch": 0.08774,
      "grad_norm": 0.8702308282274693,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 8774
    },
    {
      "epoch": 0.08775,
      "grad_norm": 0.9929217162163861,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 8775
    },
    {
      "epoch": 0.08776,
      "grad_norm": 1.263113346655494,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 8776
    },
    {
      "epoch": 0.08777,
      "grad_norm": 1.3257815908902322,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 8777
    },
    {
      "epoch": 0.08778,
      "grad_norm": 1.269385296284594,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 8778
    },
    {
      "epoch": 0.08779,
      "grad_norm": 0.9282639434675246,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 8779
    },
    {
      "epoch": 0.0878,
      "grad_norm": 1.0191602256022942,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 8780
    },
    {
      "epoch": 0.08781,
      "grad_norm": 1.1997723203364192,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 8781
    },
    {
      "epoch": 0.08782,
      "grad_norm": 1.2176855894509315,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 8782
    },
    {
      "epoch": 0.08783,
      "grad_norm": 1.1652524255615615,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 8783
    },
    {
      "epoch": 0.08784,
      "grad_norm": 1.2085793286606386,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 8784
    },
    {
      "epoch": 0.08785,
      "grad_norm": 0.9571651778611358,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 8785
    },
    {
      "epoch": 0.08786,
      "grad_norm": 1.1582560830850317,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 8786
    },
    {
      "epoch": 0.08787,
      "grad_norm": 0.9758954458203004,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 8787
    },
    {
      "epoch": 0.08788,
      "grad_norm": 1.135243821696432,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 8788
    },
    {
      "epoch": 0.08789,
      "grad_norm": 1.2150079947158996,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 8789
    },
    {
      "epoch": 0.0879,
      "grad_norm": 1.1623522733964076,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 8790
    },
    {
      "epoch": 0.08791,
      "grad_norm": 1.2156744840046996,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 8791
    },
    {
      "epoch": 0.08792,
      "grad_norm": 0.9980442564190244,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 8792
    },
    {
      "epoch": 0.08793,
      "grad_norm": 1.0903970342772615,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 8793
    },
    {
      "epoch": 0.08794,
      "grad_norm": 1.3109945906614189,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 8794
    },
    {
      "epoch": 0.08795,
      "grad_norm": 1.0586317024452636,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 8795
    },
    {
      "epoch": 0.08796,
      "grad_norm": 1.2308071734445476,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 8796
    },
    {
      "epoch": 0.08797,
      "grad_norm": 1.093667193233928,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 8797
    },
    {
      "epoch": 0.08798,
      "grad_norm": 1.239645975324972,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 8798
    },
    {
      "epoch": 0.08799,
      "grad_norm": 1.1829608196970796,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 8799
    },
    {
      "epoch": 0.088,
      "grad_norm": 1.179310389918988,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 8800
    },
    {
      "epoch": 0.08801,
      "grad_norm": 1.0740673093501871,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 8801
    },
    {
      "epoch": 0.08802,
      "grad_norm": 1.0524632137944494,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 8802
    },
    {
      "epoch": 0.08803,
      "grad_norm": 1.2242810222141456,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 8803
    },
    {
      "epoch": 0.08804,
      "grad_norm": 1.0060241245553985,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 8804
    },
    {
      "epoch": 0.08805,
      "grad_norm": 1.343503230878226,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 8805
    },
    {
      "epoch": 0.08806,
      "grad_norm": 0.920693396297294,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 8806
    },
    {
      "epoch": 0.08807,
      "grad_norm": 0.9856376499764958,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 8807
    },
    {
      "epoch": 0.08808,
      "grad_norm": 1.3858125490769972,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 8808
    },
    {
      "epoch": 0.08809,
      "grad_norm": 1.0746767723125061,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 8809
    },
    {
      "epoch": 0.0881,
      "grad_norm": 1.232148978357337,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 8810
    },
    {
      "epoch": 0.08811,
      "grad_norm": 0.9811405734907467,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 8811
    },
    {
      "epoch": 0.08812,
      "grad_norm": 1.3329184858707495,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 8812
    },
    {
      "epoch": 0.08813,
      "grad_norm": 0.9265847297855418,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 8813
    },
    {
      "epoch": 0.08814,
      "grad_norm": 0.9570432521573592,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 8814
    },
    {
      "epoch": 0.08815,
      "grad_norm": 1.1665709914992801,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 8815
    },
    {
      "epoch": 0.08816,
      "grad_norm": 0.9874536553232731,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 8816
    },
    {
      "epoch": 0.08817,
      "grad_norm": 1.4382139291553788,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 8817
    },
    {
      "epoch": 0.08818,
      "grad_norm": 1.1605884282639565,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 8818
    },
    {
      "epoch": 0.08819,
      "grad_norm": 1.134115351493515,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 8819
    },
    {
      "epoch": 0.0882,
      "grad_norm": 0.9996745730470702,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 8820
    },
    {
      "epoch": 0.08821,
      "grad_norm": 1.2619153343104192,
      "learning_rate": 0.003,
      "loss": 4.1261,
      "step": 8821
    },
    {
      "epoch": 0.08822,
      "grad_norm": 1.016060665012236,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 8822
    },
    {
      "epoch": 0.08823,
      "grad_norm": 1.2486817945224273,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 8823
    },
    {
      "epoch": 0.08824,
      "grad_norm": 1.0324860890887555,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 8824
    },
    {
      "epoch": 0.08825,
      "grad_norm": 1.154701898108942,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 8825
    },
    {
      "epoch": 0.08826,
      "grad_norm": 1.0402884792853095,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 8826
    },
    {
      "epoch": 0.08827,
      "grad_norm": 1.1370723262382887,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 8827
    },
    {
      "epoch": 0.08828,
      "grad_norm": 1.195942427398043,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 8828
    },
    {
      "epoch": 0.08829,
      "grad_norm": 1.1015741941643187,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 8829
    },
    {
      "epoch": 0.0883,
      "grad_norm": 1.24287314028591,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 8830
    },
    {
      "epoch": 0.08831,
      "grad_norm": 1.1540903706193593,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 8831
    },
    {
      "epoch": 0.08832,
      "grad_norm": 1.0841781176133298,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 8832
    },
    {
      "epoch": 0.08833,
      "grad_norm": 1.1620178341397036,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 8833
    },
    {
      "epoch": 0.08834,
      "grad_norm": 1.285096206379007,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 8834
    },
    {
      "epoch": 0.08835,
      "grad_norm": 1.2220110993481752,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 8835
    },
    {
      "epoch": 0.08836,
      "grad_norm": 1.2824438182664897,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 8836
    },
    {
      "epoch": 0.08837,
      "grad_norm": 1.030557537546806,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 8837
    },
    {
      "epoch": 0.08838,
      "grad_norm": 1.2592905047467666,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 8838
    },
    {
      "epoch": 0.08839,
      "grad_norm": 0.9948192453122057,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 8839
    },
    {
      "epoch": 0.0884,
      "grad_norm": 0.9604246294508401,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 8840
    },
    {
      "epoch": 0.08841,
      "grad_norm": 1.0821685274255017,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 8841
    },
    {
      "epoch": 0.08842,
      "grad_norm": 1.2149930539564309,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 8842
    },
    {
      "epoch": 0.08843,
      "grad_norm": 1.1535664957187073,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 8843
    },
    {
      "epoch": 0.08844,
      "grad_norm": 1.0643701786404371,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 8844
    },
    {
      "epoch": 0.08845,
      "grad_norm": 1.1519022410200033,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 8845
    },
    {
      "epoch": 0.08846,
      "grad_norm": 1.3347722844417247,
      "learning_rate": 0.003,
      "loss": 4.128,
      "step": 8846
    },
    {
      "epoch": 0.08847,
      "grad_norm": 1.045478931502332,
      "learning_rate": 0.003,
      "loss": 4.1145,
      "step": 8847
    },
    {
      "epoch": 0.08848,
      "grad_norm": 1.0931523912517,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 8848
    },
    {
      "epoch": 0.08849,
      "grad_norm": 1.085871448202411,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 8849
    },
    {
      "epoch": 0.0885,
      "grad_norm": 0.9450454887357006,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 8850
    },
    {
      "epoch": 0.08851,
      "grad_norm": 1.0428404856331992,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 8851
    },
    {
      "epoch": 0.08852,
      "grad_norm": 1.1178846171093337,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 8852
    },
    {
      "epoch": 0.08853,
      "grad_norm": 1.0673247680057139,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 8853
    },
    {
      "epoch": 0.08854,
      "grad_norm": 1.4370218662311567,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 8854
    },
    {
      "epoch": 0.08855,
      "grad_norm": 1.2424246600081954,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 8855
    },
    {
      "epoch": 0.08856,
      "grad_norm": 1.1952803481942424,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 8856
    },
    {
      "epoch": 0.08857,
      "grad_norm": 1.0610730671993456,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 8857
    },
    {
      "epoch": 0.08858,
      "grad_norm": 1.204237016674479,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 8858
    },
    {
      "epoch": 0.08859,
      "grad_norm": 0.985430125390967,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 8859
    },
    {
      "epoch": 0.0886,
      "grad_norm": 1.1113027683439096,
      "learning_rate": 0.003,
      "loss": 4.1115,
      "step": 8860
    },
    {
      "epoch": 0.08861,
      "grad_norm": 1.0509820877432436,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 8861
    },
    {
      "epoch": 0.08862,
      "grad_norm": 1.0753189064520456,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 8862
    },
    {
      "epoch": 0.08863,
      "grad_norm": 1.146796776128645,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 8863
    },
    {
      "epoch": 0.08864,
      "grad_norm": 1.2419423716308708,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 8864
    },
    {
      "epoch": 0.08865,
      "grad_norm": 0.99866038915533,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 8865
    },
    {
      "epoch": 0.08866,
      "grad_norm": 1.223417079345496,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 8866
    },
    {
      "epoch": 0.08867,
      "grad_norm": 0.9179947374618549,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 8867
    },
    {
      "epoch": 0.08868,
      "grad_norm": 1.0120882698093776,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 8868
    },
    {
      "epoch": 0.08869,
      "grad_norm": 1.4761625946066304,
      "learning_rate": 0.003,
      "loss": 4.1472,
      "step": 8869
    },
    {
      "epoch": 0.0887,
      "grad_norm": 1.0013311950887647,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 8870
    },
    {
      "epoch": 0.08871,
      "grad_norm": 1.1905133400701076,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 8871
    },
    {
      "epoch": 0.08872,
      "grad_norm": 1.225762665576794,
      "learning_rate": 0.003,
      "loss": 4.1298,
      "step": 8872
    },
    {
      "epoch": 0.08873,
      "grad_norm": 1.1051400960493805,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 8873
    },
    {
      "epoch": 0.08874,
      "grad_norm": 1.1725074717579642,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 8874
    },
    {
      "epoch": 0.08875,
      "grad_norm": 1.1425706121868302,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 8875
    },
    {
      "epoch": 0.08876,
      "grad_norm": 1.1324360400730162,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 8876
    },
    {
      "epoch": 0.08877,
      "grad_norm": 1.1193149417853414,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 8877
    },
    {
      "epoch": 0.08878,
      "grad_norm": 1.1813382330480562,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 8878
    },
    {
      "epoch": 0.08879,
      "grad_norm": 1.0591929235147335,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 8879
    },
    {
      "epoch": 0.0888,
      "grad_norm": 1.1938270432922649,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 8880
    },
    {
      "epoch": 0.08881,
      "grad_norm": 1.15951980851196,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 8881
    },
    {
      "epoch": 0.08882,
      "grad_norm": 1.1269173518743505,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 8882
    },
    {
      "epoch": 0.08883,
      "grad_norm": 0.9256998003722761,
      "learning_rate": 0.003,
      "loss": 4.1316,
      "step": 8883
    },
    {
      "epoch": 0.08884,
      "grad_norm": 0.9928338510945068,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 8884
    },
    {
      "epoch": 0.08885,
      "grad_norm": 1.235862690491941,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 8885
    },
    {
      "epoch": 0.08886,
      "grad_norm": 1.025957142318475,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 8886
    },
    {
      "epoch": 0.08887,
      "grad_norm": 1.1645758366066534,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 8887
    },
    {
      "epoch": 0.08888,
      "grad_norm": 1.0979405833970721,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 8888
    },
    {
      "epoch": 0.08889,
      "grad_norm": 1.1926742138622266,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 8889
    },
    {
      "epoch": 0.0889,
      "grad_norm": 0.9283655625939753,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 8890
    },
    {
      "epoch": 0.08891,
      "grad_norm": 0.9922525991886726,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 8891
    },
    {
      "epoch": 0.08892,
      "grad_norm": 1.2511385500851198,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 8892
    },
    {
      "epoch": 0.08893,
      "grad_norm": 1.0877694715722532,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 8893
    },
    {
      "epoch": 0.08894,
      "grad_norm": 1.2425342003563593,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 8894
    },
    {
      "epoch": 0.08895,
      "grad_norm": 1.1708965119096941,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 8895
    },
    {
      "epoch": 0.08896,
      "grad_norm": 1.148852163720437,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 8896
    },
    {
      "epoch": 0.08897,
      "grad_norm": 0.9829003899946172,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 8897
    },
    {
      "epoch": 0.08898,
      "grad_norm": 1.1577433657757135,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 8898
    },
    {
      "epoch": 0.08899,
      "grad_norm": 1.0212815115160019,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 8899
    },
    {
      "epoch": 0.089,
      "grad_norm": 1.304229793351174,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 8900
    },
    {
      "epoch": 0.08901,
      "grad_norm": 0.9490335114750387,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 8901
    },
    {
      "epoch": 0.08902,
      "grad_norm": 1.0053164397125514,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 8902
    },
    {
      "epoch": 0.08903,
      "grad_norm": 1.2031637408189797,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 8903
    },
    {
      "epoch": 0.08904,
      "grad_norm": 1.0348101241532264,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 8904
    },
    {
      "epoch": 0.08905,
      "grad_norm": 1.2411377788704367,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 8905
    },
    {
      "epoch": 0.08906,
      "grad_norm": 1.2368523131379077,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 8906
    },
    {
      "epoch": 0.08907,
      "grad_norm": 1.196052564590542,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 8907
    },
    {
      "epoch": 0.08908,
      "grad_norm": 1.149069619968232,
      "learning_rate": 0.003,
      "loss": 4.1105,
      "step": 8908
    },
    {
      "epoch": 0.08909,
      "grad_norm": 1.2196952014342493,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 8909
    },
    {
      "epoch": 0.0891,
      "grad_norm": 0.9657789763518829,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 8910
    },
    {
      "epoch": 0.08911,
      "grad_norm": 1.226501539468004,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 8911
    },
    {
      "epoch": 0.08912,
      "grad_norm": 0.9831269607454067,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 8912
    },
    {
      "epoch": 0.08913,
      "grad_norm": 1.0432328581446457,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 8913
    },
    {
      "epoch": 0.08914,
      "grad_norm": 1.2445813702184239,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 8914
    },
    {
      "epoch": 0.08915,
      "grad_norm": 0.8348608288238978,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 8915
    },
    {
      "epoch": 0.08916,
      "grad_norm": 1.0277019185383869,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 8916
    },
    {
      "epoch": 0.08917,
      "grad_norm": 1.29836858008687,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 8917
    },
    {
      "epoch": 0.08918,
      "grad_norm": 1.1457493663465181,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 8918
    },
    {
      "epoch": 0.08919,
      "grad_norm": 1.3902905052344592,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 8919
    },
    {
      "epoch": 0.0892,
      "grad_norm": 1.1302254715099984,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 8920
    },
    {
      "epoch": 0.08921,
      "grad_norm": 1.144164260144895,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 8921
    },
    {
      "epoch": 0.08922,
      "grad_norm": 1.2102096555366344,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 8922
    },
    {
      "epoch": 0.08923,
      "grad_norm": 1.2889411146695433,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 8923
    },
    {
      "epoch": 0.08924,
      "grad_norm": 1.1914185822394034,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 8924
    },
    {
      "epoch": 0.08925,
      "grad_norm": 1.1563115618267714,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 8925
    },
    {
      "epoch": 0.08926,
      "grad_norm": 1.0599030969488006,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 8926
    },
    {
      "epoch": 0.08927,
      "grad_norm": 1.2872549938271667,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 8927
    },
    {
      "epoch": 0.08928,
      "grad_norm": 1.0434873158845894,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 8928
    },
    {
      "epoch": 0.08929,
      "grad_norm": 1.0870121366596754,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 8929
    },
    {
      "epoch": 0.0893,
      "grad_norm": 1.1211050004018694,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 8930
    },
    {
      "epoch": 0.08931,
      "grad_norm": 1.065437355820969,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 8931
    },
    {
      "epoch": 0.08932,
      "grad_norm": 1.4361757687132564,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 8932
    },
    {
      "epoch": 0.08933,
      "grad_norm": 1.2264163195022284,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 8933
    },
    {
      "epoch": 0.08934,
      "grad_norm": 0.9643110026877505,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 8934
    },
    {
      "epoch": 0.08935,
      "grad_norm": 1.0313524255500566,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 8935
    },
    {
      "epoch": 0.08936,
      "grad_norm": 1.100550149766114,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 8936
    },
    {
      "epoch": 0.08937,
      "grad_norm": 1.074917096686148,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 8937
    },
    {
      "epoch": 0.08938,
      "grad_norm": 1.3433517938631507,
      "learning_rate": 0.003,
      "loss": 4.1159,
      "step": 8938
    },
    {
      "epoch": 0.08939,
      "grad_norm": 0.8971118463380504,
      "learning_rate": 0.003,
      "loss": 4.1223,
      "step": 8939
    },
    {
      "epoch": 0.0894,
      "grad_norm": 1.0390958399705494,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 8940
    },
    {
      "epoch": 0.08941,
      "grad_norm": 1.2711873162688037,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 8941
    },
    {
      "epoch": 0.08942,
      "grad_norm": 1.1680587830752844,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 8942
    },
    {
      "epoch": 0.08943,
      "grad_norm": 1.077559364926768,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 8943
    },
    {
      "epoch": 0.08944,
      "grad_norm": 1.1778138324586276,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 8944
    },
    {
      "epoch": 0.08945,
      "grad_norm": 1.0950154110091395,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 8945
    },
    {
      "epoch": 0.08946,
      "grad_norm": 0.9413771676176436,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 8946
    },
    {
      "epoch": 0.08947,
      "grad_norm": 1.116304589986322,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 8947
    },
    {
      "epoch": 0.08948,
      "grad_norm": 1.1348251492332606,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 8948
    },
    {
      "epoch": 0.08949,
      "grad_norm": 1.0167111128911273,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 8949
    },
    {
      "epoch": 0.0895,
      "grad_norm": 1.1114205516835516,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 8950
    },
    {
      "epoch": 0.08951,
      "grad_norm": 1.0311274627489682,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 8951
    },
    {
      "epoch": 0.08952,
      "grad_norm": 1.2420772425442637,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 8952
    },
    {
      "epoch": 0.08953,
      "grad_norm": 0.9702294606818496,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 8953
    },
    {
      "epoch": 0.08954,
      "grad_norm": 1.0514539971922081,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 8954
    },
    {
      "epoch": 0.08955,
      "grad_norm": 1.1175561786688144,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 8955
    },
    {
      "epoch": 0.08956,
      "grad_norm": 1.3697725831400098,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8956
    },
    {
      "epoch": 0.08957,
      "grad_norm": 0.8881912667968426,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 8957
    },
    {
      "epoch": 0.08958,
      "grad_norm": 1.2990213460731657,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 8958
    },
    {
      "epoch": 0.08959,
      "grad_norm": 1.1275379507438985,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 8959
    },
    {
      "epoch": 0.0896,
      "grad_norm": 1.1686764581323634,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 8960
    },
    {
      "epoch": 0.08961,
      "grad_norm": 1.2008879374871733,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 8961
    },
    {
      "epoch": 0.08962,
      "grad_norm": 0.9166635174392294,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 8962
    },
    {
      "epoch": 0.08963,
      "grad_norm": 0.9529723483512812,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 8963
    },
    {
      "epoch": 0.08964,
      "grad_norm": 1.1181882536744028,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 8964
    },
    {
      "epoch": 0.08965,
      "grad_norm": 1.1324629678961362,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 8965
    },
    {
      "epoch": 0.08966,
      "grad_norm": 1.04799498443996,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 8966
    },
    {
      "epoch": 0.08967,
      "grad_norm": 1.193797147891738,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 8967
    },
    {
      "epoch": 0.08968,
      "grad_norm": 1.0573943300927882,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 8968
    },
    {
      "epoch": 0.08969,
      "grad_norm": 1.386096847311672,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 8969
    },
    {
      "epoch": 0.0897,
      "grad_norm": 1.2251660107604352,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 8970
    },
    {
      "epoch": 0.08971,
      "grad_norm": 1.170614763215056,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 8971
    },
    {
      "epoch": 0.08972,
      "grad_norm": 1.1175989967353661,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 8972
    },
    {
      "epoch": 0.08973,
      "grad_norm": 1.233829185362789,
      "learning_rate": 0.003,
      "loss": 4.1143,
      "step": 8973
    },
    {
      "epoch": 0.08974,
      "grad_norm": 1.109937499784889,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 8974
    },
    {
      "epoch": 0.08975,
      "grad_norm": 1.210997922226824,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 8975
    },
    {
      "epoch": 0.08976,
      "grad_norm": 1.00399822862685,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 8976
    },
    {
      "epoch": 0.08977,
      "grad_norm": 1.183420824192127,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 8977
    },
    {
      "epoch": 0.08978,
      "grad_norm": 1.2611176663658683,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 8978
    },
    {
      "epoch": 0.08979,
      "grad_norm": 1.0685727933905735,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 8979
    },
    {
      "epoch": 0.0898,
      "grad_norm": 1.1240486549449662,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 8980
    },
    {
      "epoch": 0.08981,
      "grad_norm": 1.306291543933293,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 8981
    },
    {
      "epoch": 0.08982,
      "grad_norm": 0.8432622834426845,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 8982
    },
    {
      "epoch": 0.08983,
      "grad_norm": 0.9177109499908207,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 8983
    },
    {
      "epoch": 0.08984,
      "grad_norm": 1.2277287537315837,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 8984
    },
    {
      "epoch": 0.08985,
      "grad_norm": 1.075207515725915,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 8985
    },
    {
      "epoch": 0.08986,
      "grad_norm": 1.2740566021576438,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 8986
    },
    {
      "epoch": 0.08987,
      "grad_norm": 1.1459099503667163,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 8987
    },
    {
      "epoch": 0.08988,
      "grad_norm": 1.064348413271581,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 8988
    },
    {
      "epoch": 0.08989,
      "grad_norm": 1.1793693743335125,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 8989
    },
    {
      "epoch": 0.0899,
      "grad_norm": 1.0425071968697808,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 8990
    },
    {
      "epoch": 0.08991,
      "grad_norm": 1.468796617381801,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 8991
    },
    {
      "epoch": 0.08992,
      "grad_norm": 0.9327563758012993,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 8992
    },
    {
      "epoch": 0.08993,
      "grad_norm": 1.0525922979652393,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 8993
    },
    {
      "epoch": 0.08994,
      "grad_norm": 1.267662146582978,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 8994
    },
    {
      "epoch": 0.08995,
      "grad_norm": 1.0986748559783313,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 8995
    },
    {
      "epoch": 0.08996,
      "grad_norm": 1.3115338127669567,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 8996
    },
    {
      "epoch": 0.08997,
      "grad_norm": 0.9557599873271017,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 8997
    },
    {
      "epoch": 0.08998,
      "grad_norm": 1.139031347368501,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 8998
    },
    {
      "epoch": 0.08999,
      "grad_norm": 1.076392466563895,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 8999
    },
    {
      "epoch": 0.09,
      "grad_norm": 1.218252301925029,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 9000
    },
    {
      "epoch": 0.09001,
      "grad_norm": 1.1108783016947812,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 9001
    },
    {
      "epoch": 0.09002,
      "grad_norm": 1.1195942660042726,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 9002
    },
    {
      "epoch": 0.09003,
      "grad_norm": 1.203270184084982,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 9003
    },
    {
      "epoch": 0.09004,
      "grad_norm": 1.2021233361621204,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 9004
    },
    {
      "epoch": 0.09005,
      "grad_norm": 0.9743559860743248,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 9005
    },
    {
      "epoch": 0.09006,
      "grad_norm": 1.1823704601559941,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 9006
    },
    {
      "epoch": 0.09007,
      "grad_norm": 1.123233185721264,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 9007
    },
    {
      "epoch": 0.09008,
      "grad_norm": 1.0757862834677538,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 9008
    },
    {
      "epoch": 0.09009,
      "grad_norm": 1.266049145604807,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 9009
    },
    {
      "epoch": 0.0901,
      "grad_norm": 1.1564480352920141,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 9010
    },
    {
      "epoch": 0.09011,
      "grad_norm": 1.2954757514768573,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 9011
    },
    {
      "epoch": 0.09012,
      "grad_norm": 0.9656390078636606,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 9012
    },
    {
      "epoch": 0.09013,
      "grad_norm": 1.153233909037288,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 9013
    },
    {
      "epoch": 0.09014,
      "grad_norm": 1.052108855153509,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 9014
    },
    {
      "epoch": 0.09015,
      "grad_norm": 1.2385359460251653,
      "learning_rate": 0.003,
      "loss": 4.1273,
      "step": 9015
    },
    {
      "epoch": 0.09016,
      "grad_norm": 1.0770867919896612,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9016
    },
    {
      "epoch": 0.09017,
      "grad_norm": 1.0339851137855502,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 9017
    },
    {
      "epoch": 0.09018,
      "grad_norm": 1.3115774956660293,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 9018
    },
    {
      "epoch": 0.09019,
      "grad_norm": 1.063765856905231,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 9019
    },
    {
      "epoch": 0.0902,
      "grad_norm": 1.171303481692876,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 9020
    },
    {
      "epoch": 0.09021,
      "grad_norm": 1.0064142048132063,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 9021
    },
    {
      "epoch": 0.09022,
      "grad_norm": 1.0413311438329733,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 9022
    },
    {
      "epoch": 0.09023,
      "grad_norm": 1.1452677126031137,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 9023
    },
    {
      "epoch": 0.09024,
      "grad_norm": 1.1850056876367443,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 9024
    },
    {
      "epoch": 0.09025,
      "grad_norm": 1.0640895906055132,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 9025
    },
    {
      "epoch": 0.09026,
      "grad_norm": 1.0784240586445417,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 9026
    },
    {
      "epoch": 0.09027,
      "grad_norm": 1.0642912702715661,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 9027
    },
    {
      "epoch": 0.09028,
      "grad_norm": 1.2655742298981867,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 9028
    },
    {
      "epoch": 0.09029,
      "grad_norm": 1.1293787235945678,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 9029
    },
    {
      "epoch": 0.0903,
      "grad_norm": 1.2521967234284181,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 9030
    },
    {
      "epoch": 0.09031,
      "grad_norm": 1.2715061899665723,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 9031
    },
    {
      "epoch": 0.09032,
      "grad_norm": 1.1227033191663636,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 9032
    },
    {
      "epoch": 0.09033,
      "grad_norm": 1.2698924690086315,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 9033
    },
    {
      "epoch": 0.09034,
      "grad_norm": 1.0279738017452675,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 9034
    },
    {
      "epoch": 0.09035,
      "grad_norm": 0.9188646060088369,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 9035
    },
    {
      "epoch": 0.09036,
      "grad_norm": 1.0006330136135604,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 9036
    },
    {
      "epoch": 0.09037,
      "grad_norm": 1.229414309206932,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 9037
    },
    {
      "epoch": 0.09038,
      "grad_norm": 1.056028513175944,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 9038
    },
    {
      "epoch": 0.09039,
      "grad_norm": 1.1567627754431467,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 9039
    },
    {
      "epoch": 0.0904,
      "grad_norm": 1.2620704881278146,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 9040
    },
    {
      "epoch": 0.09041,
      "grad_norm": 0.9332791175909212,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 9041
    },
    {
      "epoch": 0.09042,
      "grad_norm": 1.0031397413450402,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 9042
    },
    {
      "epoch": 0.09043,
      "grad_norm": 1.0842259914712198,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 9043
    },
    {
      "epoch": 0.09044,
      "grad_norm": 1.1766907490463179,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 9044
    },
    {
      "epoch": 0.09045,
      "grad_norm": 1.1641848477623784,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 9045
    },
    {
      "epoch": 0.09046,
      "grad_norm": 0.9648345631484123,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 9046
    },
    {
      "epoch": 0.09047,
      "grad_norm": 1.0156565200283614,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 9047
    },
    {
      "epoch": 0.09048,
      "grad_norm": 1.2711228411111342,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 9048
    },
    {
      "epoch": 0.09049,
      "grad_norm": 0.9295851773809277,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 9049
    },
    {
      "epoch": 0.0905,
      "grad_norm": 1.1530883099484381,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 9050
    },
    {
      "epoch": 0.09051,
      "grad_norm": 1.058484140380266,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 9051
    },
    {
      "epoch": 0.09052,
      "grad_norm": 1.3221517826262894,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 9052
    },
    {
      "epoch": 0.09053,
      "grad_norm": 0.9783869911947403,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 9053
    },
    {
      "epoch": 0.09054,
      "grad_norm": 1.31217086863143,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 9054
    },
    {
      "epoch": 0.09055,
      "grad_norm": 1.15712884508595,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 9055
    },
    {
      "epoch": 0.09056,
      "grad_norm": 1.3601399184462666,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 9056
    },
    {
      "epoch": 0.09057,
      "grad_norm": 0.9702143476288941,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 9057
    },
    {
      "epoch": 0.09058,
      "grad_norm": 1.005445525878324,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 9058
    },
    {
      "epoch": 0.09059,
      "grad_norm": 1.343638060042953,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 9059
    },
    {
      "epoch": 0.0906,
      "grad_norm": 1.061498240726219,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 9060
    },
    {
      "epoch": 0.09061,
      "grad_norm": 1.2119494291256259,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 9061
    },
    {
      "epoch": 0.09062,
      "grad_norm": 1.1153933410174173,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 9062
    },
    {
      "epoch": 0.09063,
      "grad_norm": 1.0509431689336337,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 9063
    },
    {
      "epoch": 0.09064,
      "grad_norm": 1.0456015290402727,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 9064
    },
    {
      "epoch": 0.09065,
      "grad_norm": 1.2005228086487294,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 9065
    },
    {
      "epoch": 0.09066,
      "grad_norm": 1.251274599678117,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 9066
    },
    {
      "epoch": 0.09067,
      "grad_norm": 1.1743203475476542,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 9067
    },
    {
      "epoch": 0.09068,
      "grad_norm": 0.9880369735330882,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 9068
    },
    {
      "epoch": 0.09069,
      "grad_norm": 1.1738728040556163,
      "learning_rate": 0.003,
      "loss": 4.1397,
      "step": 9069
    },
    {
      "epoch": 0.0907,
      "grad_norm": 1.0377157330168174,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 9070
    },
    {
      "epoch": 0.09071,
      "grad_norm": 1.1057934780286178,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 9071
    },
    {
      "epoch": 0.09072,
      "grad_norm": 1.0188511735448431,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 9072
    },
    {
      "epoch": 0.09073,
      "grad_norm": 1.315640376041937,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 9073
    },
    {
      "epoch": 0.09074,
      "grad_norm": 1.101736279121598,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 9074
    },
    {
      "epoch": 0.09075,
      "grad_norm": 1.3946631521180286,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 9075
    },
    {
      "epoch": 0.09076,
      "grad_norm": 1.1148038465546715,
      "learning_rate": 0.003,
      "loss": 4.1152,
      "step": 9076
    },
    {
      "epoch": 0.09077,
      "grad_norm": 1.3284754703148802,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 9077
    },
    {
      "epoch": 0.09078,
      "grad_norm": 0.9783046559145829,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 9078
    },
    {
      "epoch": 0.09079,
      "grad_norm": 0.913093721909462,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 9079
    },
    {
      "epoch": 0.0908,
      "grad_norm": 1.095737687813746,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 9080
    },
    {
      "epoch": 0.09081,
      "grad_norm": 1.2614609976958837,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 9081
    },
    {
      "epoch": 0.09082,
      "grad_norm": 1.2075594634986682,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 9082
    },
    {
      "epoch": 0.09083,
      "grad_norm": 1.2122949862145338,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 9083
    },
    {
      "epoch": 0.09084,
      "grad_norm": 0.9500851087827136,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 9084
    },
    {
      "epoch": 0.09085,
      "grad_norm": 1.0476982169455582,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9085
    },
    {
      "epoch": 0.09086,
      "grad_norm": 1.2554444552905066,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 9086
    },
    {
      "epoch": 0.09087,
      "grad_norm": 0.9421781553632245,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 9087
    },
    {
      "epoch": 0.09088,
      "grad_norm": 1.4131215511951543,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 9088
    },
    {
      "epoch": 0.09089,
      "grad_norm": 1.1108096893759776,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 9089
    },
    {
      "epoch": 0.0909,
      "grad_norm": 1.1952675721480868,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 9090
    },
    {
      "epoch": 0.09091,
      "grad_norm": 1.0724547606838648,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 9091
    },
    {
      "epoch": 0.09092,
      "grad_norm": 1.2051575454817625,
      "learning_rate": 0.003,
      "loss": 4.1245,
      "step": 9092
    },
    {
      "epoch": 0.09093,
      "grad_norm": 1.3267657738954277,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 9093
    },
    {
      "epoch": 0.09094,
      "grad_norm": 1.1254991850990028,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 9094
    },
    {
      "epoch": 0.09095,
      "grad_norm": 1.076280006184989,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 9095
    },
    {
      "epoch": 0.09096,
      "grad_norm": 1.2975257873577617,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 9096
    },
    {
      "epoch": 0.09097,
      "grad_norm": 0.8764623283131802,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 9097
    },
    {
      "epoch": 0.09098,
      "grad_norm": 0.9942352270948855,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 9098
    },
    {
      "epoch": 0.09099,
      "grad_norm": 1.2726874710768534,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 9099
    },
    {
      "epoch": 0.091,
      "grad_norm": 0.9642055558382768,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 9100
    },
    {
      "epoch": 0.09101,
      "grad_norm": 1.1013949140781738,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 9101
    },
    {
      "epoch": 0.09102,
      "grad_norm": 1.0913604869756948,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 9102
    },
    {
      "epoch": 0.09103,
      "grad_norm": 1.2031231056541927,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 9103
    },
    {
      "epoch": 0.09104,
      "grad_norm": 1.0988423095239437,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 9104
    },
    {
      "epoch": 0.09105,
      "grad_norm": 1.0423092013378354,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 9105
    },
    {
      "epoch": 0.09106,
      "grad_norm": 1.2438248747188299,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 9106
    },
    {
      "epoch": 0.09107,
      "grad_norm": 1.1483179803808778,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 9107
    },
    {
      "epoch": 0.09108,
      "grad_norm": 1.326132597957969,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 9108
    },
    {
      "epoch": 0.09109,
      "grad_norm": 1.0113040032192664,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9109
    },
    {
      "epoch": 0.0911,
      "grad_norm": 1.030048752615744,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 9110
    },
    {
      "epoch": 0.09111,
      "grad_norm": 1.2571878851477611,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 9111
    },
    {
      "epoch": 0.09112,
      "grad_norm": 0.9793382607265296,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 9112
    },
    {
      "epoch": 0.09113,
      "grad_norm": 1.1758336523804527,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 9113
    },
    {
      "epoch": 0.09114,
      "grad_norm": 1.0634406149265316,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 9114
    },
    {
      "epoch": 0.09115,
      "grad_norm": 1.2160595915766383,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 9115
    },
    {
      "epoch": 0.09116,
      "grad_norm": 1.0742814994403547,
      "learning_rate": 0.003,
      "loss": 4.1286,
      "step": 9116
    },
    {
      "epoch": 0.09117,
      "grad_norm": 1.168852556344174,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 9117
    },
    {
      "epoch": 0.09118,
      "grad_norm": 1.2618508344525157,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9118
    },
    {
      "epoch": 0.09119,
      "grad_norm": 0.977470788596692,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 9119
    },
    {
      "epoch": 0.0912,
      "grad_norm": 1.2131827050884871,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9120
    },
    {
      "epoch": 0.09121,
      "grad_norm": 0.9797942266581282,
      "learning_rate": 0.003,
      "loss": 4.1182,
      "step": 9121
    },
    {
      "epoch": 0.09122,
      "grad_norm": 1.3195467632222808,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 9122
    },
    {
      "epoch": 0.09123,
      "grad_norm": 1.2794360152215094,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 9123
    },
    {
      "epoch": 0.09124,
      "grad_norm": 1.0383850216227262,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 9124
    },
    {
      "epoch": 0.09125,
      "grad_norm": 1.1895964256162657,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 9125
    },
    {
      "epoch": 0.09126,
      "grad_norm": 1.1565205073495568,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 9126
    },
    {
      "epoch": 0.09127,
      "grad_norm": 0.9870065472498529,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 9127
    },
    {
      "epoch": 0.09128,
      "grad_norm": 1.3924191556687682,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 9128
    },
    {
      "epoch": 0.09129,
      "grad_norm": 1.0415308726364658,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 9129
    },
    {
      "epoch": 0.0913,
      "grad_norm": 1.2239110824416417,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 9130
    },
    {
      "epoch": 0.09131,
      "grad_norm": 1.1333067848208729,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 9131
    },
    {
      "epoch": 0.09132,
      "grad_norm": 1.0124991406398278,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 9132
    },
    {
      "epoch": 0.09133,
      "grad_norm": 1.1465094851053432,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 9133
    },
    {
      "epoch": 0.09134,
      "grad_norm": 1.2509363219325378,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 9134
    },
    {
      "epoch": 0.09135,
      "grad_norm": 1.1392240077626206,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 9135
    },
    {
      "epoch": 0.09136,
      "grad_norm": 1.083710930327429,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 9136
    },
    {
      "epoch": 0.09137,
      "grad_norm": 1.2335339938885863,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 9137
    },
    {
      "epoch": 0.09138,
      "grad_norm": 1.101274143648628,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 9138
    },
    {
      "epoch": 0.09139,
      "grad_norm": 1.2239809585615162,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 9139
    },
    {
      "epoch": 0.0914,
      "grad_norm": 0.9619789123487413,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 9140
    },
    {
      "epoch": 0.09141,
      "grad_norm": 1.1383798391014412,
      "learning_rate": 0.003,
      "loss": 4.1214,
      "step": 9141
    },
    {
      "epoch": 0.09142,
      "grad_norm": 1.0505265979852703,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 9142
    },
    {
      "epoch": 0.09143,
      "grad_norm": 1.0997295836100855,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 9143
    },
    {
      "epoch": 0.09144,
      "grad_norm": 1.0581118806506116,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 9144
    },
    {
      "epoch": 0.09145,
      "grad_norm": 1.234848911993916,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 9145
    },
    {
      "epoch": 0.09146,
      "grad_norm": 1.08478997063052,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 9146
    },
    {
      "epoch": 0.09147,
      "grad_norm": 1.2618896424614257,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 9147
    },
    {
      "epoch": 0.09148,
      "grad_norm": 1.123169224398365,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 9148
    },
    {
      "epoch": 0.09149,
      "grad_norm": 1.0847323522173526,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 9149
    },
    {
      "epoch": 0.0915,
      "grad_norm": 1.1680298895237302,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 9150
    },
    {
      "epoch": 0.09151,
      "grad_norm": 1.3844852453386007,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 9151
    },
    {
      "epoch": 0.09152,
      "grad_norm": 1.0218893177216748,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 9152
    },
    {
      "epoch": 0.09153,
      "grad_norm": 0.9965527672502461,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 9153
    },
    {
      "epoch": 0.09154,
      "grad_norm": 1.0995384198383622,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 9154
    },
    {
      "epoch": 0.09155,
      "grad_norm": 1.0410969716281233,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 9155
    },
    {
      "epoch": 0.09156,
      "grad_norm": 1.2600779934860684,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 9156
    },
    {
      "epoch": 0.09157,
      "grad_norm": 0.9324524194147646,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 9157
    },
    {
      "epoch": 0.09158,
      "grad_norm": 1.1514601056640672,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 9158
    },
    {
      "epoch": 0.09159,
      "grad_norm": 1.2870193873680913,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 9159
    },
    {
      "epoch": 0.0916,
      "grad_norm": 1.0560405419821877,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 9160
    },
    {
      "epoch": 0.09161,
      "grad_norm": 1.0299875085861723,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 9161
    },
    {
      "epoch": 0.09162,
      "grad_norm": 1.116367131505824,
      "learning_rate": 0.003,
      "loss": 4.1133,
      "step": 9162
    },
    {
      "epoch": 0.09163,
      "grad_norm": 0.9983394688004362,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 9163
    },
    {
      "epoch": 0.09164,
      "grad_norm": 1.1592721035304163,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 9164
    },
    {
      "epoch": 0.09165,
      "grad_norm": 1.0344539200329619,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 9165
    },
    {
      "epoch": 0.09166,
      "grad_norm": 1.0474394105993658,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 9166
    },
    {
      "epoch": 0.09167,
      "grad_norm": 1.1658008703847873,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 9167
    },
    {
      "epoch": 0.09168,
      "grad_norm": 1.0606777041009616,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9168
    },
    {
      "epoch": 0.09169,
      "grad_norm": 1.200603861024009,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9169
    },
    {
      "epoch": 0.0917,
      "grad_norm": 1.0944668053816515,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 9170
    },
    {
      "epoch": 0.09171,
      "grad_norm": 1.0821405986570527,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 9171
    },
    {
      "epoch": 0.09172,
      "grad_norm": 1.145946204001904,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 9172
    },
    {
      "epoch": 0.09173,
      "grad_norm": 1.3491933499222881,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 9173
    },
    {
      "epoch": 0.09174,
      "grad_norm": 1.1851092298888857,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 9174
    },
    {
      "epoch": 0.09175,
      "grad_norm": 1.307879610936908,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 9175
    },
    {
      "epoch": 0.09176,
      "grad_norm": 1.0397697110301758,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 9176
    },
    {
      "epoch": 0.09177,
      "grad_norm": 1.322756400789418,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 9177
    },
    {
      "epoch": 0.09178,
      "grad_norm": 1.0359489238564688,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 9178
    },
    {
      "epoch": 0.09179,
      "grad_norm": 1.387284506907797,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 9179
    },
    {
      "epoch": 0.0918,
      "grad_norm": 1.0856625764441654,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 9180
    },
    {
      "epoch": 0.09181,
      "grad_norm": 1.3055834964911002,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 9181
    },
    {
      "epoch": 0.09182,
      "grad_norm": 1.1092289866442044,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 9182
    },
    {
      "epoch": 0.09183,
      "grad_norm": 1.1885236545545457,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 9183
    },
    {
      "epoch": 0.09184,
      "grad_norm": 0.9487275740138595,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 9184
    },
    {
      "epoch": 0.09185,
      "grad_norm": 1.2244446488284848,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 9185
    },
    {
      "epoch": 0.09186,
      "grad_norm": 0.9495737584675268,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 9186
    },
    {
      "epoch": 0.09187,
      "grad_norm": 1.1296622323739962,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 9187
    },
    {
      "epoch": 0.09188,
      "grad_norm": 1.201509024595302,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 9188
    },
    {
      "epoch": 0.09189,
      "grad_norm": 1.2702535000447714,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 9189
    },
    {
      "epoch": 0.0919,
      "grad_norm": 1.1985062138264047,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 9190
    },
    {
      "epoch": 0.09191,
      "grad_norm": 1.0017418634767983,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 9191
    },
    {
      "epoch": 0.09192,
      "grad_norm": 1.0025901701320967,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 9192
    },
    {
      "epoch": 0.09193,
      "grad_norm": 1.3829381034381623,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 9193
    },
    {
      "epoch": 0.09194,
      "grad_norm": 0.9677385774606069,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 9194
    },
    {
      "epoch": 0.09195,
      "grad_norm": 1.6506761595471497,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 9195
    },
    {
      "epoch": 0.09196,
      "grad_norm": 1.001256536135946,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 9196
    },
    {
      "epoch": 0.09197,
      "grad_norm": 1.160229001768998,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 9197
    },
    {
      "epoch": 0.09198,
      "grad_norm": 1.2451954616247143,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 9198
    },
    {
      "epoch": 0.09199,
      "grad_norm": 1.234886981466303,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 9199
    },
    {
      "epoch": 0.092,
      "grad_norm": 1.4720829204629136,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 9200
    },
    {
      "epoch": 0.09201,
      "grad_norm": 0.9054048830224165,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 9201
    },
    {
      "epoch": 0.09202,
      "grad_norm": 0.916669785043941,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 9202
    },
    {
      "epoch": 0.09203,
      "grad_norm": 1.0582633355745965,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 9203
    },
    {
      "epoch": 0.09204,
      "grad_norm": 1.435908065217812,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 9204
    },
    {
      "epoch": 0.09205,
      "grad_norm": 0.9513665704460509,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 9205
    },
    {
      "epoch": 0.09206,
      "grad_norm": 1.1687418270227496,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 9206
    },
    {
      "epoch": 0.09207,
      "grad_norm": 1.092936507823944,
      "learning_rate": 0.003,
      "loss": 4.1192,
      "step": 9207
    },
    {
      "epoch": 0.09208,
      "grad_norm": 1.0812988391688032,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 9208
    },
    {
      "epoch": 0.09209,
      "grad_norm": 1.092551499098981,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 9209
    },
    {
      "epoch": 0.0921,
      "grad_norm": 0.963997770849915,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 9210
    },
    {
      "epoch": 0.09211,
      "grad_norm": 1.22240394776083,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 9211
    },
    {
      "epoch": 0.09212,
      "grad_norm": 0.9848334695913099,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 9212
    },
    {
      "epoch": 0.09213,
      "grad_norm": 1.1751785205728014,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9213
    },
    {
      "epoch": 0.09214,
      "grad_norm": 0.9488549068764679,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 9214
    },
    {
      "epoch": 0.09215,
      "grad_norm": 1.5103788355550805,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 9215
    },
    {
      "epoch": 0.09216,
      "grad_norm": 1.0379900464298588,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 9216
    },
    {
      "epoch": 0.09217,
      "grad_norm": 1.1644131128976074,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 9217
    },
    {
      "epoch": 0.09218,
      "grad_norm": 1.209017912778444,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 9218
    },
    {
      "epoch": 0.09219,
      "grad_norm": 1.1356937772580649,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 9219
    },
    {
      "epoch": 0.0922,
      "grad_norm": 1.0245612101785528,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9220
    },
    {
      "epoch": 0.09221,
      "grad_norm": 1.1979930204364009,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 9221
    },
    {
      "epoch": 0.09222,
      "grad_norm": 1.0364515076478473,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9222
    },
    {
      "epoch": 0.09223,
      "grad_norm": 1.098278277703503,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 9223
    },
    {
      "epoch": 0.09224,
      "grad_norm": 1.0253467498355422,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 9224
    },
    {
      "epoch": 0.09225,
      "grad_norm": 1.2208294239001167,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 9225
    },
    {
      "epoch": 0.09226,
      "grad_norm": 0.982628865592051,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 9226
    },
    {
      "epoch": 0.09227,
      "grad_norm": 1.2469564366340415,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 9227
    },
    {
      "epoch": 0.09228,
      "grad_norm": 0.9686273648297514,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 9228
    },
    {
      "epoch": 0.09229,
      "grad_norm": 1.6332491084756777,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 9229
    },
    {
      "epoch": 0.0923,
      "grad_norm": 0.9688872939255809,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 9230
    },
    {
      "epoch": 0.09231,
      "grad_norm": 1.071956008784058,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 9231
    },
    {
      "epoch": 0.09232,
      "grad_norm": 1.0691740349202605,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 9232
    },
    {
      "epoch": 0.09233,
      "grad_norm": 1.1757930687992064,
      "learning_rate": 0.003,
      "loss": 4.114,
      "step": 9233
    },
    {
      "epoch": 0.09234,
      "grad_norm": 1.164102159428301,
      "learning_rate": 0.003,
      "loss": 4.1207,
      "step": 9234
    },
    {
      "epoch": 0.09235,
      "grad_norm": 1.0114788239049175,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 9235
    },
    {
      "epoch": 0.09236,
      "grad_norm": 1.404798045906275,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 9236
    },
    {
      "epoch": 0.09237,
      "grad_norm": 1.136045166122303,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 9237
    },
    {
      "epoch": 0.09238,
      "grad_norm": 1.318478567865631,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 9238
    },
    {
      "epoch": 0.09239,
      "grad_norm": 1.2045711416898381,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 9239
    },
    {
      "epoch": 0.0924,
      "grad_norm": 1.098429972645525,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 9240
    },
    {
      "epoch": 0.09241,
      "grad_norm": 1.0429115598947243,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9241
    },
    {
      "epoch": 0.09242,
      "grad_norm": 1.0965896753183049,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 9242
    },
    {
      "epoch": 0.09243,
      "grad_norm": 1.150697191723102,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 9243
    },
    {
      "epoch": 0.09244,
      "grad_norm": 0.9965888915579897,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 9244
    },
    {
      "epoch": 0.09245,
      "grad_norm": 1.1167730900283515,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 9245
    },
    {
      "epoch": 0.09246,
      "grad_norm": 1.0193728752692073,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 9246
    },
    {
      "epoch": 0.09247,
      "grad_norm": 1.0219816554123302,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 9247
    },
    {
      "epoch": 0.09248,
      "grad_norm": 1.1080989732024906,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9248
    },
    {
      "epoch": 0.09249,
      "grad_norm": 1.103023479040818,
      "learning_rate": 0.003,
      "loss": 4.1377,
      "step": 9249
    },
    {
      "epoch": 0.0925,
      "grad_norm": 0.9904590912992994,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 9250
    },
    {
      "epoch": 0.09251,
      "grad_norm": 1.08798994175361,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9251
    },
    {
      "epoch": 0.09252,
      "grad_norm": 1.2064555294662573,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 9252
    },
    {
      "epoch": 0.09253,
      "grad_norm": 1.0462445137910321,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 9253
    },
    {
      "epoch": 0.09254,
      "grad_norm": 1.2715061499946645,
      "learning_rate": 0.003,
      "loss": 4.1091,
      "step": 9254
    },
    {
      "epoch": 0.09255,
      "grad_norm": 1.2438088592426244,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 9255
    },
    {
      "epoch": 0.09256,
      "grad_norm": 1.2544838328606913,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 9256
    },
    {
      "epoch": 0.09257,
      "grad_norm": 1.0171563098654022,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9257
    },
    {
      "epoch": 0.09258,
      "grad_norm": 1.2557192439580471,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 9258
    },
    {
      "epoch": 0.09259,
      "grad_norm": 1.0493174212094476,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 9259
    },
    {
      "epoch": 0.0926,
      "grad_norm": 1.419614696139921,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 9260
    },
    {
      "epoch": 0.09261,
      "grad_norm": 1.3455975545498775,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 9261
    },
    {
      "epoch": 0.09262,
      "grad_norm": 0.924216487979214,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 9262
    },
    {
      "epoch": 0.09263,
      "grad_norm": 1.1329672668552027,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 9263
    },
    {
      "epoch": 0.09264,
      "grad_norm": 1.1778087554299006,
      "learning_rate": 0.003,
      "loss": 4.1327,
      "step": 9264
    },
    {
      "epoch": 0.09265,
      "grad_norm": 0.9448279086828678,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 9265
    },
    {
      "epoch": 0.09266,
      "grad_norm": 1.056098935824549,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 9266
    },
    {
      "epoch": 0.09267,
      "grad_norm": 1.0254847874505146,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 9267
    },
    {
      "epoch": 0.09268,
      "grad_norm": 1.4009869680406688,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 9268
    },
    {
      "epoch": 0.09269,
      "grad_norm": 0.7636038650685317,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 9269
    },
    {
      "epoch": 0.0927,
      "grad_norm": 0.9369345979518385,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 9270
    },
    {
      "epoch": 0.09271,
      "grad_norm": 1.380947841962622,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 9271
    },
    {
      "epoch": 0.09272,
      "grad_norm": 1.0988768526691668,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 9272
    },
    {
      "epoch": 0.09273,
      "grad_norm": 1.201945787507501,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 9273
    },
    {
      "epoch": 0.09274,
      "grad_norm": 1.110016280769841,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 9274
    },
    {
      "epoch": 0.09275,
      "grad_norm": 1.2820959206746452,
      "learning_rate": 0.003,
      "loss": 4.1131,
      "step": 9275
    },
    {
      "epoch": 0.09276,
      "grad_norm": 0.9452322962716858,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 9276
    },
    {
      "epoch": 0.09277,
      "grad_norm": 0.9631660827302326,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 9277
    },
    {
      "epoch": 0.09278,
      "grad_norm": 1.14895196868688,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9278
    },
    {
      "epoch": 0.09279,
      "grad_norm": 1.0107540563165534,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9279
    },
    {
      "epoch": 0.0928,
      "grad_norm": 1.2063554014208668,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 9280
    },
    {
      "epoch": 0.09281,
      "grad_norm": 1.2176906534765644,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 9281
    },
    {
      "epoch": 0.09282,
      "grad_norm": 1.1177750119704892,
      "learning_rate": 0.003,
      "loss": 4.1199,
      "step": 9282
    },
    {
      "epoch": 0.09283,
      "grad_norm": 1.163517359272145,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 9283
    },
    {
      "epoch": 0.09284,
      "grad_norm": 1.0946434369290665,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 9284
    },
    {
      "epoch": 0.09285,
      "grad_norm": 1.0823393825924086,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 9285
    },
    {
      "epoch": 0.09286,
      "grad_norm": 1.1806916666698575,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 9286
    },
    {
      "epoch": 0.09287,
      "grad_norm": 1.1880978680931515,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 9287
    },
    {
      "epoch": 0.09288,
      "grad_norm": 1.0280458966744943,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 9288
    },
    {
      "epoch": 0.09289,
      "grad_norm": 1.165817168542551,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 9289
    },
    {
      "epoch": 0.0929,
      "grad_norm": 1.0529747069060469,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 9290
    },
    {
      "epoch": 0.09291,
      "grad_norm": 1.1911530919173343,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9291
    },
    {
      "epoch": 0.09292,
      "grad_norm": 1.0341998160006975,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 9292
    },
    {
      "epoch": 0.09293,
      "grad_norm": 1.2373473437286293,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 9293
    },
    {
      "epoch": 0.09294,
      "grad_norm": 1.0331578332846882,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9294
    },
    {
      "epoch": 0.09295,
      "grad_norm": 1.4023676676067414,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 9295
    },
    {
      "epoch": 0.09296,
      "grad_norm": 0.9248252353770195,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 9296
    },
    {
      "epoch": 0.09297,
      "grad_norm": 1.1620446186083104,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 9297
    },
    {
      "epoch": 0.09298,
      "grad_norm": 1.4922806015618209,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 9298
    },
    {
      "epoch": 0.09299,
      "grad_norm": 1.0988995125095076,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 9299
    },
    {
      "epoch": 0.093,
      "grad_norm": 0.9445100928298968,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9300
    },
    {
      "epoch": 0.09301,
      "grad_norm": 0.9841352473435087,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 9301
    },
    {
      "epoch": 0.09302,
      "grad_norm": 1.2917413918777942,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 9302
    },
    {
      "epoch": 0.09303,
      "grad_norm": 1.1883645310803719,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 9303
    },
    {
      "epoch": 0.09304,
      "grad_norm": 1.3732633784531907,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 9304
    },
    {
      "epoch": 0.09305,
      "grad_norm": 0.9439791924939734,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 9305
    },
    {
      "epoch": 0.09306,
      "grad_norm": 1.0780768668152263,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 9306
    },
    {
      "epoch": 0.09307,
      "grad_norm": 1.1260710089048631,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 9307
    },
    {
      "epoch": 0.09308,
      "grad_norm": 1.1930964774850155,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 9308
    },
    {
      "epoch": 0.09309,
      "grad_norm": 1.1034586208598085,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 9309
    },
    {
      "epoch": 0.0931,
      "grad_norm": 1.066001517551609,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 9310
    },
    {
      "epoch": 0.09311,
      "grad_norm": 1.2800453382483366,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 9311
    },
    {
      "epoch": 0.09312,
      "grad_norm": 0.9839849279310616,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 9312
    },
    {
      "epoch": 0.09313,
      "grad_norm": 1.1207976506853647,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 9313
    },
    {
      "epoch": 0.09314,
      "grad_norm": 1.0422474086769304,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 9314
    },
    {
      "epoch": 0.09315,
      "grad_norm": 1.3153342391219134,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 9315
    },
    {
      "epoch": 0.09316,
      "grad_norm": 0.9724487500802593,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 9316
    },
    {
      "epoch": 0.09317,
      "grad_norm": 1.2842443211720584,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 9317
    },
    {
      "epoch": 0.09318,
      "grad_norm": 1.2767684341502084,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 9318
    },
    {
      "epoch": 0.09319,
      "grad_norm": 0.8177780821558324,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 9319
    },
    {
      "epoch": 0.0932,
      "grad_norm": 0.8736538476508046,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 9320
    },
    {
      "epoch": 0.09321,
      "grad_norm": 1.0764894227017647,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 9321
    },
    {
      "epoch": 0.09322,
      "grad_norm": 1.106669548495908,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 9322
    },
    {
      "epoch": 0.09323,
      "grad_norm": 0.9109911357340257,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 9323
    },
    {
      "epoch": 0.09324,
      "grad_norm": 1.0121524528258052,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 9324
    },
    {
      "epoch": 0.09325,
      "grad_norm": 1.1953363852776204,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 9325
    },
    {
      "epoch": 0.09326,
      "grad_norm": 1.2214118230644457,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 9326
    },
    {
      "epoch": 0.09327,
      "grad_norm": 1.3516713372464368,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9327
    },
    {
      "epoch": 0.09328,
      "grad_norm": 1.358690648608492,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 9328
    },
    {
      "epoch": 0.09329,
      "grad_norm": 0.8240895443762108,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 9329
    },
    {
      "epoch": 0.0933,
      "grad_norm": 0.9127779555741105,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 9330
    },
    {
      "epoch": 0.09331,
      "grad_norm": 1.2795917276154456,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 9331
    },
    {
      "epoch": 0.09332,
      "grad_norm": 1.1571382152960765,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9332
    },
    {
      "epoch": 0.09333,
      "grad_norm": 1.534670784931041,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 9333
    },
    {
      "epoch": 0.09334,
      "grad_norm": 0.8244035475472739,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 9334
    },
    {
      "epoch": 0.09335,
      "grad_norm": 0.9041976617795457,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 9335
    },
    {
      "epoch": 0.09336,
      "grad_norm": 1.2406100259880315,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 9336
    },
    {
      "epoch": 0.09337,
      "grad_norm": 1.0485091876703885,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 9337
    },
    {
      "epoch": 0.09338,
      "grad_norm": 1.489821206610966,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 9338
    },
    {
      "epoch": 0.09339,
      "grad_norm": 0.964634627652276,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9339
    },
    {
      "epoch": 0.0934,
      "grad_norm": 1.0533122624515205,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 9340
    },
    {
      "epoch": 0.09341,
      "grad_norm": 1.1590715738463193,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 9341
    },
    {
      "epoch": 0.09342,
      "grad_norm": 1.2626146558793703,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 9342
    },
    {
      "epoch": 0.09343,
      "grad_norm": 1.0502100202087143,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 9343
    },
    {
      "epoch": 0.09344,
      "grad_norm": 1.283106408396065,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 9344
    },
    {
      "epoch": 0.09345,
      "grad_norm": 0.9876362836581257,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 9345
    },
    {
      "epoch": 0.09346,
      "grad_norm": 1.217302791244378,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9346
    },
    {
      "epoch": 0.09347,
      "grad_norm": 0.922861237259809,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 9347
    },
    {
      "epoch": 0.09348,
      "grad_norm": 1.0359926662539227,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 9348
    },
    {
      "epoch": 0.09349,
      "grad_norm": 1.2124928441737841,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 9349
    },
    {
      "epoch": 0.0935,
      "grad_norm": 1.2233172467818763,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 9350
    },
    {
      "epoch": 0.09351,
      "grad_norm": 1.1007285345731772,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 9351
    },
    {
      "epoch": 0.09352,
      "grad_norm": 1.1629721595908953,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 9352
    },
    {
      "epoch": 0.09353,
      "grad_norm": 1.1644970866221465,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 9353
    },
    {
      "epoch": 0.09354,
      "grad_norm": 1.2882587498408007,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 9354
    },
    {
      "epoch": 0.09355,
      "grad_norm": 1.206734517803683,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 9355
    },
    {
      "epoch": 0.09356,
      "grad_norm": 1.1528835236818287,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 9356
    },
    {
      "epoch": 0.09357,
      "grad_norm": 1.0992532104364745,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 9357
    },
    {
      "epoch": 0.09358,
      "grad_norm": 1.0561375952353425,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 9358
    },
    {
      "epoch": 0.09359,
      "grad_norm": 1.1750434512246604,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 9359
    },
    {
      "epoch": 0.0936,
      "grad_norm": 1.0984683955298284,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 9360
    },
    {
      "epoch": 0.09361,
      "grad_norm": 1.3400256540724003,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 9361
    },
    {
      "epoch": 0.09362,
      "grad_norm": 1.2780797275614484,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 9362
    },
    {
      "epoch": 0.09363,
      "grad_norm": 1.0599674321980093,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 9363
    },
    {
      "epoch": 0.09364,
      "grad_norm": 1.1102228277609347,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 9364
    },
    {
      "epoch": 0.09365,
      "grad_norm": 1.0688813344095076,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 9365
    },
    {
      "epoch": 0.09366,
      "grad_norm": 1.3133131086901089,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 9366
    },
    {
      "epoch": 0.09367,
      "grad_norm": 1.0992701868502373,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 9367
    },
    {
      "epoch": 0.09368,
      "grad_norm": 1.3194143703533834,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 9368
    },
    {
      "epoch": 0.09369,
      "grad_norm": 1.0629018593754467,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 9369
    },
    {
      "epoch": 0.0937,
      "grad_norm": 1.0256226402002402,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 9370
    },
    {
      "epoch": 0.09371,
      "grad_norm": 1.088497881262939,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 9371
    },
    {
      "epoch": 0.09372,
      "grad_norm": 1.1654530492594952,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 9372
    },
    {
      "epoch": 0.09373,
      "grad_norm": 1.1828421853475708,
      "learning_rate": 0.003,
      "loss": 4.1286,
      "step": 9373
    },
    {
      "epoch": 0.09374,
      "grad_norm": 0.9578727686445765,
      "learning_rate": 0.003,
      "loss": 4.1225,
      "step": 9374
    },
    {
      "epoch": 0.09375,
      "grad_norm": 1.1087522061787056,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 9375
    },
    {
      "epoch": 0.09376,
      "grad_norm": 1.075961470799943,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 9376
    },
    {
      "epoch": 0.09377,
      "grad_norm": 1.037345966623672,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 9377
    },
    {
      "epoch": 0.09378,
      "grad_norm": 1.1148154963944317,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 9378
    },
    {
      "epoch": 0.09379,
      "grad_norm": 1.0376386791840873,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 9379
    },
    {
      "epoch": 0.0938,
      "grad_norm": 1.4017661260371703,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9380
    },
    {
      "epoch": 0.09381,
      "grad_norm": 1.0029510870258844,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 9381
    },
    {
      "epoch": 0.09382,
      "grad_norm": 1.2388574249097994,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 9382
    },
    {
      "epoch": 0.09383,
      "grad_norm": 1.1132940713186557,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 9383
    },
    {
      "epoch": 0.09384,
      "grad_norm": 1.1756128136659143,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 9384
    },
    {
      "epoch": 0.09385,
      "grad_norm": 1.332429527105117,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 9385
    },
    {
      "epoch": 0.09386,
      "grad_norm": 0.8812918899253777,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 9386
    },
    {
      "epoch": 0.09387,
      "grad_norm": 1.0670364087317739,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 9387
    },
    {
      "epoch": 0.09388,
      "grad_norm": 1.235244434189283,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 9388
    },
    {
      "epoch": 0.09389,
      "grad_norm": 1.1706315387987256,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 9389
    },
    {
      "epoch": 0.0939,
      "grad_norm": 1.1499760335019689,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 9390
    },
    {
      "epoch": 0.09391,
      "grad_norm": 1.2531320429573474,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 9391
    },
    {
      "epoch": 0.09392,
      "grad_norm": 1.0209475725286998,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 9392
    },
    {
      "epoch": 0.09393,
      "grad_norm": 1.359927438767316,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 9393
    },
    {
      "epoch": 0.09394,
      "grad_norm": 0.9466831704667089,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 9394
    },
    {
      "epoch": 0.09395,
      "grad_norm": 1.335400407810145,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 9395
    },
    {
      "epoch": 0.09396,
      "grad_norm": 1.0971675919494583,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 9396
    },
    {
      "epoch": 0.09397,
      "grad_norm": 1.074597575443764,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 9397
    },
    {
      "epoch": 0.09398,
      "grad_norm": 1.0573510523767233,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 9398
    },
    {
      "epoch": 0.09399,
      "grad_norm": 1.1461006871137773,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 9399
    },
    {
      "epoch": 0.094,
      "grad_norm": 1.2871999841337791,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 9400
    },
    {
      "epoch": 0.09401,
      "grad_norm": 1.0662993758000978,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 9401
    },
    {
      "epoch": 0.09402,
      "grad_norm": 0.985967078282072,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 9402
    },
    {
      "epoch": 0.09403,
      "grad_norm": 1.1318935874061076,
      "learning_rate": 0.003,
      "loss": 4.1139,
      "step": 9403
    },
    {
      "epoch": 0.09404,
      "grad_norm": 1.1693036846085634,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 9404
    },
    {
      "epoch": 0.09405,
      "grad_norm": 1.080452148293103,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 9405
    },
    {
      "epoch": 0.09406,
      "grad_norm": 1.1205331802372656,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 9406
    },
    {
      "epoch": 0.09407,
      "grad_norm": 1.0444311736562197,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 9407
    },
    {
      "epoch": 0.09408,
      "grad_norm": 1.176470642135001,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 9408
    },
    {
      "epoch": 0.09409,
      "grad_norm": 1.155930558160252,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 9409
    },
    {
      "epoch": 0.0941,
      "grad_norm": 0.9223475406864473,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 9410
    },
    {
      "epoch": 0.09411,
      "grad_norm": 1.2829733075715253,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 9411
    },
    {
      "epoch": 0.09412,
      "grad_norm": 1.078581058592096,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 9412
    },
    {
      "epoch": 0.09413,
      "grad_norm": 1.0432780765545573,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 9413
    },
    {
      "epoch": 0.09414,
      "grad_norm": 1.0829176810329322,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 9414
    },
    {
      "epoch": 0.09415,
      "grad_norm": 1.3236052288392064,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 9415
    },
    {
      "epoch": 0.09416,
      "grad_norm": 1.2301785581024418,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9416
    },
    {
      "epoch": 0.09417,
      "grad_norm": 1.1007603067639056,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 9417
    },
    {
      "epoch": 0.09418,
      "grad_norm": 1.2070889380081715,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 9418
    },
    {
      "epoch": 0.09419,
      "grad_norm": 1.0931219718566882,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 9419
    },
    {
      "epoch": 0.0942,
      "grad_norm": 0.9277318132635548,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 9420
    },
    {
      "epoch": 0.09421,
      "grad_norm": 0.9913968322046562,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 9421
    },
    {
      "epoch": 0.09422,
      "grad_norm": 1.265447990074078,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 9422
    },
    {
      "epoch": 0.09423,
      "grad_norm": 0.9313054820006388,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 9423
    },
    {
      "epoch": 0.09424,
      "grad_norm": 1.2579896425286146,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 9424
    },
    {
      "epoch": 0.09425,
      "grad_norm": 1.105309071721738,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 9425
    },
    {
      "epoch": 0.09426,
      "grad_norm": 1.3115119791195045,
      "learning_rate": 0.003,
      "loss": 4.1195,
      "step": 9426
    },
    {
      "epoch": 0.09427,
      "grad_norm": 1.184811038469732,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 9427
    },
    {
      "epoch": 0.09428,
      "grad_norm": 1.019043659850143,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 9428
    },
    {
      "epoch": 0.09429,
      "grad_norm": 1.1829465834546578,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 9429
    },
    {
      "epoch": 0.0943,
      "grad_norm": 1.1269673551587713,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 9430
    },
    {
      "epoch": 0.09431,
      "grad_norm": 1.1911164137240267,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 9431
    },
    {
      "epoch": 0.09432,
      "grad_norm": 1.0130186081326145,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 9432
    },
    {
      "epoch": 0.09433,
      "grad_norm": 1.0546005003528371,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 9433
    },
    {
      "epoch": 0.09434,
      "grad_norm": 0.9341349184124893,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 9434
    },
    {
      "epoch": 0.09435,
      "grad_norm": 1.1083137995222276,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 9435
    },
    {
      "epoch": 0.09436,
      "grad_norm": 1.0654190303786433,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 9436
    },
    {
      "epoch": 0.09437,
      "grad_norm": 1.584058741422896,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 9437
    },
    {
      "epoch": 0.09438,
      "grad_norm": 0.9750544774777391,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 9438
    },
    {
      "epoch": 0.09439,
      "grad_norm": 1.339680164780197,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 9439
    },
    {
      "epoch": 0.0944,
      "grad_norm": 1.1756294393140456,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 9440
    },
    {
      "epoch": 0.09441,
      "grad_norm": 1.014182140362658,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 9441
    },
    {
      "epoch": 0.09442,
      "grad_norm": 1.1662162468777555,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9442
    },
    {
      "epoch": 0.09443,
      "grad_norm": 0.9553948736102283,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 9443
    },
    {
      "epoch": 0.09444,
      "grad_norm": 1.0709848010805125,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 9444
    },
    {
      "epoch": 0.09445,
      "grad_norm": 1.134319736024161,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 9445
    },
    {
      "epoch": 0.09446,
      "grad_norm": 1.0318135027042934,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 9446
    },
    {
      "epoch": 0.09447,
      "grad_norm": 1.3736137282415484,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 9447
    },
    {
      "epoch": 0.09448,
      "grad_norm": 0.9759871083573741,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 9448
    },
    {
      "epoch": 0.09449,
      "grad_norm": 1.3188281568720621,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 9449
    },
    {
      "epoch": 0.0945,
      "grad_norm": 1.009402232799297,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 9450
    },
    {
      "epoch": 0.09451,
      "grad_norm": 1.2792820747685412,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 9451
    },
    {
      "epoch": 0.09452,
      "grad_norm": 1.0990815298004843,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 9452
    },
    {
      "epoch": 0.09453,
      "grad_norm": 0.8910360106491243,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 9453
    },
    {
      "epoch": 0.09454,
      "grad_norm": 1.1110202820179365,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 9454
    },
    {
      "epoch": 0.09455,
      "grad_norm": 1.034632480340015,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 9455
    },
    {
      "epoch": 0.09456,
      "grad_norm": 1.282774041514997,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 9456
    },
    {
      "epoch": 0.09457,
      "grad_norm": 1.1710239575027193,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 9457
    },
    {
      "epoch": 0.09458,
      "grad_norm": 1.1948449206009228,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 9458
    },
    {
      "epoch": 0.09459,
      "grad_norm": 1.4329498523084134,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 9459
    },
    {
      "epoch": 0.0946,
      "grad_norm": 1.0480088763983315,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 9460
    },
    {
      "epoch": 0.09461,
      "grad_norm": 1.1347178145478178,
      "learning_rate": 0.003,
      "loss": 4.1213,
      "step": 9461
    },
    {
      "epoch": 0.09462,
      "grad_norm": 1.2915927303703032,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 9462
    },
    {
      "epoch": 0.09463,
      "grad_norm": 0.8991215034828245,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9463
    },
    {
      "epoch": 0.09464,
      "grad_norm": 1.217189952244973,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 9464
    },
    {
      "epoch": 0.09465,
      "grad_norm": 1.108024828467612,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 9465
    },
    {
      "epoch": 0.09466,
      "grad_norm": 1.254719512836333,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 9466
    },
    {
      "epoch": 0.09467,
      "grad_norm": 1.0657274107021417,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 9467
    },
    {
      "epoch": 0.09468,
      "grad_norm": 1.307853078083217,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 9468
    },
    {
      "epoch": 0.09469,
      "grad_norm": 1.0966231654158667,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 9469
    },
    {
      "epoch": 0.0947,
      "grad_norm": 0.9873023581515457,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 9470
    },
    {
      "epoch": 0.09471,
      "grad_norm": 1.0072444332654882,
      "learning_rate": 0.003,
      "loss": 4.1054,
      "step": 9471
    },
    {
      "epoch": 0.09472,
      "grad_norm": 1.217023364844429,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 9472
    },
    {
      "epoch": 0.09473,
      "grad_norm": 1.2084006922919652,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 9473
    },
    {
      "epoch": 0.09474,
      "grad_norm": 1.1341037440802257,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 9474
    },
    {
      "epoch": 0.09475,
      "grad_norm": 1.3957096693032005,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 9475
    },
    {
      "epoch": 0.09476,
      "grad_norm": 1.0893090004792627,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 9476
    },
    {
      "epoch": 0.09477,
      "grad_norm": 1.0407040979188704,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 9477
    },
    {
      "epoch": 0.09478,
      "grad_norm": 1.0265803874051365,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 9478
    },
    {
      "epoch": 0.09479,
      "grad_norm": 1.3147642641286537,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 9479
    },
    {
      "epoch": 0.0948,
      "grad_norm": 1.0903590993744465,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 9480
    },
    {
      "epoch": 0.09481,
      "grad_norm": 1.1338195989175865,
      "learning_rate": 0.003,
      "loss": 4.1109,
      "step": 9481
    },
    {
      "epoch": 0.09482,
      "grad_norm": 1.1488847919171767,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 9482
    },
    {
      "epoch": 0.09483,
      "grad_norm": 1.0895462508145521,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 9483
    },
    {
      "epoch": 0.09484,
      "grad_norm": 1.0370843870115312,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 9484
    },
    {
      "epoch": 0.09485,
      "grad_norm": 1.2723261565988722,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 9485
    },
    {
      "epoch": 0.09486,
      "grad_norm": 0.965502657440432,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 9486
    },
    {
      "epoch": 0.09487,
      "grad_norm": 1.342344919200925,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 9487
    },
    {
      "epoch": 0.09488,
      "grad_norm": 0.9393715311377178,
      "learning_rate": 0.003,
      "loss": 4.1302,
      "step": 9488
    },
    {
      "epoch": 0.09489,
      "grad_norm": 1.0372158171386994,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 9489
    },
    {
      "epoch": 0.0949,
      "grad_norm": 1.2723730449344455,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 9490
    },
    {
      "epoch": 0.09491,
      "grad_norm": 1.1531858839360378,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 9491
    },
    {
      "epoch": 0.09492,
      "grad_norm": 1.0917875041437035,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 9492
    },
    {
      "epoch": 0.09493,
      "grad_norm": 1.0917450412795457,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 9493
    },
    {
      "epoch": 0.09494,
      "grad_norm": 1.300674879254043,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 9494
    },
    {
      "epoch": 0.09495,
      "grad_norm": 1.0477357995905079,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 9495
    },
    {
      "epoch": 0.09496,
      "grad_norm": 1.308017981995178,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 9496
    },
    {
      "epoch": 0.09497,
      "grad_norm": 1.01346574500382,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 9497
    },
    {
      "epoch": 0.09498,
      "grad_norm": 1.2616680695782678,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 9498
    },
    {
      "epoch": 0.09499,
      "grad_norm": 1.0581943895228205,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 9499
    },
    {
      "epoch": 0.095,
      "grad_norm": 1.2120883263505524,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 9500
    },
    {
      "epoch": 0.09501,
      "grad_norm": 1.1721476598291545,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 9501
    },
    {
      "epoch": 0.09502,
      "grad_norm": 1.1119140712765754,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 9502
    },
    {
      "epoch": 0.09503,
      "grad_norm": 1.0214668770994324,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 9503
    },
    {
      "epoch": 0.09504,
      "grad_norm": 1.2759200888998818,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 9504
    },
    {
      "epoch": 0.09505,
      "grad_norm": 1.043511269779943,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 9505
    },
    {
      "epoch": 0.09506,
      "grad_norm": 1.3204985861928111,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 9506
    },
    {
      "epoch": 0.09507,
      "grad_norm": 0.9924814468245581,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 9507
    },
    {
      "epoch": 0.09508,
      "grad_norm": 1.0657775357354207,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 9508
    },
    {
      "epoch": 0.09509,
      "grad_norm": 1.0941505499696234,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 9509
    },
    {
      "epoch": 0.0951,
      "grad_norm": 1.1405032961817592,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 9510
    },
    {
      "epoch": 0.09511,
      "grad_norm": 1.1584629996450322,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 9511
    },
    {
      "epoch": 0.09512,
      "grad_norm": 1.1526523340688448,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 9512
    },
    {
      "epoch": 0.09513,
      "grad_norm": 1.012931520636649,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 9513
    },
    {
      "epoch": 0.09514,
      "grad_norm": 1.3001297494253556,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 9514
    },
    {
      "epoch": 0.09515,
      "grad_norm": 1.0909618097726363,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 9515
    },
    {
      "epoch": 0.09516,
      "grad_norm": 1.1840771159783676,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 9516
    },
    {
      "epoch": 0.09517,
      "grad_norm": 1.2901035070823237,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 9517
    },
    {
      "epoch": 0.09518,
      "grad_norm": 1.071713800777275,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 9518
    },
    {
      "epoch": 0.09519,
      "grad_norm": 1.2588935345722223,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 9519
    },
    {
      "epoch": 0.0952,
      "grad_norm": 1.150982327799799,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 9520
    },
    {
      "epoch": 0.09521,
      "grad_norm": 1.0941667959158383,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 9521
    },
    {
      "epoch": 0.09522,
      "grad_norm": 1.2457329144942104,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 9522
    },
    {
      "epoch": 0.09523,
      "grad_norm": 1.0685171393986306,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 9523
    },
    {
      "epoch": 0.09524,
      "grad_norm": 1.059073090332289,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 9524
    },
    {
      "epoch": 0.09525,
      "grad_norm": 1.1317859296460144,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 9525
    },
    {
      "epoch": 0.09526,
      "grad_norm": 1.0358007539350973,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 9526
    },
    {
      "epoch": 0.09527,
      "grad_norm": 1.1025319126502247,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 9527
    },
    {
      "epoch": 0.09528,
      "grad_norm": 0.9906183944830572,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 9528
    },
    {
      "epoch": 0.09529,
      "grad_norm": 1.2857359082767073,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 9529
    },
    {
      "epoch": 0.0953,
      "grad_norm": 1.096634688209568,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 9530
    },
    {
      "epoch": 0.09531,
      "grad_norm": 1.1987032031232345,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 9531
    },
    {
      "epoch": 0.09532,
      "grad_norm": 1.0394624430870323,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 9532
    },
    {
      "epoch": 0.09533,
      "grad_norm": 1.4862811291217963,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 9533
    },
    {
      "epoch": 0.09534,
      "grad_norm": 0.9465852817142347,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 9534
    },
    {
      "epoch": 0.09535,
      "grad_norm": 1.1139397441123668,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 9535
    },
    {
      "epoch": 0.09536,
      "grad_norm": 1.0512790302527368,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 9536
    },
    {
      "epoch": 0.09537,
      "grad_norm": 1.1891251252289299,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 9537
    },
    {
      "epoch": 0.09538,
      "grad_norm": 1.0883015405092293,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 9538
    },
    {
      "epoch": 0.09539,
      "grad_norm": 1.175924001059813,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 9539
    },
    {
      "epoch": 0.0954,
      "grad_norm": 1.134543356215364,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 9540
    },
    {
      "epoch": 0.09541,
      "grad_norm": 0.9943534853511077,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 9541
    },
    {
      "epoch": 0.09542,
      "grad_norm": 1.2269677886331591,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 9542
    },
    {
      "epoch": 0.09543,
      "grad_norm": 1.1058198763957707,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 9543
    },
    {
      "epoch": 0.09544,
      "grad_norm": 1.3515105723732794,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 9544
    },
    {
      "epoch": 0.09545,
      "grad_norm": 0.9298293002067652,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 9545
    },
    {
      "epoch": 0.09546,
      "grad_norm": 1.1998465227374622,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 9546
    },
    {
      "epoch": 0.09547,
      "grad_norm": 1.2539130424447145,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 9547
    },
    {
      "epoch": 0.09548,
      "grad_norm": 1.0141650543050034,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 9548
    },
    {
      "epoch": 0.09549,
      "grad_norm": 1.3549810054680689,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 9549
    },
    {
      "epoch": 0.0955,
      "grad_norm": 1.0786660493312055,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 9550
    },
    {
      "epoch": 0.09551,
      "grad_norm": 1.572522511143651,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 9551
    },
    {
      "epoch": 0.09552,
      "grad_norm": 1.0306417789967723,
      "learning_rate": 0.003,
      "loss": 4.1405,
      "step": 9552
    },
    {
      "epoch": 0.09553,
      "grad_norm": 1.2709722537106287,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9553
    },
    {
      "epoch": 0.09554,
      "grad_norm": 0.9366968321825859,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 9554
    },
    {
      "epoch": 0.09555,
      "grad_norm": 1.0415773988291444,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 9555
    },
    {
      "epoch": 0.09556,
      "grad_norm": 1.1872790399809248,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 9556
    },
    {
      "epoch": 0.09557,
      "grad_norm": 1.0918558120913107,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 9557
    },
    {
      "epoch": 0.09558,
      "grad_norm": 1.227846572726307,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 9558
    },
    {
      "epoch": 0.09559,
      "grad_norm": 1.2386135780006378,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 9559
    },
    {
      "epoch": 0.0956,
      "grad_norm": 1.0618652016313734,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 9560
    },
    {
      "epoch": 0.09561,
      "grad_norm": 1.119890477915479,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 9561
    },
    {
      "epoch": 0.09562,
      "grad_norm": 1.016523442218028,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 9562
    },
    {
      "epoch": 0.09563,
      "grad_norm": 1.1785565929403774,
      "learning_rate": 0.003,
      "loss": 4.1249,
      "step": 9563
    },
    {
      "epoch": 0.09564,
      "grad_norm": 0.9337669267700384,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9564
    },
    {
      "epoch": 0.09565,
      "grad_norm": 1.0780917400747017,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 9565
    },
    {
      "epoch": 0.09566,
      "grad_norm": 1.195493305871725,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9566
    },
    {
      "epoch": 0.09567,
      "grad_norm": 1.156333194607271,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 9567
    },
    {
      "epoch": 0.09568,
      "grad_norm": 1.1350215045863368,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 9568
    },
    {
      "epoch": 0.09569,
      "grad_norm": 1.2348564410977743,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 9569
    },
    {
      "epoch": 0.0957,
      "grad_norm": 1.0560118100188587,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 9570
    },
    {
      "epoch": 0.09571,
      "grad_norm": 1.1086095630369532,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 9571
    },
    {
      "epoch": 0.09572,
      "grad_norm": 1.0193857583153085,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 9572
    },
    {
      "epoch": 0.09573,
      "grad_norm": 1.2887839228489493,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 9573
    },
    {
      "epoch": 0.09574,
      "grad_norm": 1.0614328188485582,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 9574
    },
    {
      "epoch": 0.09575,
      "grad_norm": 1.4319941739250786,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 9575
    },
    {
      "epoch": 0.09576,
      "grad_norm": 0.8356787684459427,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9576
    },
    {
      "epoch": 0.09577,
      "grad_norm": 0.9948210584003812,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 9577
    },
    {
      "epoch": 0.09578,
      "grad_norm": 1.3921854113957979,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 9578
    },
    {
      "epoch": 0.09579,
      "grad_norm": 1.0864312621607588,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 9579
    },
    {
      "epoch": 0.0958,
      "grad_norm": 1.1156666180695558,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 9580
    },
    {
      "epoch": 0.09581,
      "grad_norm": 1.0322074629753206,
      "learning_rate": 0.003,
      "loss": 4.1318,
      "step": 9581
    },
    {
      "epoch": 0.09582,
      "grad_norm": 1.1315310163551284,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 9582
    },
    {
      "epoch": 0.09583,
      "grad_norm": 1.115818784532022,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 9583
    },
    {
      "epoch": 0.09584,
      "grad_norm": 1.0450540579600909,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 9584
    },
    {
      "epoch": 0.09585,
      "grad_norm": 1.036975398253159,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 9585
    },
    {
      "epoch": 0.09586,
      "grad_norm": 1.1315376448061392,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 9586
    },
    {
      "epoch": 0.09587,
      "grad_norm": 0.8801716853178653,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 9587
    },
    {
      "epoch": 0.09588,
      "grad_norm": 1.1979096575235726,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 9588
    },
    {
      "epoch": 0.09589,
      "grad_norm": 1.238757685421794,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 9589
    },
    {
      "epoch": 0.0959,
      "grad_norm": 1.1267556925308015,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 9590
    },
    {
      "epoch": 0.09591,
      "grad_norm": 1.130584073648821,
      "learning_rate": 0.003,
      "loss": 4.1296,
      "step": 9591
    },
    {
      "epoch": 0.09592,
      "grad_norm": 1.0738133917355688,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 9592
    },
    {
      "epoch": 0.09593,
      "grad_norm": 1.1776732863268307,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 9593
    },
    {
      "epoch": 0.09594,
      "grad_norm": 1.0443759893082762,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 9594
    },
    {
      "epoch": 0.09595,
      "grad_norm": 1.3547206567603112,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 9595
    },
    {
      "epoch": 0.09596,
      "grad_norm": 0.9752910801651283,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 9596
    },
    {
      "epoch": 0.09597,
      "grad_norm": 1.4882322897015248,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 9597
    },
    {
      "epoch": 0.09598,
      "grad_norm": 1.1972088601549762,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 9598
    },
    {
      "epoch": 0.09599,
      "grad_norm": 1.2882807539335117,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 9599
    },
    {
      "epoch": 0.096,
      "grad_norm": 1.2197457372448262,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 9600
    },
    {
      "epoch": 0.09601,
      "grad_norm": 1.0026983299265844,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 9601
    },
    {
      "epoch": 0.09602,
      "grad_norm": 1.0359963184067358,
      "learning_rate": 0.003,
      "loss": 4.1124,
      "step": 9602
    },
    {
      "epoch": 0.09603,
      "grad_norm": 1.0929372806073887,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 9603
    },
    {
      "epoch": 0.09604,
      "grad_norm": 1.048976558304099,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 9604
    },
    {
      "epoch": 0.09605,
      "grad_norm": 1.133185053036612,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 9605
    },
    {
      "epoch": 0.09606,
      "grad_norm": 1.3381830624612678,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 9606
    },
    {
      "epoch": 0.09607,
      "grad_norm": 1.0571967293380204,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9607
    },
    {
      "epoch": 0.09608,
      "grad_norm": 1.2880171482068876,
      "learning_rate": 0.003,
      "loss": 4.1229,
      "step": 9608
    },
    {
      "epoch": 0.09609,
      "grad_norm": 1.1090349823779482,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 9609
    },
    {
      "epoch": 0.0961,
      "grad_norm": 1.0287658024244641,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 9610
    },
    {
      "epoch": 0.09611,
      "grad_norm": 1.0738364101326499,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 9611
    },
    {
      "epoch": 0.09612,
      "grad_norm": 1.282326636020608,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 9612
    },
    {
      "epoch": 0.09613,
      "grad_norm": 1.3701329621538962,
      "learning_rate": 0.003,
      "loss": 4.1095,
      "step": 9613
    },
    {
      "epoch": 0.09614,
      "grad_norm": 1.1634185313174656,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 9614
    },
    {
      "epoch": 0.09615,
      "grad_norm": 1.1017589773949448,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 9615
    },
    {
      "epoch": 0.09616,
      "grad_norm": 1.1956757667716347,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 9616
    },
    {
      "epoch": 0.09617,
      "grad_norm": 1.0483299135427413,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 9617
    },
    {
      "epoch": 0.09618,
      "grad_norm": 1.0207002766117625,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9618
    },
    {
      "epoch": 0.09619,
      "grad_norm": 1.1687767625123595,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 9619
    },
    {
      "epoch": 0.0962,
      "grad_norm": 1.2124142030797413,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 9620
    },
    {
      "epoch": 0.09621,
      "grad_norm": 1.0427796712758857,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 9621
    },
    {
      "epoch": 0.09622,
      "grad_norm": 1.1321557809894192,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 9622
    },
    {
      "epoch": 0.09623,
      "grad_norm": 1.080551373298356,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 9623
    },
    {
      "epoch": 0.09624,
      "grad_norm": 1.2231836247934575,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 9624
    },
    {
      "epoch": 0.09625,
      "grad_norm": 0.9743054244901846,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 9625
    },
    {
      "epoch": 0.09626,
      "grad_norm": 1.1144708979343558,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 9626
    },
    {
      "epoch": 0.09627,
      "grad_norm": 1.0555475417358153,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 9627
    },
    {
      "epoch": 0.09628,
      "grad_norm": 1.1998838196216295,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 9628
    },
    {
      "epoch": 0.09629,
      "grad_norm": 1.29917367128637,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 9629
    },
    {
      "epoch": 0.0963,
      "grad_norm": 1.3338654943176473,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 9630
    },
    {
      "epoch": 0.09631,
      "grad_norm": 1.133478955698145,
      "learning_rate": 0.003,
      "loss": 4.1218,
      "step": 9631
    },
    {
      "epoch": 0.09632,
      "grad_norm": 1.2159206106180984,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 9632
    },
    {
      "epoch": 0.09633,
      "grad_norm": 1.3893586132647013,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 9633
    },
    {
      "epoch": 0.09634,
      "grad_norm": 1.0477658736391087,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 9634
    },
    {
      "epoch": 0.09635,
      "grad_norm": 1.0397822704281512,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 9635
    },
    {
      "epoch": 0.09636,
      "grad_norm": 1.2637642360376349,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 9636
    },
    {
      "epoch": 0.09637,
      "grad_norm": 0.9333047922469344,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 9637
    },
    {
      "epoch": 0.09638,
      "grad_norm": 1.1332885470676644,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 9638
    },
    {
      "epoch": 0.09639,
      "grad_norm": 1.160189932847423,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 9639
    },
    {
      "epoch": 0.0964,
      "grad_norm": 1.175168237055286,
      "learning_rate": 0.003,
      "loss": 4.1189,
      "step": 9640
    },
    {
      "epoch": 0.09641,
      "grad_norm": 1.130167062520008,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 9641
    },
    {
      "epoch": 0.09642,
      "grad_norm": 1.0245340911895193,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 9642
    },
    {
      "epoch": 0.09643,
      "grad_norm": 1.2253638168085181,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 9643
    },
    {
      "epoch": 0.09644,
      "grad_norm": 1.0444677924566768,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 9644
    },
    {
      "epoch": 0.09645,
      "grad_norm": 1.1654182478122446,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 9645
    },
    {
      "epoch": 0.09646,
      "grad_norm": 1.1443250863557537,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 9646
    },
    {
      "epoch": 0.09647,
      "grad_norm": 1.167791156046563,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 9647
    },
    {
      "epoch": 0.09648,
      "grad_norm": 1.1947555409014852,
      "learning_rate": 0.003,
      "loss": 4.1242,
      "step": 9648
    },
    {
      "epoch": 0.09649,
      "grad_norm": 1.2907277869077691,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 9649
    },
    {
      "epoch": 0.0965,
      "grad_norm": 1.0649464399284445,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 9650
    },
    {
      "epoch": 0.09651,
      "grad_norm": 1.167094081630458,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 9651
    },
    {
      "epoch": 0.09652,
      "grad_norm": 1.0329533755389784,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 9652
    },
    {
      "epoch": 0.09653,
      "grad_norm": 1.2268437129588268,
      "learning_rate": 0.003,
      "loss": 4.1075,
      "step": 9653
    },
    {
      "epoch": 0.09654,
      "grad_norm": 1.1469799145575592,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 9654
    },
    {
      "epoch": 0.09655,
      "grad_norm": 1.2112360987709447,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 9655
    },
    {
      "epoch": 0.09656,
      "grad_norm": 1.0727040957696266,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 9656
    },
    {
      "epoch": 0.09657,
      "grad_norm": 1.1776494445532388,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 9657
    },
    {
      "epoch": 0.09658,
      "grad_norm": 1.1303799155061847,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 9658
    },
    {
      "epoch": 0.09659,
      "grad_norm": 1.2571601860110444,
      "learning_rate": 0.003,
      "loss": 4.1112,
      "step": 9659
    },
    {
      "epoch": 0.0966,
      "grad_norm": 1.0693884401717517,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 9660
    },
    {
      "epoch": 0.09661,
      "grad_norm": 1.004175075932187,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 9661
    },
    {
      "epoch": 0.09662,
      "grad_norm": 1.2266057039354872,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 9662
    },
    {
      "epoch": 0.09663,
      "grad_norm": 0.9543532741319573,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 9663
    },
    {
      "epoch": 0.09664,
      "grad_norm": 1.3902940709187628,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 9664
    },
    {
      "epoch": 0.09665,
      "grad_norm": 0.9943600842844004,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 9665
    },
    {
      "epoch": 0.09666,
      "grad_norm": 1.1758873634953837,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 9666
    },
    {
      "epoch": 0.09667,
      "grad_norm": 0.9845801901236461,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 9667
    },
    {
      "epoch": 0.09668,
      "grad_norm": 1.218866323888133,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 9668
    },
    {
      "epoch": 0.09669,
      "grad_norm": 1.153506103770982,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 9669
    },
    {
      "epoch": 0.0967,
      "grad_norm": 1.1368038601399784,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 9670
    },
    {
      "epoch": 0.09671,
      "grad_norm": 1.4006249826823316,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 9671
    },
    {
      "epoch": 0.09672,
      "grad_norm": 0.9161827917716768,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 9672
    },
    {
      "epoch": 0.09673,
      "grad_norm": 1.0835717715634787,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 9673
    },
    {
      "epoch": 0.09674,
      "grad_norm": 1.0399068019858488,
      "learning_rate": 0.003,
      "loss": 4.1367,
      "step": 9674
    },
    {
      "epoch": 0.09675,
      "grad_norm": 1.0469774866570443,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 9675
    },
    {
      "epoch": 0.09676,
      "grad_norm": 1.2806357843149219,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 9676
    },
    {
      "epoch": 0.09677,
      "grad_norm": 1.009853061676496,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 9677
    },
    {
      "epoch": 0.09678,
      "grad_norm": 1.1540167046393022,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 9678
    },
    {
      "epoch": 0.09679,
      "grad_norm": 1.0771299268306656,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 9679
    },
    {
      "epoch": 0.0968,
      "grad_norm": 1.2351879811153652,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 9680
    },
    {
      "epoch": 0.09681,
      "grad_norm": 1.0473582360721734,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 9681
    },
    {
      "epoch": 0.09682,
      "grad_norm": 1.1020137212667507,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 9682
    },
    {
      "epoch": 0.09683,
      "grad_norm": 1.1966346222569244,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 9683
    },
    {
      "epoch": 0.09684,
      "grad_norm": 1.2461509613901234,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 9684
    },
    {
      "epoch": 0.09685,
      "grad_norm": 1.1552960648766668,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 9685
    },
    {
      "epoch": 0.09686,
      "grad_norm": 1.0019986633187414,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 9686
    },
    {
      "epoch": 0.09687,
      "grad_norm": 1.3405619727570803,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 9687
    },
    {
      "epoch": 0.09688,
      "grad_norm": 0.9274861169023544,
      "learning_rate": 0.003,
      "loss": 4.1183,
      "step": 9688
    },
    {
      "epoch": 0.09689,
      "grad_norm": 1.0221235959428148,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 9689
    },
    {
      "epoch": 0.0969,
      "grad_norm": 1.24021442454874,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 9690
    },
    {
      "epoch": 0.09691,
      "grad_norm": 1.0627861292781198,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 9691
    },
    {
      "epoch": 0.09692,
      "grad_norm": 1.2282733803147996,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 9692
    },
    {
      "epoch": 0.09693,
      "grad_norm": 1.1357687713326148,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 9693
    },
    {
      "epoch": 0.09694,
      "grad_norm": 1.5228079240920682,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 9694
    },
    {
      "epoch": 0.09695,
      "grad_norm": 1.0140611378984512,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 9695
    },
    {
      "epoch": 0.09696,
      "grad_norm": 1.3988967695816317,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 9696
    },
    {
      "epoch": 0.09697,
      "grad_norm": 0.9961426808869265,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 9697
    },
    {
      "epoch": 0.09698,
      "grad_norm": 1.1658655050212485,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 9698
    },
    {
      "epoch": 0.09699,
      "grad_norm": 1.0747064995848443,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 9699
    },
    {
      "epoch": 0.097,
      "grad_norm": 1.3446822786713644,
      "learning_rate": 0.003,
      "loss": 4.1236,
      "step": 9700
    },
    {
      "epoch": 0.09701,
      "grad_norm": 0.8646827591218209,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 9701
    },
    {
      "epoch": 0.09702,
      "grad_norm": 1.068401196167419,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 9702
    },
    {
      "epoch": 0.09703,
      "grad_norm": 1.3695048401575418,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 9703
    },
    {
      "epoch": 0.09704,
      "grad_norm": 1.108979607117454,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 9704
    },
    {
      "epoch": 0.09705,
      "grad_norm": 1.225092292145554,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 9705
    },
    {
      "epoch": 0.09706,
      "grad_norm": 0.8926937513877116,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 9706
    },
    {
      "epoch": 0.09707,
      "grad_norm": 1.0510954237421852,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 9707
    },
    {
      "epoch": 0.09708,
      "grad_norm": 1.2904843277354863,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 9708
    },
    {
      "epoch": 0.09709,
      "grad_norm": 0.9595584155271577,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 9709
    },
    {
      "epoch": 0.0971,
      "grad_norm": 1.4667900800195917,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 9710
    },
    {
      "epoch": 0.09711,
      "grad_norm": 0.8316555422617203,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 9711
    },
    {
      "epoch": 0.09712,
      "grad_norm": 0.9443133434214869,
      "learning_rate": 0.003,
      "loss": 4.1253,
      "step": 9712
    },
    {
      "epoch": 0.09713,
      "grad_norm": 1.3054408816764862,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 9713
    },
    {
      "epoch": 0.09714,
      "grad_norm": 0.9463113118510074,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 9714
    },
    {
      "epoch": 0.09715,
      "grad_norm": 0.9711389233571197,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 9715
    },
    {
      "epoch": 0.09716,
      "grad_norm": 1.2214846668210972,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 9716
    },
    {
      "epoch": 0.09717,
      "grad_norm": 1.0151000634517393,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 9717
    },
    {
      "epoch": 0.09718,
      "grad_norm": 1.4019736336963942,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 9718
    },
    {
      "epoch": 0.09719,
      "grad_norm": 1.3303176085468154,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 9719
    },
    {
      "epoch": 0.0972,
      "grad_norm": 1.1313634808344284,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 9720
    },
    {
      "epoch": 0.09721,
      "grad_norm": 1.2746221665923179,
      "learning_rate": 0.003,
      "loss": 4.1114,
      "step": 9721
    },
    {
      "epoch": 0.09722,
      "grad_norm": 0.892644565526494,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 9722
    },
    {
      "epoch": 0.09723,
      "grad_norm": 0.9551155117082032,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 9723
    },
    {
      "epoch": 0.09724,
      "grad_norm": 1.19901007204341,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 9724
    },
    {
      "epoch": 0.09725,
      "grad_norm": 1.1178173811513445,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 9725
    },
    {
      "epoch": 0.09726,
      "grad_norm": 1.3145957793294916,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 9726
    },
    {
      "epoch": 0.09727,
      "grad_norm": 0.9933567371016185,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 9727
    },
    {
      "epoch": 0.09728,
      "grad_norm": 1.1580376397503649,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 9728
    },
    {
      "epoch": 0.09729,
      "grad_norm": 0.9180435605034515,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 9729
    },
    {
      "epoch": 0.0973,
      "grad_norm": 1.1007443018077367,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 9730
    },
    {
      "epoch": 0.09731,
      "grad_norm": 1.3412829735980611,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 9731
    },
    {
      "epoch": 0.09732,
      "grad_norm": 1.270840362188865,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 9732
    },
    {
      "epoch": 0.09733,
      "grad_norm": 1.1412059933750884,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 9733
    },
    {
      "epoch": 0.09734,
      "grad_norm": 1.02676220561073,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 9734
    },
    {
      "epoch": 0.09735,
      "grad_norm": 1.131065828153314,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 9735
    },
    {
      "epoch": 0.09736,
      "grad_norm": 1.047750185255067,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 9736
    },
    {
      "epoch": 0.09737,
      "grad_norm": 1.0697443599529262,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 9737
    },
    {
      "epoch": 0.09738,
      "grad_norm": 1.085106830258747,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 9738
    },
    {
      "epoch": 0.09739,
      "grad_norm": 1.0775630395031826,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 9739
    },
    {
      "epoch": 0.0974,
      "grad_norm": 1.1335140983423713,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 9740
    },
    {
      "epoch": 0.09741,
      "grad_norm": 1.1750684356779817,
      "learning_rate": 0.003,
      "loss": 4.131,
      "step": 9741
    },
    {
      "epoch": 0.09742,
      "grad_norm": 1.3393502821906345,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 9742
    },
    {
      "epoch": 0.09743,
      "grad_norm": 0.8102089351238771,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 9743
    },
    {
      "epoch": 0.09744,
      "grad_norm": 0.9575256130991701,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 9744
    },
    {
      "epoch": 0.09745,
      "grad_norm": 1.3204084853812648,
      "learning_rate": 0.003,
      "loss": 4.118,
      "step": 9745
    },
    {
      "epoch": 0.09746,
      "grad_norm": 1.1649043514340434,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 9746
    },
    {
      "epoch": 0.09747,
      "grad_norm": 1.2014671262072751,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 9747
    },
    {
      "epoch": 0.09748,
      "grad_norm": 1.1721400848514163,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 9748
    },
    {
      "epoch": 0.09749,
      "grad_norm": 0.9427633072020334,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 9749
    },
    {
      "epoch": 0.0975,
      "grad_norm": 1.2436393502737977,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 9750
    },
    {
      "epoch": 0.09751,
      "grad_norm": 0.9618810694053324,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 9751
    },
    {
      "epoch": 0.09752,
      "grad_norm": 1.113371972687393,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 9752
    },
    {
      "epoch": 0.09753,
      "grad_norm": 1.1960548546905854,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 9753
    },
    {
      "epoch": 0.09754,
      "grad_norm": 1.1085263084596224,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 9754
    },
    {
      "epoch": 0.09755,
      "grad_norm": 1.2291799079319456,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 9755
    },
    {
      "epoch": 0.09756,
      "grad_norm": 1.1359054436090914,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 9756
    },
    {
      "epoch": 0.09757,
      "grad_norm": 1.315833425223894,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 9757
    },
    {
      "epoch": 0.09758,
      "grad_norm": 0.9137304753604574,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 9758
    },
    {
      "epoch": 0.09759,
      "grad_norm": 1.1037158944833518,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 9759
    },
    {
      "epoch": 0.0976,
      "grad_norm": 1.27332720944019,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 9760
    },
    {
      "epoch": 0.09761,
      "grad_norm": 1.0871017800919058,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 9761
    },
    {
      "epoch": 0.09762,
      "grad_norm": 1.4097920604765457,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 9762
    },
    {
      "epoch": 0.09763,
      "grad_norm": 1.0543709201081772,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 9763
    },
    {
      "epoch": 0.09764,
      "grad_norm": 1.3174829695931187,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 9764
    },
    {
      "epoch": 0.09765,
      "grad_norm": 1.0572347247872953,
      "learning_rate": 0.003,
      "loss": 4.1136,
      "step": 9765
    },
    {
      "epoch": 0.09766,
      "grad_norm": 1.2330553046659682,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 9766
    },
    {
      "epoch": 0.09767,
      "grad_norm": 1.1251012136521312,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 9767
    },
    {
      "epoch": 0.09768,
      "grad_norm": 1.207157802526716,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 9768
    },
    {
      "epoch": 0.09769,
      "grad_norm": 1.1489618911216242,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 9769
    },
    {
      "epoch": 0.0977,
      "grad_norm": 1.0543591815664535,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 9770
    },
    {
      "epoch": 0.09771,
      "grad_norm": 1.0711936690272732,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9771
    },
    {
      "epoch": 0.09772,
      "grad_norm": 1.0665693361733348,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 9772
    },
    {
      "epoch": 0.09773,
      "grad_norm": 1.2726662562614708,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 9773
    },
    {
      "epoch": 0.09774,
      "grad_norm": 1.056717609884744,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 9774
    },
    {
      "epoch": 0.09775,
      "grad_norm": 1.300907210522145,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 9775
    },
    {
      "epoch": 0.09776,
      "grad_norm": 1.1616423803262845,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 9776
    },
    {
      "epoch": 0.09777,
      "grad_norm": 1.1101219869879306,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 9777
    },
    {
      "epoch": 0.09778,
      "grad_norm": 1.1601171778844346,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 9778
    },
    {
      "epoch": 0.09779,
      "grad_norm": 1.0863832514735121,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 9779
    },
    {
      "epoch": 0.0978,
      "grad_norm": 1.0996842022521216,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 9780
    },
    {
      "epoch": 0.09781,
      "grad_norm": 1.2011284768026025,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 9781
    },
    {
      "epoch": 0.09782,
      "grad_norm": 1.1251539953516647,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 9782
    },
    {
      "epoch": 0.09783,
      "grad_norm": 1.0937091196835855,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 9783
    },
    {
      "epoch": 0.09784,
      "grad_norm": 1.401979999650129,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 9784
    },
    {
      "epoch": 0.09785,
      "grad_norm": 1.2484479648172329,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 9785
    },
    {
      "epoch": 0.09786,
      "grad_norm": 0.9741916665639245,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 9786
    },
    {
      "epoch": 0.09787,
      "grad_norm": 0.9768176115546604,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 9787
    },
    {
      "epoch": 0.09788,
      "grad_norm": 1.2418178056224656,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 9788
    },
    {
      "epoch": 0.09789,
      "grad_norm": 1.0229569058564036,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 9789
    },
    {
      "epoch": 0.0979,
      "grad_norm": 1.3990216170496972,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 9790
    },
    {
      "epoch": 0.09791,
      "grad_norm": 1.054322946861501,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 9791
    },
    {
      "epoch": 0.09792,
      "grad_norm": 1.0079326867694463,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 9792
    },
    {
      "epoch": 0.09793,
      "grad_norm": 1.1905527226997223,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 9793
    },
    {
      "epoch": 0.09794,
      "grad_norm": 1.2912865761568724,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 9794
    },
    {
      "epoch": 0.09795,
      "grad_norm": 1.0946945932266972,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 9795
    },
    {
      "epoch": 0.09796,
      "grad_norm": 1.2185676619450705,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 9796
    },
    {
      "epoch": 0.09797,
      "grad_norm": 1.283197500753962,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 9797
    },
    {
      "epoch": 0.09798,
      "grad_norm": 0.9618699050164604,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 9798
    },
    {
      "epoch": 0.09799,
      "grad_norm": 1.0714888118856691,
      "learning_rate": 0.003,
      "loss": 4.1065,
      "step": 9799
    },
    {
      "epoch": 0.098,
      "grad_norm": 1.1761297292426933,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 9800
    },
    {
      "epoch": 0.09801,
      "grad_norm": 1.2281890490435963,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 9801
    },
    {
      "epoch": 0.09802,
      "grad_norm": 1.0664551725656253,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 9802
    },
    {
      "epoch": 0.09803,
      "grad_norm": 1.1046441878202422,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 9803
    },
    {
      "epoch": 0.09804,
      "grad_norm": 0.9496888889795306,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 9804
    },
    {
      "epoch": 0.09805,
      "grad_norm": 1.0844565308342573,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 9805
    },
    {
      "epoch": 0.09806,
      "grad_norm": 1.0849479136330835,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 9806
    },
    {
      "epoch": 0.09807,
      "grad_norm": 1.1895584455614043,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 9807
    },
    {
      "epoch": 0.09808,
      "grad_norm": 1.0288089992322405,
      "learning_rate": 0.003,
      "loss": 4.1084,
      "step": 9808
    },
    {
      "epoch": 0.09809,
      "grad_norm": 1.2608877275032968,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 9809
    },
    {
      "epoch": 0.0981,
      "grad_norm": 1.1758889828454,
      "learning_rate": 0.003,
      "loss": 4.1096,
      "step": 9810
    },
    {
      "epoch": 0.09811,
      "grad_norm": 1.0784427152170986,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 9811
    },
    {
      "epoch": 0.09812,
      "grad_norm": 1.2805804980790638,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 9812
    },
    {
      "epoch": 0.09813,
      "grad_norm": 1.224801700658708,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 9813
    },
    {
      "epoch": 0.09814,
      "grad_norm": 1.341835488155258,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 9814
    },
    {
      "epoch": 0.09815,
      "grad_norm": 1.2555265063780896,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 9815
    },
    {
      "epoch": 0.09816,
      "grad_norm": 1.1577054447465116,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 9816
    },
    {
      "epoch": 0.09817,
      "grad_norm": 1.2693818580526928,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 9817
    },
    {
      "epoch": 0.09818,
      "grad_norm": 1.2054892593089115,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 9818
    },
    {
      "epoch": 0.09819,
      "grad_norm": 1.0065370155399669,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 9819
    },
    {
      "epoch": 0.0982,
      "grad_norm": 1.05278766664134,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 9820
    },
    {
      "epoch": 0.09821,
      "grad_norm": 1.107489019798911,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 9821
    },
    {
      "epoch": 0.09822,
      "grad_norm": 1.3877140403407857,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 9822
    },
    {
      "epoch": 0.09823,
      "grad_norm": 1.0026511211775835,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 9823
    },
    {
      "epoch": 0.09824,
      "grad_norm": 1.033939959190294,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 9824
    },
    {
      "epoch": 0.09825,
      "grad_norm": 1.2849763015452975,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 9825
    },
    {
      "epoch": 0.09826,
      "grad_norm": 1.0313620019572372,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 9826
    },
    {
      "epoch": 0.09827,
      "grad_norm": 1.1878648695251066,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 9827
    },
    {
      "epoch": 0.09828,
      "grad_norm": 1.2119833843882561,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 9828
    },
    {
      "epoch": 0.09829,
      "grad_norm": 1.1477303105835792,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 9829
    },
    {
      "epoch": 0.0983,
      "grad_norm": 1.0635731826395092,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 9830
    },
    {
      "epoch": 0.09831,
      "grad_norm": 1.293170125069096,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 9831
    },
    {
      "epoch": 0.09832,
      "grad_norm": 0.9769068590933736,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 9832
    },
    {
      "epoch": 0.09833,
      "grad_norm": 1.1407951573643962,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 9833
    },
    {
      "epoch": 0.09834,
      "grad_norm": 1.1059375659080322,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 9834
    },
    {
      "epoch": 0.09835,
      "grad_norm": 1.168854618660162,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 9835
    },
    {
      "epoch": 0.09836,
      "grad_norm": 0.9730803504426415,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 9836
    },
    {
      "epoch": 0.09837,
      "grad_norm": 1.2347465653452903,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 9837
    },
    {
      "epoch": 0.09838,
      "grad_norm": 1.1476121202950844,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 9838
    },
    {
      "epoch": 0.09839,
      "grad_norm": 1.2696542282974184,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 9839
    },
    {
      "epoch": 0.0984,
      "grad_norm": 1.0724548362610222,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 9840
    },
    {
      "epoch": 0.09841,
      "grad_norm": 1.0152332779536721,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 9841
    },
    {
      "epoch": 0.09842,
      "grad_norm": 1.1771386010629616,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 9842
    },
    {
      "epoch": 0.09843,
      "grad_norm": 0.9479731922186927,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 9843
    },
    {
      "epoch": 0.09844,
      "grad_norm": 1.17928222050337,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 9844
    },
    {
      "epoch": 0.09845,
      "grad_norm": 1.0128480063087133,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 9845
    },
    {
      "epoch": 0.09846,
      "grad_norm": 1.332545163467496,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 9846
    },
    {
      "epoch": 0.09847,
      "grad_norm": 1.1738880704546049,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 9847
    },
    {
      "epoch": 0.09848,
      "grad_norm": 1.1376872338122561,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 9848
    },
    {
      "epoch": 0.09849,
      "grad_norm": 1.1281981446522997,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 9849
    },
    {
      "epoch": 0.0985,
      "grad_norm": 1.3198147193649996,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 9850
    },
    {
      "epoch": 0.09851,
      "grad_norm": 0.9562453134354812,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 9851
    },
    {
      "epoch": 0.09852,
      "grad_norm": 1.3740365526158849,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 9852
    },
    {
      "epoch": 0.09853,
      "grad_norm": 1.0095709459654698,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 9853
    },
    {
      "epoch": 0.09854,
      "grad_norm": 1.2468967661465074,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 9854
    },
    {
      "epoch": 0.09855,
      "grad_norm": 1.0743143852792683,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 9855
    },
    {
      "epoch": 0.09856,
      "grad_norm": 1.3474930801551417,
      "learning_rate": 0.003,
      "loss": 4.1161,
      "step": 9856
    },
    {
      "epoch": 0.09857,
      "grad_norm": 1.12785581796951,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 9857
    },
    {
      "epoch": 0.09858,
      "grad_norm": 1.0233629542205291,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 9858
    },
    {
      "epoch": 0.09859,
      "grad_norm": 1.3543380171211834,
      "learning_rate": 0.003,
      "loss": 4.1244,
      "step": 9859
    },
    {
      "epoch": 0.0986,
      "grad_norm": 0.9674230733367689,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 9860
    },
    {
      "epoch": 0.09861,
      "grad_norm": 1.2444572560734313,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 9861
    },
    {
      "epoch": 0.09862,
      "grad_norm": 1.0081407117477676,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 9862
    },
    {
      "epoch": 0.09863,
      "grad_norm": 1.3463742496274695,
      "learning_rate": 0.003,
      "loss": 4.1201,
      "step": 9863
    },
    {
      "epoch": 0.09864,
      "grad_norm": 1.1319604981703015,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 9864
    },
    {
      "epoch": 0.09865,
      "grad_norm": 1.174713167251643,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 9865
    },
    {
      "epoch": 0.09866,
      "grad_norm": 1.1897800123686184,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 9866
    },
    {
      "epoch": 0.09867,
      "grad_norm": 1.0641636117425668,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 9867
    },
    {
      "epoch": 0.09868,
      "grad_norm": 1.0050044319605966,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 9868
    },
    {
      "epoch": 0.09869,
      "grad_norm": 1.2371922187774345,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 9869
    },
    {
      "epoch": 0.0987,
      "grad_norm": 1.0979751060301617,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 9870
    },
    {
      "epoch": 0.09871,
      "grad_norm": 1.075197663211,
      "learning_rate": 0.003,
      "loss": 4.1416,
      "step": 9871
    },
    {
      "epoch": 0.09872,
      "grad_norm": 1.3162216075817015,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 9872
    },
    {
      "epoch": 0.09873,
      "grad_norm": 0.9695957927551048,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 9873
    },
    {
      "epoch": 0.09874,
      "grad_norm": 1.141162495266731,
      "learning_rate": 0.003,
      "loss": 4.1051,
      "step": 9874
    },
    {
      "epoch": 0.09875,
      "grad_norm": 1.3599541714097816,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 9875
    },
    {
      "epoch": 0.09876,
      "grad_norm": 1.2469087193309278,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 9876
    },
    {
      "epoch": 0.09877,
      "grad_norm": 1.1735176016241307,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 9877
    },
    {
      "epoch": 0.09878,
      "grad_norm": 1.281465889737412,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 9878
    },
    {
      "epoch": 0.09879,
      "grad_norm": 1.041824837320346,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 9879
    },
    {
      "epoch": 0.0988,
      "grad_norm": 1.0776427620189757,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 9880
    },
    {
      "epoch": 0.09881,
      "grad_norm": 0.9551291296156181,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 9881
    },
    {
      "epoch": 0.09882,
      "grad_norm": 1.2407348799554545,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 9882
    },
    {
      "epoch": 0.09883,
      "grad_norm": 1.0724628106467027,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 9883
    },
    {
      "epoch": 0.09884,
      "grad_norm": 1.2312626740236658,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 9884
    },
    {
      "epoch": 0.09885,
      "grad_norm": 1.1214546593036556,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 9885
    },
    {
      "epoch": 0.09886,
      "grad_norm": 1.2006637515281076,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 9886
    },
    {
      "epoch": 0.09887,
      "grad_norm": 1.0990165349102805,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 9887
    },
    {
      "epoch": 0.09888,
      "grad_norm": 1.2350170155045428,
      "learning_rate": 0.003,
      "loss": 4.1174,
      "step": 9888
    },
    {
      "epoch": 0.09889,
      "grad_norm": 1.362476733084362,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 9889
    },
    {
      "epoch": 0.0989,
      "grad_norm": 0.7819269212449568,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 9890
    },
    {
      "epoch": 0.09891,
      "grad_norm": 1.048931288056113,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 9891
    },
    {
      "epoch": 0.09892,
      "grad_norm": 1.107313711237237,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 9892
    },
    {
      "epoch": 0.09893,
      "grad_norm": 0.966456965078536,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 9893
    },
    {
      "epoch": 0.09894,
      "grad_norm": 1.2879625194596298,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 9894
    },
    {
      "epoch": 0.09895,
      "grad_norm": 1.010388942214523,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 9895
    },
    {
      "epoch": 0.09896,
      "grad_norm": 1.469756639451844,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 9896
    },
    {
      "epoch": 0.09897,
      "grad_norm": 0.9724964169587819,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 9897
    },
    {
      "epoch": 0.09898,
      "grad_norm": 1.110062957321316,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 9898
    },
    {
      "epoch": 0.09899,
      "grad_norm": 1.1012435868630626,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 9899
    },
    {
      "epoch": 0.099,
      "grad_norm": 1.1331555108422586,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 9900
    },
    {
      "epoch": 0.09901,
      "grad_norm": 1.240416809700766,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 9901
    },
    {
      "epoch": 0.09902,
      "grad_norm": 1.2370388742960592,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 9902
    },
    {
      "epoch": 0.09903,
      "grad_norm": 1.1397273835793245,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 9903
    },
    {
      "epoch": 0.09904,
      "grad_norm": 1.0060094292673782,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 9904
    },
    {
      "epoch": 0.09905,
      "grad_norm": 1.3423527405582918,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 9905
    },
    {
      "epoch": 0.09906,
      "grad_norm": 1.1284117962253848,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 9906
    },
    {
      "epoch": 0.09907,
      "grad_norm": 1.3696783895343145,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 9907
    },
    {
      "epoch": 0.09908,
      "grad_norm": 1.2581458797401222,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 9908
    },
    {
      "epoch": 0.09909,
      "grad_norm": 1.1631948089672632,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 9909
    },
    {
      "epoch": 0.0991,
      "grad_norm": 1.044196951382137,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 9910
    },
    {
      "epoch": 0.09911,
      "grad_norm": 1.1236345691959386,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 9911
    },
    {
      "epoch": 0.09912,
      "grad_norm": 1.1346415688903262,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 9912
    },
    {
      "epoch": 0.09913,
      "grad_norm": 1.135605780851906,
      "learning_rate": 0.003,
      "loss": 4.1068,
      "step": 9913
    },
    {
      "epoch": 0.09914,
      "grad_norm": 0.9738857919843333,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 9914
    },
    {
      "epoch": 0.09915,
      "grad_norm": 1.1826516493578803,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 9915
    },
    {
      "epoch": 0.09916,
      "grad_norm": 1.0458826743541352,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 9916
    },
    {
      "epoch": 0.09917,
      "grad_norm": 1.2069309328906987,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 9917
    },
    {
      "epoch": 0.09918,
      "grad_norm": 1.093532181480916,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 9918
    },
    {
      "epoch": 0.09919,
      "grad_norm": 1.0556920632213929,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 9919
    },
    {
      "epoch": 0.0992,
      "grad_norm": 1.2088706818022557,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 9920
    },
    {
      "epoch": 0.09921,
      "grad_norm": 1.16100001226493,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 9921
    },
    {
      "epoch": 0.09922,
      "grad_norm": 1.3171688806014334,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 9922
    },
    {
      "epoch": 0.09923,
      "grad_norm": 1.1526997426645762,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 9923
    },
    {
      "epoch": 0.09924,
      "grad_norm": 0.9461549921276146,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 9924
    },
    {
      "epoch": 0.09925,
      "grad_norm": 0.9614350868486845,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 9925
    },
    {
      "epoch": 0.09926,
      "grad_norm": 1.2208880766976027,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 9926
    },
    {
      "epoch": 0.09927,
      "grad_norm": 1.0350078279501103,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 9927
    },
    {
      "epoch": 0.09928,
      "grad_norm": 1.290699463729032,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 9928
    },
    {
      "epoch": 0.09929,
      "grad_norm": 0.9234887678298359,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 9929
    },
    {
      "epoch": 0.0993,
      "grad_norm": 1.0780902297276813,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 9930
    },
    {
      "epoch": 0.09931,
      "grad_norm": 1.1709209020702502,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 9931
    },
    {
      "epoch": 0.09932,
      "grad_norm": 1.0557018974394423,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 9932
    },
    {
      "epoch": 0.09933,
      "grad_norm": 1.277625174006262,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 9933
    },
    {
      "epoch": 0.09934,
      "grad_norm": 1.1428722092685848,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 9934
    },
    {
      "epoch": 0.09935,
      "grad_norm": 1.3283362969267656,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 9935
    },
    {
      "epoch": 0.09936,
      "grad_norm": 1.2116884651385684,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 9936
    },
    {
      "epoch": 0.09937,
      "grad_norm": 1.1129728817050744,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 9937
    },
    {
      "epoch": 0.09938,
      "grad_norm": 1.076087084617254,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 9938
    },
    {
      "epoch": 0.09939,
      "grad_norm": 1.1984980026743222,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 9939
    },
    {
      "epoch": 0.0994,
      "grad_norm": 1.0631816350352683,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 9940
    },
    {
      "epoch": 0.09941,
      "grad_norm": 1.0349558778639734,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 9941
    },
    {
      "epoch": 0.09942,
      "grad_norm": 1.3832430713974921,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 9942
    },
    {
      "epoch": 0.09943,
      "grad_norm": 1.2127412232640076,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 9943
    },
    {
      "epoch": 0.09944,
      "grad_norm": 1.1014547273341169,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 9944
    },
    {
      "epoch": 0.09945,
      "grad_norm": 1.1126941512319144,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 9945
    },
    {
      "epoch": 0.09946,
      "grad_norm": 1.143588221605322,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 9946
    },
    {
      "epoch": 0.09947,
      "grad_norm": 1.1319703116283337,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 9947
    },
    {
      "epoch": 0.09948,
      "grad_norm": 1.184782613822203,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 9948
    },
    {
      "epoch": 0.09949,
      "grad_norm": 1.2660838975989452,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 9949
    },
    {
      "epoch": 0.0995,
      "grad_norm": 1.0530905713274332,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 9950
    },
    {
      "epoch": 0.09951,
      "grad_norm": 1.2458577796529136,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 9951
    },
    {
      "epoch": 0.09952,
      "grad_norm": 1.086892599123822,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 9952
    },
    {
      "epoch": 0.09953,
      "grad_norm": 1.2738241159815291,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 9953
    },
    {
      "epoch": 0.09954,
      "grad_norm": 1.1805167886930188,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 9954
    },
    {
      "epoch": 0.09955,
      "grad_norm": 1.159573083100745,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 9955
    },
    {
      "epoch": 0.09956,
      "grad_norm": 1.03695422736725,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 9956
    },
    {
      "epoch": 0.09957,
      "grad_norm": 1.2902593078263713,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 9957
    },
    {
      "epoch": 0.09958,
      "grad_norm": 1.0541302884915535,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 9958
    },
    {
      "epoch": 0.09959,
      "grad_norm": 1.2123162248067152,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 9959
    },
    {
      "epoch": 0.0996,
      "grad_norm": 0.8893684589311062,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 9960
    },
    {
      "epoch": 0.09961,
      "grad_norm": 1.0680186135141085,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 9961
    },
    {
      "epoch": 0.09962,
      "grad_norm": 1.0769231089037596,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 9962
    },
    {
      "epoch": 0.09963,
      "grad_norm": 1.157455437174743,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 9963
    },
    {
      "epoch": 0.09964,
      "grad_norm": 1.167599421268358,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 9964
    },
    {
      "epoch": 0.09965,
      "grad_norm": 1.0009527785807748,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 9965
    },
    {
      "epoch": 0.09966,
      "grad_norm": 1.1837576069622928,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 9966
    },
    {
      "epoch": 0.09967,
      "grad_norm": 1.3602843120067374,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 9967
    },
    {
      "epoch": 0.09968,
      "grad_norm": 1.2627589377158193,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 9968
    },
    {
      "epoch": 0.09969,
      "grad_norm": 1.0719830176194494,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 9969
    },
    {
      "epoch": 0.0997,
      "grad_norm": 1.0574384689662466,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 9970
    },
    {
      "epoch": 0.09971,
      "grad_norm": 1.229985803627891,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 9971
    },
    {
      "epoch": 0.09972,
      "grad_norm": 1.022626878926486,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 9972
    },
    {
      "epoch": 0.09973,
      "grad_norm": 1.3166393251005053,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 9973
    },
    {
      "epoch": 0.09974,
      "grad_norm": 0.9811358575164996,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 9974
    },
    {
      "epoch": 0.09975,
      "grad_norm": 1.3113017482993325,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 9975
    },
    {
      "epoch": 0.09976,
      "grad_norm": 0.9954690018023183,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 9976
    },
    {
      "epoch": 0.09977,
      "grad_norm": 1.3210366296189624,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 9977
    },
    {
      "epoch": 0.09978,
      "grad_norm": 1.006727568887328,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 9978
    },
    {
      "epoch": 0.09979,
      "grad_norm": 1.4046031646009436,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 9979
    },
    {
      "epoch": 0.0998,
      "grad_norm": 1.1228742990032854,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 9980
    },
    {
      "epoch": 0.09981,
      "grad_norm": 1.0350610981605155,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 9981
    },
    {
      "epoch": 0.09982,
      "grad_norm": 1.1337520050085472,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 9982
    },
    {
      "epoch": 0.09983,
      "grad_norm": 1.21368108908721,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 9983
    },
    {
      "epoch": 0.09984,
      "grad_norm": 1.0936050467548077,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 9984
    },
    {
      "epoch": 0.09985,
      "grad_norm": 1.3692702486619674,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 9985
    },
    {
      "epoch": 0.09986,
      "grad_norm": 1.0255191220694087,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 9986
    },
    {
      "epoch": 0.09987,
      "grad_norm": 1.283385904792894,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 9987
    },
    {
      "epoch": 0.09988,
      "grad_norm": 0.9973600391953245,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 9988
    },
    {
      "epoch": 0.09989,
      "grad_norm": 1.3224856674170413,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 9989
    },
    {
      "epoch": 0.0999,
      "grad_norm": 1.0342040264635208,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 9990
    },
    {
      "epoch": 0.09991,
      "grad_norm": 1.2010872038379294,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 9991
    },
    {
      "epoch": 0.09992,
      "grad_norm": 1.1028513651017675,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 9992
    },
    {
      "epoch": 0.09993,
      "grad_norm": 1.2704048114109967,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 9993
    },
    {
      "epoch": 0.09994,
      "grad_norm": 1.1812143976118226,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 9994
    },
    {
      "epoch": 0.09995,
      "grad_norm": 1.412624941712437,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 9995
    },
    {
      "epoch": 0.09996,
      "grad_norm": 1.1199866494266424,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 9996
    },
    {
      "epoch": 0.09997,
      "grad_norm": 1.108151659835287,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 9997
    },
    {
      "epoch": 0.09998,
      "grad_norm": 1.0462352704362803,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 9998
    },
    {
      "epoch": 0.09999,
      "grad_norm": 1.5166747268978646,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 9999
    },
    {
      "epoch": 0.1,
      "grad_norm": 1.0728207634540272,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 10000
    },
    {
      "epoch": 0.10001,
      "grad_norm": 1.1674018884204582,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 10001
    },
    {
      "epoch": 0.10002,
      "grad_norm": 1.0527973953758787,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 10002
    },
    {
      "epoch": 0.10003,
      "grad_norm": 1.2928164974147542,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10003
    },
    {
      "epoch": 0.10004,
      "grad_norm": 0.8969958806342154,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 10004
    },
    {
      "epoch": 0.10005,
      "grad_norm": 1.1574074627438358,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 10005
    },
    {
      "epoch": 0.10006,
      "grad_norm": 1.3771495047557027,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 10006
    },
    {
      "epoch": 0.10007,
      "grad_norm": 1.167333352905923,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 10007
    },
    {
      "epoch": 0.10008,
      "grad_norm": 1.2023273467074431,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 10008
    },
    {
      "epoch": 0.10009,
      "grad_norm": 1.074482670712341,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10009
    },
    {
      "epoch": 0.1001,
      "grad_norm": 1.211996301608577,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 10010
    },
    {
      "epoch": 0.10011,
      "grad_norm": 0.9009540566883109,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 10011
    },
    {
      "epoch": 0.10012,
      "grad_norm": 1.0564974927278068,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10012
    },
    {
      "epoch": 0.10013,
      "grad_norm": 1.3350438104824471,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 10013
    },
    {
      "epoch": 0.10014,
      "grad_norm": 1.0822425068020562,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 10014
    },
    {
      "epoch": 0.10015,
      "grad_norm": 1.019372729465579,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 10015
    },
    {
      "epoch": 0.10016,
      "grad_norm": 1.1682122132964539,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 10016
    },
    {
      "epoch": 0.10017,
      "grad_norm": 1.0636692414598121,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 10017
    },
    {
      "epoch": 0.10018,
      "grad_norm": 1.153668222529384,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 10018
    },
    {
      "epoch": 0.10019,
      "grad_norm": 1.1863489301505494,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 10019
    },
    {
      "epoch": 0.1002,
      "grad_norm": 1.1426419458719907,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 10020
    },
    {
      "epoch": 0.10021,
      "grad_norm": 1.2306087761188012,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 10021
    },
    {
      "epoch": 0.10022,
      "grad_norm": 1.0297341287287447,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 10022
    },
    {
      "epoch": 0.10023,
      "grad_norm": 1.2320108667621523,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 10023
    },
    {
      "epoch": 0.10024,
      "grad_norm": 1.0916466193494474,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 10024
    },
    {
      "epoch": 0.10025,
      "grad_norm": 1.1464372389238344,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 10025
    },
    {
      "epoch": 0.10026,
      "grad_norm": 1.2105092192886502,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 10026
    },
    {
      "epoch": 0.10027,
      "grad_norm": 1.0593198000420767,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10027
    },
    {
      "epoch": 0.10028,
      "grad_norm": 1.2124348310175164,
      "learning_rate": 0.003,
      "loss": 4.1178,
      "step": 10028
    },
    {
      "epoch": 0.10029,
      "grad_norm": 1.3449665326048608,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 10029
    },
    {
      "epoch": 0.1003,
      "grad_norm": 1.13252213762222,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 10030
    },
    {
      "epoch": 0.10031,
      "grad_norm": 1.208957749908409,
      "learning_rate": 0.003,
      "loss": 4.1191,
      "step": 10031
    },
    {
      "epoch": 0.10032,
      "grad_norm": 1.0843050238714709,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 10032
    },
    {
      "epoch": 0.10033,
      "grad_norm": 1.1118594428514184,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 10033
    },
    {
      "epoch": 0.10034,
      "grad_norm": 1.1454301997431835,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 10034
    },
    {
      "epoch": 0.10035,
      "grad_norm": 1.2373694067703571,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 10035
    },
    {
      "epoch": 0.10036,
      "grad_norm": 1.234927886756976,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 10036
    },
    {
      "epoch": 0.10037,
      "grad_norm": 1.183130686021368,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 10037
    },
    {
      "epoch": 0.10038,
      "grad_norm": 1.1581871376157453,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 10038
    },
    {
      "epoch": 0.10039,
      "grad_norm": 1.0805442201078972,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 10039
    },
    {
      "epoch": 0.1004,
      "grad_norm": 1.0644531472003973,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10040
    },
    {
      "epoch": 0.10041,
      "grad_norm": 1.1689125902658166,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 10041
    },
    {
      "epoch": 0.10042,
      "grad_norm": 0.9131143869964985,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 10042
    },
    {
      "epoch": 0.10043,
      "grad_norm": 1.1125978766470848,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 10043
    },
    {
      "epoch": 0.10044,
      "grad_norm": 1.1554321741109186,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 10044
    },
    {
      "epoch": 0.10045,
      "grad_norm": 0.9476401166924924,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 10045
    },
    {
      "epoch": 0.10046,
      "grad_norm": 1.472333260084941,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 10046
    },
    {
      "epoch": 0.10047,
      "grad_norm": 0.9478781520731763,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 10047
    },
    {
      "epoch": 0.10048,
      "grad_norm": 1.2498078049575723,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 10048
    },
    {
      "epoch": 0.10049,
      "grad_norm": 1.1847876378870514,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 10049
    },
    {
      "epoch": 0.1005,
      "grad_norm": 1.4042126593877933,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 10050
    },
    {
      "epoch": 0.10051,
      "grad_norm": 1.1738786717009158,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10051
    },
    {
      "epoch": 0.10052,
      "grad_norm": 1.0152151745073639,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 10052
    },
    {
      "epoch": 0.10053,
      "grad_norm": 1.158818668265983,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10053
    },
    {
      "epoch": 0.10054,
      "grad_norm": 1.3629148640865112,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 10054
    },
    {
      "epoch": 0.10055,
      "grad_norm": 0.9977236962370847,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 10055
    },
    {
      "epoch": 0.10056,
      "grad_norm": 1.3482816658394208,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 10056
    },
    {
      "epoch": 0.10057,
      "grad_norm": 0.9539265856157081,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 10057
    },
    {
      "epoch": 0.10058,
      "grad_norm": 1.1076892954413688,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 10058
    },
    {
      "epoch": 0.10059,
      "grad_norm": 1.0197333845789176,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 10059
    },
    {
      "epoch": 0.1006,
      "grad_norm": 1.2717378385736515,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 10060
    },
    {
      "epoch": 0.10061,
      "grad_norm": 1.1525924759121828,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 10061
    },
    {
      "epoch": 0.10062,
      "grad_norm": 1.1978969573423361,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 10062
    },
    {
      "epoch": 0.10063,
      "grad_norm": 1.1840545782953635,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 10063
    },
    {
      "epoch": 0.10064,
      "grad_norm": 1.0449591042434434,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 10064
    },
    {
      "epoch": 0.10065,
      "grad_norm": 1.212776165261585,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 10065
    },
    {
      "epoch": 0.10066,
      "grad_norm": 1.2286144576629734,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 10066
    },
    {
      "epoch": 0.10067,
      "grad_norm": 1.1815206209397082,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 10067
    },
    {
      "epoch": 0.10068,
      "grad_norm": 1.089605118069462,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 10068
    },
    {
      "epoch": 0.10069,
      "grad_norm": 1.2751774093739108,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 10069
    },
    {
      "epoch": 0.1007,
      "grad_norm": 1.0518315664497935,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 10070
    },
    {
      "epoch": 0.10071,
      "grad_norm": 1.0075982956011604,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 10071
    },
    {
      "epoch": 0.10072,
      "grad_norm": 0.9328071534298303,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 10072
    },
    {
      "epoch": 0.10073,
      "grad_norm": 1.0491017217453746,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 10073
    },
    {
      "epoch": 0.10074,
      "grad_norm": 1.1418471177068579,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 10074
    },
    {
      "epoch": 0.10075,
      "grad_norm": 0.9615639895687752,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 10075
    },
    {
      "epoch": 0.10076,
      "grad_norm": 1.186944099321991,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 10076
    },
    {
      "epoch": 0.10077,
      "grad_norm": 1.1045264647874289,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 10077
    },
    {
      "epoch": 0.10078,
      "grad_norm": 1.3580204083886844,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 10078
    },
    {
      "epoch": 0.10079,
      "grad_norm": 1.119001442915295,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 10079
    },
    {
      "epoch": 0.1008,
      "grad_norm": 1.6069412879920508,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 10080
    },
    {
      "epoch": 0.10081,
      "grad_norm": 1.0604760790650596,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 10081
    },
    {
      "epoch": 0.10082,
      "grad_norm": 1.1115804845269284,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10082
    },
    {
      "epoch": 0.10083,
      "grad_norm": 1.238741868903038,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 10083
    },
    {
      "epoch": 0.10084,
      "grad_norm": 1.2314664198702576,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 10084
    },
    {
      "epoch": 0.10085,
      "grad_norm": 1.1833832812757277,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 10085
    },
    {
      "epoch": 0.10086,
      "grad_norm": 1.0137645005741707,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 10086
    },
    {
      "epoch": 0.10087,
      "grad_norm": 1.231342789321368,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 10087
    },
    {
      "epoch": 0.10088,
      "grad_norm": 0.9848663896765558,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 10088
    },
    {
      "epoch": 0.10089,
      "grad_norm": 1.2766688039526612,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 10089
    },
    {
      "epoch": 0.1009,
      "grad_norm": 1.0665591732242785,
      "learning_rate": 0.003,
      "loss": 4.1106,
      "step": 10090
    },
    {
      "epoch": 0.10091,
      "grad_norm": 1.3844890839324862,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 10091
    },
    {
      "epoch": 0.10092,
      "grad_norm": 0.9703410910939055,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 10092
    },
    {
      "epoch": 0.10093,
      "grad_norm": 1.1081454322650999,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 10093
    },
    {
      "epoch": 0.10094,
      "grad_norm": 1.2702596843147151,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 10094
    },
    {
      "epoch": 0.10095,
      "grad_norm": 1.3241180876871614,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 10095
    },
    {
      "epoch": 0.10096,
      "grad_norm": 1.063782838125781,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10096
    },
    {
      "epoch": 0.10097,
      "grad_norm": 1.173590221046823,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10097
    },
    {
      "epoch": 0.10098,
      "grad_norm": 1.1388690036673108,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 10098
    },
    {
      "epoch": 0.10099,
      "grad_norm": 1.016521589387514,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 10099
    },
    {
      "epoch": 0.101,
      "grad_norm": 1.124890576751929,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 10100
    },
    {
      "epoch": 0.10101,
      "grad_norm": 1.0702676449322102,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 10101
    },
    {
      "epoch": 0.10102,
      "grad_norm": 1.0594937403569906,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 10102
    },
    {
      "epoch": 0.10103,
      "grad_norm": 1.1189786533695918,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 10103
    },
    {
      "epoch": 0.10104,
      "grad_norm": 1.1621062267666284,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10104
    },
    {
      "epoch": 0.10105,
      "grad_norm": 1.1586349551608832,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 10105
    },
    {
      "epoch": 0.10106,
      "grad_norm": 1.2449125737307989,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 10106
    },
    {
      "epoch": 0.10107,
      "grad_norm": 1.0103660592944312,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10107
    },
    {
      "epoch": 0.10108,
      "grad_norm": 1.2430486420035323,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 10108
    },
    {
      "epoch": 0.10109,
      "grad_norm": 1.060115161096371,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 10109
    },
    {
      "epoch": 0.1011,
      "grad_norm": 1.0405717245223134,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 10110
    },
    {
      "epoch": 0.10111,
      "grad_norm": 1.166298936720804,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 10111
    },
    {
      "epoch": 0.10112,
      "grad_norm": 1.2285570938753498,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 10112
    },
    {
      "epoch": 0.10113,
      "grad_norm": 1.014117471053155,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 10113
    },
    {
      "epoch": 0.10114,
      "grad_norm": 1.1714584848865766,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 10114
    },
    {
      "epoch": 0.10115,
      "grad_norm": 1.171191771839053,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10115
    },
    {
      "epoch": 0.10116,
      "grad_norm": 1.1268191412711248,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 10116
    },
    {
      "epoch": 0.10117,
      "grad_norm": 1.056060456963882,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 10117
    },
    {
      "epoch": 0.10118,
      "grad_norm": 1.14423470231977,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 10118
    },
    {
      "epoch": 0.10119,
      "grad_norm": 1.2978002036951515,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 10119
    },
    {
      "epoch": 0.1012,
      "grad_norm": 0.9773861726106657,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 10120
    },
    {
      "epoch": 0.10121,
      "grad_norm": 1.2298033697165904,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 10121
    },
    {
      "epoch": 0.10122,
      "grad_norm": 1.2027309543987579,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 10122
    },
    {
      "epoch": 0.10123,
      "grad_norm": 1.0867591534868284,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 10123
    },
    {
      "epoch": 0.10124,
      "grad_norm": 1.2447200356116115,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 10124
    },
    {
      "epoch": 0.10125,
      "grad_norm": 1.0661379327703835,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 10125
    },
    {
      "epoch": 0.10126,
      "grad_norm": 1.1293357584285106,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 10126
    },
    {
      "epoch": 0.10127,
      "grad_norm": 1.0276292424876197,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 10127
    },
    {
      "epoch": 0.10128,
      "grad_norm": 1.3109229855803073,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 10128
    },
    {
      "epoch": 0.10129,
      "grad_norm": 1.175979220904674,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 10129
    },
    {
      "epoch": 0.1013,
      "grad_norm": 1.046008366778478,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 10130
    },
    {
      "epoch": 0.10131,
      "grad_norm": 1.2889435751549339,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 10131
    },
    {
      "epoch": 0.10132,
      "grad_norm": 1.3251036219526253,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 10132
    },
    {
      "epoch": 0.10133,
      "grad_norm": 1.1995187882520537,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 10133
    },
    {
      "epoch": 0.10134,
      "grad_norm": 0.9472900131078198,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 10134
    },
    {
      "epoch": 0.10135,
      "grad_norm": 1.1108816955880372,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 10135
    },
    {
      "epoch": 0.10136,
      "grad_norm": 1.078636334857174,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 10136
    },
    {
      "epoch": 0.10137,
      "grad_norm": 1.3105555106578144,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 10137
    },
    {
      "epoch": 0.10138,
      "grad_norm": 1.1199739555288444,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 10138
    },
    {
      "epoch": 0.10139,
      "grad_norm": 1.3912021727977202,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10139
    },
    {
      "epoch": 0.1014,
      "grad_norm": 1.0491432390341304,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 10140
    },
    {
      "epoch": 0.10141,
      "grad_norm": 1.1476014585804746,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 10141
    },
    {
      "epoch": 0.10142,
      "grad_norm": 1.059954567327284,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 10142
    },
    {
      "epoch": 0.10143,
      "grad_norm": 1.418874522825989,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 10143
    },
    {
      "epoch": 0.10144,
      "grad_norm": 1.0628985240296076,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 10144
    },
    {
      "epoch": 0.10145,
      "grad_norm": 1.2768614034132377,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 10145
    },
    {
      "epoch": 0.10146,
      "grad_norm": 0.8767978842753446,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 10146
    },
    {
      "epoch": 0.10147,
      "grad_norm": 0.9766014836284787,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 10147
    },
    {
      "epoch": 0.10148,
      "grad_norm": 1.2106871464655664,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 10148
    },
    {
      "epoch": 0.10149,
      "grad_norm": 0.888207092902911,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 10149
    },
    {
      "epoch": 0.1015,
      "grad_norm": 1.2922590715806803,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 10150
    },
    {
      "epoch": 0.10151,
      "grad_norm": 1.237040116341225,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 10151
    },
    {
      "epoch": 0.10152,
      "grad_norm": 1.0625251992129978,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10152
    },
    {
      "epoch": 0.10153,
      "grad_norm": 1.075075130630272,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 10153
    },
    {
      "epoch": 0.10154,
      "grad_norm": 1.1080444014221436,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 10154
    },
    {
      "epoch": 0.10155,
      "grad_norm": 1.2551412407034876,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 10155
    },
    {
      "epoch": 0.10156,
      "grad_norm": 1.0021536921323417,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 10156
    },
    {
      "epoch": 0.10157,
      "grad_norm": 1.175049036818617,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 10157
    },
    {
      "epoch": 0.10158,
      "grad_norm": 1.0293363891023952,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 10158
    },
    {
      "epoch": 0.10159,
      "grad_norm": 1.0548857368096765,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 10159
    },
    {
      "epoch": 0.1016,
      "grad_norm": 1.329175614790365,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 10160
    },
    {
      "epoch": 0.10161,
      "grad_norm": 0.9488327406094123,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 10161
    },
    {
      "epoch": 0.10162,
      "grad_norm": 1.4323787828675487,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 10162
    },
    {
      "epoch": 0.10163,
      "grad_norm": 1.1936836804121034,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10163
    },
    {
      "epoch": 0.10164,
      "grad_norm": 1.406321899502577,
      "learning_rate": 0.003,
      "loss": 4.1087,
      "step": 10164
    },
    {
      "epoch": 0.10165,
      "grad_norm": 1.2261032646604577,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10165
    },
    {
      "epoch": 0.10166,
      "grad_norm": 0.9461556092810096,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 10166
    },
    {
      "epoch": 0.10167,
      "grad_norm": 1.1845316724357464,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 10167
    },
    {
      "epoch": 0.10168,
      "grad_norm": 0.975484573387496,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 10168
    },
    {
      "epoch": 0.10169,
      "grad_norm": 1.283116632412761,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10169
    },
    {
      "epoch": 0.1017,
      "grad_norm": 0.95631152337951,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 10170
    },
    {
      "epoch": 0.10171,
      "grad_norm": 1.1139812130735123,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 10171
    },
    {
      "epoch": 0.10172,
      "grad_norm": 1.2961802093469639,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 10172
    },
    {
      "epoch": 0.10173,
      "grad_norm": 1.1726631483761973,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 10173
    },
    {
      "epoch": 0.10174,
      "grad_norm": 1.2122860250041871,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 10174
    },
    {
      "epoch": 0.10175,
      "grad_norm": 1.050830198920568,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 10175
    },
    {
      "epoch": 0.10176,
      "grad_norm": 1.3590915054220583,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10176
    },
    {
      "epoch": 0.10177,
      "grad_norm": 1.0726629179146294,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 10177
    },
    {
      "epoch": 0.10178,
      "grad_norm": 1.2545949381444286,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 10178
    },
    {
      "epoch": 0.10179,
      "grad_norm": 1.0047716022040467,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 10179
    },
    {
      "epoch": 0.1018,
      "grad_norm": 1.3165911693506096,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 10180
    },
    {
      "epoch": 0.10181,
      "grad_norm": 1.1402632334004243,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10181
    },
    {
      "epoch": 0.10182,
      "grad_norm": 1.1578192434246495,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 10182
    },
    {
      "epoch": 0.10183,
      "grad_norm": 1.141684268742947,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 10183
    },
    {
      "epoch": 0.10184,
      "grad_norm": 1.1923829372124475,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 10184
    },
    {
      "epoch": 0.10185,
      "grad_norm": 1.0715432709277495,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 10185
    },
    {
      "epoch": 0.10186,
      "grad_norm": 1.2311029741984303,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 10186
    },
    {
      "epoch": 0.10187,
      "grad_norm": 1.0152401242124782,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 10187
    },
    {
      "epoch": 0.10188,
      "grad_norm": 1.148194763035427,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 10188
    },
    {
      "epoch": 0.10189,
      "grad_norm": 1.2204798728816166,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 10189
    },
    {
      "epoch": 0.1019,
      "grad_norm": 0.9145267434089139,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 10190
    },
    {
      "epoch": 0.10191,
      "grad_norm": 1.136204850799004,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 10191
    },
    {
      "epoch": 0.10192,
      "grad_norm": 1.373348499431395,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 10192
    },
    {
      "epoch": 0.10193,
      "grad_norm": 1.2291098930583644,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 10193
    },
    {
      "epoch": 0.10194,
      "grad_norm": 1.077193674838292,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 10194
    },
    {
      "epoch": 0.10195,
      "grad_norm": 1.0523312544463759,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 10195
    },
    {
      "epoch": 0.10196,
      "grad_norm": 1.1638451574486604,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 10196
    },
    {
      "epoch": 0.10197,
      "grad_norm": 1.1208148962054274,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 10197
    },
    {
      "epoch": 0.10198,
      "grad_norm": 1.2235618242687356,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 10198
    },
    {
      "epoch": 0.10199,
      "grad_norm": 1.1266562751363347,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 10199
    },
    {
      "epoch": 0.102,
      "grad_norm": 1.3174022901674332,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 10200
    },
    {
      "epoch": 0.10201,
      "grad_norm": 1.1387589562083924,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 10201
    },
    {
      "epoch": 0.10202,
      "grad_norm": 1.1561654783381967,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 10202
    },
    {
      "epoch": 0.10203,
      "grad_norm": 1.239835426005678,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 10203
    },
    {
      "epoch": 0.10204,
      "grad_norm": 0.9884086947734628,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 10204
    },
    {
      "epoch": 0.10205,
      "grad_norm": 1.2302368861346946,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 10205
    },
    {
      "epoch": 0.10206,
      "grad_norm": 1.1093982540522616,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 10206
    },
    {
      "epoch": 0.10207,
      "grad_norm": 1.339176634282011,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 10207
    },
    {
      "epoch": 0.10208,
      "grad_norm": 1.0896168191058506,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 10208
    },
    {
      "epoch": 0.10209,
      "grad_norm": 1.3023601380111545,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 10209
    },
    {
      "epoch": 0.1021,
      "grad_norm": 1.1319714724397607,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 10210
    },
    {
      "epoch": 0.10211,
      "grad_norm": 1.0388223208698757,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 10211
    },
    {
      "epoch": 0.10212,
      "grad_norm": 1.1553139116759326,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 10212
    },
    {
      "epoch": 0.10213,
      "grad_norm": 0.9724491568977133,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 10213
    },
    {
      "epoch": 0.10214,
      "grad_norm": 1.1061949747140027,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 10214
    },
    {
      "epoch": 0.10215,
      "grad_norm": 1.2796200746117097,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 10215
    },
    {
      "epoch": 0.10216,
      "grad_norm": 0.931370707864801,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 10216
    },
    {
      "epoch": 0.10217,
      "grad_norm": 1.3218802452586387,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 10217
    },
    {
      "epoch": 0.10218,
      "grad_norm": 1.433367299708013,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 10218
    },
    {
      "epoch": 0.10219,
      "grad_norm": 1.0503834994854093,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 10219
    },
    {
      "epoch": 0.1022,
      "grad_norm": 1.0554447867190975,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 10220
    },
    {
      "epoch": 0.10221,
      "grad_norm": 1.2932737417269615,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 10221
    },
    {
      "epoch": 0.10222,
      "grad_norm": 1.1891985207325497,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 10222
    },
    {
      "epoch": 0.10223,
      "grad_norm": 1.2076064076458244,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 10223
    },
    {
      "epoch": 0.10224,
      "grad_norm": 0.9557299648740953,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10224
    },
    {
      "epoch": 0.10225,
      "grad_norm": 1.204468346015853,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 10225
    },
    {
      "epoch": 0.10226,
      "grad_norm": 1.2441047386472726,
      "learning_rate": 0.003,
      "loss": 4.1353,
      "step": 10226
    },
    {
      "epoch": 0.10227,
      "grad_norm": 0.9380172757036935,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 10227
    },
    {
      "epoch": 0.10228,
      "grad_norm": 1.2223945943385395,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 10228
    },
    {
      "epoch": 0.10229,
      "grad_norm": 1.316102481302476,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 10229
    },
    {
      "epoch": 0.1023,
      "grad_norm": 0.9082697141003253,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10230
    },
    {
      "epoch": 0.10231,
      "grad_norm": 0.96075390670666,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 10231
    },
    {
      "epoch": 0.10232,
      "grad_norm": 1.15482596015767,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 10232
    },
    {
      "epoch": 0.10233,
      "grad_norm": 1.0928828537089608,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 10233
    },
    {
      "epoch": 0.10234,
      "grad_norm": 0.944162993108171,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 10234
    },
    {
      "epoch": 0.10235,
      "grad_norm": 1.1235742192351625,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 10235
    },
    {
      "epoch": 0.10236,
      "grad_norm": 1.4527585424607339,
      "learning_rate": 0.003,
      "loss": 4.1198,
      "step": 10236
    },
    {
      "epoch": 0.10237,
      "grad_norm": 0.7729456938299287,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 10237
    },
    {
      "epoch": 0.10238,
      "grad_norm": 0.826808362714121,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 10238
    },
    {
      "epoch": 0.10239,
      "grad_norm": 1.0529521678348204,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 10239
    },
    {
      "epoch": 0.1024,
      "grad_norm": 1.2682105340898882,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 10240
    },
    {
      "epoch": 0.10241,
      "grad_norm": 1.0205246994429473,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 10241
    },
    {
      "epoch": 0.10242,
      "grad_norm": 1.2302603927232567,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 10242
    },
    {
      "epoch": 0.10243,
      "grad_norm": 1.403810456792893,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 10243
    },
    {
      "epoch": 0.10244,
      "grad_norm": 1.0296307975878158,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 10244
    },
    {
      "epoch": 0.10245,
      "grad_norm": 1.1353767920298885,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 10245
    },
    {
      "epoch": 0.10246,
      "grad_norm": 1.2374393275711797,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 10246
    },
    {
      "epoch": 0.10247,
      "grad_norm": 1.1135987774709135,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 10247
    },
    {
      "epoch": 0.10248,
      "grad_norm": 1.202901096533362,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 10248
    },
    {
      "epoch": 0.10249,
      "grad_norm": 1.1617662718599118,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 10249
    },
    {
      "epoch": 0.1025,
      "grad_norm": 1.0837696177852594,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 10250
    },
    {
      "epoch": 0.10251,
      "grad_norm": 0.9925390043486788,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 10251
    },
    {
      "epoch": 0.10252,
      "grad_norm": 1.3752058241211946,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 10252
    },
    {
      "epoch": 0.10253,
      "grad_norm": 1.030876728203176,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 10253
    },
    {
      "epoch": 0.10254,
      "grad_norm": 1.2700450563127943,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 10254
    },
    {
      "epoch": 0.10255,
      "grad_norm": 1.2562782348470567,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 10255
    },
    {
      "epoch": 0.10256,
      "grad_norm": 1.2603460522592087,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 10256
    },
    {
      "epoch": 0.10257,
      "grad_norm": 1.080959098447801,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 10257
    },
    {
      "epoch": 0.10258,
      "grad_norm": 1.034112972768806,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 10258
    },
    {
      "epoch": 0.10259,
      "grad_norm": 1.2521568837250108,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 10259
    },
    {
      "epoch": 0.1026,
      "grad_norm": 1.265948982738962,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 10260
    },
    {
      "epoch": 0.10261,
      "grad_norm": 1.382410009056688,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 10261
    },
    {
      "epoch": 0.10262,
      "grad_norm": 1.0064611117395248,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 10262
    },
    {
      "epoch": 0.10263,
      "grad_norm": 1.1945625123239736,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 10263
    },
    {
      "epoch": 0.10264,
      "grad_norm": 1.14540664203943,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 10264
    },
    {
      "epoch": 0.10265,
      "grad_norm": 1.2367135364640374,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10265
    },
    {
      "epoch": 0.10266,
      "grad_norm": 1.1122316758156898,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 10266
    },
    {
      "epoch": 0.10267,
      "grad_norm": 1.0701772786087305,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 10267
    },
    {
      "epoch": 0.10268,
      "grad_norm": 1.0731388472404364,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 10268
    },
    {
      "epoch": 0.10269,
      "grad_norm": 1.3059731702245454,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 10269
    },
    {
      "epoch": 0.1027,
      "grad_norm": 0.9631098566908819,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 10270
    },
    {
      "epoch": 0.10271,
      "grad_norm": 1.152291992671532,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 10271
    },
    {
      "epoch": 0.10272,
      "grad_norm": 1.0656130885686521,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 10272
    },
    {
      "epoch": 0.10273,
      "grad_norm": 1.1249057999871102,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 10273
    },
    {
      "epoch": 0.10274,
      "grad_norm": 1.0611993649516465,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 10274
    },
    {
      "epoch": 0.10275,
      "grad_norm": 1.2992200726524803,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 10275
    },
    {
      "epoch": 0.10276,
      "grad_norm": 1.054504040345836,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 10276
    },
    {
      "epoch": 0.10277,
      "grad_norm": 1.4833995346186497,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 10277
    },
    {
      "epoch": 0.10278,
      "grad_norm": 0.8234878421729,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 10278
    },
    {
      "epoch": 0.10279,
      "grad_norm": 0.9428712206050871,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 10279
    },
    {
      "epoch": 0.1028,
      "grad_norm": 1.4592341165022211,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 10280
    },
    {
      "epoch": 0.10281,
      "grad_norm": 1.156632111531649,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 10281
    },
    {
      "epoch": 0.10282,
      "grad_norm": 1.2933653483347347,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 10282
    },
    {
      "epoch": 0.10283,
      "grad_norm": 1.1659712151559902,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 10283
    },
    {
      "epoch": 0.10284,
      "grad_norm": 1.2280066889594574,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 10284
    },
    {
      "epoch": 0.10285,
      "grad_norm": 1.1488050547183473,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 10285
    },
    {
      "epoch": 0.10286,
      "grad_norm": 1.323738392473033,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 10286
    },
    {
      "epoch": 0.10287,
      "grad_norm": 1.1218747518818815,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 10287
    },
    {
      "epoch": 0.10288,
      "grad_norm": 1.1164468731289583,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 10288
    },
    {
      "epoch": 0.10289,
      "grad_norm": 1.1198194850749519,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 10289
    },
    {
      "epoch": 0.1029,
      "grad_norm": 1.1190422077436522,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 10290
    },
    {
      "epoch": 0.10291,
      "grad_norm": 1.1083018333936319,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 10291
    },
    {
      "epoch": 0.10292,
      "grad_norm": 1.1728920878257667,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 10292
    },
    {
      "epoch": 0.10293,
      "grad_norm": 1.0234079345708287,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 10293
    },
    {
      "epoch": 0.10294,
      "grad_norm": 1.3026166075083452,
      "learning_rate": 0.003,
      "loss": 4.1116,
      "step": 10294
    },
    {
      "epoch": 0.10295,
      "grad_norm": 1.09022240976423,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 10295
    },
    {
      "epoch": 0.10296,
      "grad_norm": 1.1713940517600216,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 10296
    },
    {
      "epoch": 0.10297,
      "grad_norm": 1.3662518963462131,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 10297
    },
    {
      "epoch": 0.10298,
      "grad_norm": 0.8570513244454326,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 10298
    },
    {
      "epoch": 0.10299,
      "grad_norm": 0.9887649979400258,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 10299
    },
    {
      "epoch": 0.103,
      "grad_norm": 1.3373972917994166,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 10300
    },
    {
      "epoch": 0.10301,
      "grad_norm": 1.1238719201347804,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 10301
    },
    {
      "epoch": 0.10302,
      "grad_norm": 1.5150408743774066,
      "learning_rate": 0.003,
      "loss": 4.1212,
      "step": 10302
    },
    {
      "epoch": 0.10303,
      "grad_norm": 1.05105694956177,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 10303
    },
    {
      "epoch": 0.10304,
      "grad_norm": 1.259009056860587,
      "learning_rate": 0.003,
      "loss": 4.1011,
      "step": 10304
    },
    {
      "epoch": 0.10305,
      "grad_norm": 1.021533029187029,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 10305
    },
    {
      "epoch": 0.10306,
      "grad_norm": 1.2456339894412451,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 10306
    },
    {
      "epoch": 0.10307,
      "grad_norm": 0.9300215044673347,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 10307
    },
    {
      "epoch": 0.10308,
      "grad_norm": 1.134643171364985,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10308
    },
    {
      "epoch": 0.10309,
      "grad_norm": 1.178158076352255,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 10309
    },
    {
      "epoch": 0.1031,
      "grad_norm": 1.3390469838383294,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 10310
    },
    {
      "epoch": 0.10311,
      "grad_norm": 1.1157468121360072,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 10311
    },
    {
      "epoch": 0.10312,
      "grad_norm": 1.2246339507992163,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 10312
    },
    {
      "epoch": 0.10313,
      "grad_norm": 1.0604563014042496,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 10313
    },
    {
      "epoch": 0.10314,
      "grad_norm": 1.1819775023994816,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 10314
    },
    {
      "epoch": 0.10315,
      "grad_norm": 1.0248694025643161,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 10315
    },
    {
      "epoch": 0.10316,
      "grad_norm": 1.2689573088204367,
      "learning_rate": 0.003,
      "loss": 4.1202,
      "step": 10316
    },
    {
      "epoch": 0.10317,
      "grad_norm": 1.0715328778410222,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 10317
    },
    {
      "epoch": 0.10318,
      "grad_norm": 1.242756989409096,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 10318
    },
    {
      "epoch": 0.10319,
      "grad_norm": 1.083638891028703,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 10319
    },
    {
      "epoch": 0.1032,
      "grad_norm": 1.1672554434212687,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 10320
    },
    {
      "epoch": 0.10321,
      "grad_norm": 1.1707775081338792,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 10321
    },
    {
      "epoch": 0.10322,
      "grad_norm": 0.986097296114779,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 10322
    },
    {
      "epoch": 0.10323,
      "grad_norm": 1.2429808744997115,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 10323
    },
    {
      "epoch": 0.10324,
      "grad_norm": 1.2546021069410018,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 10324
    },
    {
      "epoch": 0.10325,
      "grad_norm": 1.5501024626716682,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 10325
    },
    {
      "epoch": 0.10326,
      "grad_norm": 1.0117683155283734,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 10326
    },
    {
      "epoch": 0.10327,
      "grad_norm": 1.3447969269996676,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 10327
    },
    {
      "epoch": 0.10328,
      "grad_norm": 0.9361989040980547,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 10328
    },
    {
      "epoch": 0.10329,
      "grad_norm": 1.1057788815729743,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 10329
    },
    {
      "epoch": 0.1033,
      "grad_norm": 1.2098144965183406,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 10330
    },
    {
      "epoch": 0.10331,
      "grad_norm": 1.1005575275556037,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 10331
    },
    {
      "epoch": 0.10332,
      "grad_norm": 1.2796524139321448,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 10332
    },
    {
      "epoch": 0.10333,
      "grad_norm": 1.0127273578550784,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 10333
    },
    {
      "epoch": 0.10334,
      "grad_norm": 1.1144472965857977,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 10334
    },
    {
      "epoch": 0.10335,
      "grad_norm": 1.0218677548334034,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 10335
    },
    {
      "epoch": 0.10336,
      "grad_norm": 1.2463894208631197,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 10336
    },
    {
      "epoch": 0.10337,
      "grad_norm": 1.1402067778691138,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 10337
    },
    {
      "epoch": 0.10338,
      "grad_norm": 1.076909964879423,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 10338
    },
    {
      "epoch": 0.10339,
      "grad_norm": 1.07400837774445,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 10339
    },
    {
      "epoch": 0.1034,
      "grad_norm": 1.3597158531075113,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 10340
    },
    {
      "epoch": 0.10341,
      "grad_norm": 1.1489541697757224,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 10341
    },
    {
      "epoch": 0.10342,
      "grad_norm": 1.1362409836381153,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 10342
    },
    {
      "epoch": 0.10343,
      "grad_norm": 1.2193846234659038,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 10343
    },
    {
      "epoch": 0.10344,
      "grad_norm": 1.1013460412198315,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10344
    },
    {
      "epoch": 0.10345,
      "grad_norm": 1.1986398777019778,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 10345
    },
    {
      "epoch": 0.10346,
      "grad_norm": 1.2259535634191354,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 10346
    },
    {
      "epoch": 0.10347,
      "grad_norm": 1.3180066632365137,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10347
    },
    {
      "epoch": 0.10348,
      "grad_norm": 1.0507937399513352,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 10348
    },
    {
      "epoch": 0.10349,
      "grad_norm": 1.3233595709960158,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 10349
    },
    {
      "epoch": 0.1035,
      "grad_norm": 1.1369428252105478,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10350
    },
    {
      "epoch": 0.10351,
      "grad_norm": 0.9967600558942973,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10351
    },
    {
      "epoch": 0.10352,
      "grad_norm": 1.1650604603706625,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 10352
    },
    {
      "epoch": 0.10353,
      "grad_norm": 0.8988347203171979,
      "learning_rate": 0.003,
      "loss": 4.1102,
      "step": 10353
    },
    {
      "epoch": 0.10354,
      "grad_norm": 1.0825657617706126,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 10354
    },
    {
      "epoch": 0.10355,
      "grad_norm": 1.1940812907579037,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 10355
    },
    {
      "epoch": 0.10356,
      "grad_norm": 1.2644949563081056,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 10356
    },
    {
      "epoch": 0.10357,
      "grad_norm": 1.2076295759178497,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 10357
    },
    {
      "epoch": 0.10358,
      "grad_norm": 1.0173897880605756,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10358
    },
    {
      "epoch": 0.10359,
      "grad_norm": 1.472853270790185,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 10359
    },
    {
      "epoch": 0.1036,
      "grad_norm": 0.9144735283899198,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10360
    },
    {
      "epoch": 0.10361,
      "grad_norm": 1.2350910698680162,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 10361
    },
    {
      "epoch": 0.10362,
      "grad_norm": 1.144218155547618,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 10362
    },
    {
      "epoch": 0.10363,
      "grad_norm": 1.1359921765261243,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 10363
    },
    {
      "epoch": 0.10364,
      "grad_norm": 1.1753318212686652,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 10364
    },
    {
      "epoch": 0.10365,
      "grad_norm": 1.0614783369827396,
      "learning_rate": 0.003,
      "loss": 4.1062,
      "step": 10365
    },
    {
      "epoch": 0.10366,
      "grad_norm": 1.3653564221120145,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 10366
    },
    {
      "epoch": 0.10367,
      "grad_norm": 1.0235396618427333,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 10367
    },
    {
      "epoch": 0.10368,
      "grad_norm": 1.3294725069518951,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 10368
    },
    {
      "epoch": 0.10369,
      "grad_norm": 1.0195789150289067,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 10369
    },
    {
      "epoch": 0.1037,
      "grad_norm": 1.599898624102901,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 10370
    },
    {
      "epoch": 0.10371,
      "grad_norm": 0.9491921215914594,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 10371
    },
    {
      "epoch": 0.10372,
      "grad_norm": 0.9802561557279339,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 10372
    },
    {
      "epoch": 0.10373,
      "grad_norm": 1.3466915502832943,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 10373
    },
    {
      "epoch": 0.10374,
      "grad_norm": 1.2192167320083085,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 10374
    },
    {
      "epoch": 0.10375,
      "grad_norm": 1.1799499752387714,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 10375
    },
    {
      "epoch": 0.10376,
      "grad_norm": 1.0836478765285256,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 10376
    },
    {
      "epoch": 0.10377,
      "grad_norm": 1.4820696523099477,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 10377
    },
    {
      "epoch": 0.10378,
      "grad_norm": 0.9821149971943706,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 10378
    },
    {
      "epoch": 0.10379,
      "grad_norm": 1.0077055514699162,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 10379
    },
    {
      "epoch": 0.1038,
      "grad_norm": 1.3503476406368762,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 10380
    },
    {
      "epoch": 0.10381,
      "grad_norm": 1.050288186407046,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 10381
    },
    {
      "epoch": 0.10382,
      "grad_norm": 1.2049600489251269,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 10382
    },
    {
      "epoch": 0.10383,
      "grad_norm": 0.99486063450449,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 10383
    },
    {
      "epoch": 0.10384,
      "grad_norm": 1.2964579440731385,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 10384
    },
    {
      "epoch": 0.10385,
      "grad_norm": 1.271716496730901,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 10385
    },
    {
      "epoch": 0.10386,
      "grad_norm": 1.1667002374884987,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 10386
    },
    {
      "epoch": 0.10387,
      "grad_norm": 1.1834225987437577,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 10387
    },
    {
      "epoch": 0.10388,
      "grad_norm": 1.1304175541301462,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 10388
    },
    {
      "epoch": 0.10389,
      "grad_norm": 1.256719322090126,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 10389
    },
    {
      "epoch": 0.1039,
      "grad_norm": 1.192656971682307,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 10390
    },
    {
      "epoch": 0.10391,
      "grad_norm": 1.1851085258903133,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 10391
    },
    {
      "epoch": 0.10392,
      "grad_norm": 0.926270846236646,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 10392
    },
    {
      "epoch": 0.10393,
      "grad_norm": 1.0638718913865592,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 10393
    },
    {
      "epoch": 0.10394,
      "grad_norm": 1.2688290938922746,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10394
    },
    {
      "epoch": 0.10395,
      "grad_norm": 0.9888514003130423,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 10395
    },
    {
      "epoch": 0.10396,
      "grad_norm": 1.387707708861817,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 10396
    },
    {
      "epoch": 0.10397,
      "grad_norm": 1.004707297031302,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 10397
    },
    {
      "epoch": 0.10398,
      "grad_norm": 1.4190078354572566,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 10398
    },
    {
      "epoch": 0.10399,
      "grad_norm": 0.9423223908023143,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 10399
    },
    {
      "epoch": 0.104,
      "grad_norm": 1.1895178529282744,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 10400
    },
    {
      "epoch": 0.10401,
      "grad_norm": 1.1718657484527286,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 10401
    },
    {
      "epoch": 0.10402,
      "grad_norm": 1.211910599497182,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 10402
    },
    {
      "epoch": 0.10403,
      "grad_norm": 1.2045298508618671,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 10403
    },
    {
      "epoch": 0.10404,
      "grad_norm": 1.060678391068161,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 10404
    },
    {
      "epoch": 0.10405,
      "grad_norm": 0.9070948966110515,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 10405
    },
    {
      "epoch": 0.10406,
      "grad_norm": 1.1432558088594225,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 10406
    },
    {
      "epoch": 0.10407,
      "grad_norm": 1.0400913783180197,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 10407
    },
    {
      "epoch": 0.10408,
      "grad_norm": 1.0531303855940235,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 10408
    },
    {
      "epoch": 0.10409,
      "grad_norm": 1.1393712037271202,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 10409
    },
    {
      "epoch": 0.1041,
      "grad_norm": 1.0860395180685651,
      "learning_rate": 0.003,
      "loss": 4.115,
      "step": 10410
    },
    {
      "epoch": 0.10411,
      "grad_norm": 1.1306337552458763,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 10411
    },
    {
      "epoch": 0.10412,
      "grad_norm": 1.300910075580706,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 10412
    },
    {
      "epoch": 0.10413,
      "grad_norm": 0.9914524798821533,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 10413
    },
    {
      "epoch": 0.10414,
      "grad_norm": 1.5336050089105886,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 10414
    },
    {
      "epoch": 0.10415,
      "grad_norm": 1.213025943980225,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 10415
    },
    {
      "epoch": 0.10416,
      "grad_norm": 1.1306085072323393,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 10416
    },
    {
      "epoch": 0.10417,
      "grad_norm": 1.3363346046785622,
      "learning_rate": 0.003,
      "loss": 4.1157,
      "step": 10417
    },
    {
      "epoch": 0.10418,
      "grad_norm": 1.239558459468119,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 10418
    },
    {
      "epoch": 0.10419,
      "grad_norm": 1.0659408116986282,
      "learning_rate": 0.003,
      "loss": 4.1264,
      "step": 10419
    },
    {
      "epoch": 0.1042,
      "grad_norm": 1.2990563819605447,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 10420
    },
    {
      "epoch": 0.10421,
      "grad_norm": 1.045572541998183,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10421
    },
    {
      "epoch": 0.10422,
      "grad_norm": 1.2573704949094036,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 10422
    },
    {
      "epoch": 0.10423,
      "grad_norm": 1.0570604162814328,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 10423
    },
    {
      "epoch": 0.10424,
      "grad_norm": 1.2160452336095209,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 10424
    },
    {
      "epoch": 0.10425,
      "grad_norm": 0.9409949308423432,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 10425
    },
    {
      "epoch": 0.10426,
      "grad_norm": 1.3636682193227165,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 10426
    },
    {
      "epoch": 0.10427,
      "grad_norm": 1.0254345901674822,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 10427
    },
    {
      "epoch": 0.10428,
      "grad_norm": 1.061685565843801,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 10428
    },
    {
      "epoch": 0.10429,
      "grad_norm": 1.1842401380873595,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 10429
    },
    {
      "epoch": 0.1043,
      "grad_norm": 1.0907336276654527,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 10430
    },
    {
      "epoch": 0.10431,
      "grad_norm": 1.2487690617445066,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 10431
    },
    {
      "epoch": 0.10432,
      "grad_norm": 1.1487619453121192,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10432
    },
    {
      "epoch": 0.10433,
      "grad_norm": 1.1353525358762642,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 10433
    },
    {
      "epoch": 0.10434,
      "grad_norm": 1.182062089492732,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 10434
    },
    {
      "epoch": 0.10435,
      "grad_norm": 1.2919162960064097,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 10435
    },
    {
      "epoch": 0.10436,
      "grad_norm": 0.8784182498056757,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 10436
    },
    {
      "epoch": 0.10437,
      "grad_norm": 1.0964932058185943,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 10437
    },
    {
      "epoch": 0.10438,
      "grad_norm": 1.2113358892839388,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10438
    },
    {
      "epoch": 0.10439,
      "grad_norm": 0.9411748843655284,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 10439
    },
    {
      "epoch": 0.1044,
      "grad_norm": 1.3960224048377663,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 10440
    },
    {
      "epoch": 0.10441,
      "grad_norm": 1.115230953127737,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 10441
    },
    {
      "epoch": 0.10442,
      "grad_norm": 1.3839057582946162,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 10442
    },
    {
      "epoch": 0.10443,
      "grad_norm": 0.9918161777706388,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 10443
    },
    {
      "epoch": 0.10444,
      "grad_norm": 1.275224814509774,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 10444
    },
    {
      "epoch": 0.10445,
      "grad_norm": 1.1568911084251285,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 10445
    },
    {
      "epoch": 0.10446,
      "grad_norm": 1.1098848204188037,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 10446
    },
    {
      "epoch": 0.10447,
      "grad_norm": 1.3475023444604641,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 10447
    },
    {
      "epoch": 0.10448,
      "grad_norm": 1.0874203874677135,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 10448
    },
    {
      "epoch": 0.10449,
      "grad_norm": 1.0170769666280837,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10449
    },
    {
      "epoch": 0.1045,
      "grad_norm": 1.1462867838652293,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 10450
    },
    {
      "epoch": 0.10451,
      "grad_norm": 1.1467278614404408,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 10451
    },
    {
      "epoch": 0.10452,
      "grad_norm": 1.0082005144502915,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10452
    },
    {
      "epoch": 0.10453,
      "grad_norm": 1.3099977100561864,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 10453
    },
    {
      "epoch": 0.10454,
      "grad_norm": 0.9399797422935121,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 10454
    },
    {
      "epoch": 0.10455,
      "grad_norm": 1.1633812167389073,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 10455
    },
    {
      "epoch": 0.10456,
      "grad_norm": 0.8441790544250756,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 10456
    },
    {
      "epoch": 0.10457,
      "grad_norm": 1.2142260614952807,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 10457
    },
    {
      "epoch": 0.10458,
      "grad_norm": 1.7525087370523817,
      "learning_rate": 0.003,
      "loss": 4.1103,
      "step": 10458
    },
    {
      "epoch": 0.10459,
      "grad_norm": 1.0260708858250815,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 10459
    },
    {
      "epoch": 0.1046,
      "grad_norm": 1.1569113558478397,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 10460
    },
    {
      "epoch": 0.10461,
      "grad_norm": 1.1702933967791436,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 10461
    },
    {
      "epoch": 0.10462,
      "grad_norm": 1.13316121198954,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 10462
    },
    {
      "epoch": 0.10463,
      "grad_norm": 1.2833369061026105,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 10463
    },
    {
      "epoch": 0.10464,
      "grad_norm": 0.9714472006978334,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 10464
    },
    {
      "epoch": 0.10465,
      "grad_norm": 1.4026424196175369,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 10465
    },
    {
      "epoch": 0.10466,
      "grad_norm": 1.0745827757635804,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 10466
    },
    {
      "epoch": 0.10467,
      "grad_norm": 1.144442451198987,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 10467
    },
    {
      "epoch": 0.10468,
      "grad_norm": 1.084471302601593,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 10468
    },
    {
      "epoch": 0.10469,
      "grad_norm": 1.2833962066273863,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 10469
    },
    {
      "epoch": 0.1047,
      "grad_norm": 1.193232193819357,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 10470
    },
    {
      "epoch": 0.10471,
      "grad_norm": 0.9239517653004048,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 10471
    },
    {
      "epoch": 0.10472,
      "grad_norm": 1.075646980282147,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 10472
    },
    {
      "epoch": 0.10473,
      "grad_norm": 1.4461175206428305,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 10473
    },
    {
      "epoch": 0.10474,
      "grad_norm": 0.9482031218732518,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 10474
    },
    {
      "epoch": 0.10475,
      "grad_norm": 1.0455087032787922,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 10475
    },
    {
      "epoch": 0.10476,
      "grad_norm": 1.3144286848750573,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 10476
    },
    {
      "epoch": 0.10477,
      "grad_norm": 1.0497100127868861,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 10477
    },
    {
      "epoch": 0.10478,
      "grad_norm": 1.3195516134146819,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 10478
    },
    {
      "epoch": 0.10479,
      "grad_norm": 1.2063061015916368,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 10479
    },
    {
      "epoch": 0.1048,
      "grad_norm": 1.098171732041185,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 10480
    },
    {
      "epoch": 0.10481,
      "grad_norm": 1.085191930367436,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 10481
    },
    {
      "epoch": 0.10482,
      "grad_norm": 1.1856723926494253,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10482
    },
    {
      "epoch": 0.10483,
      "grad_norm": 1.0735567994984079,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 10483
    },
    {
      "epoch": 0.10484,
      "grad_norm": 1.1245294485238906,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 10484
    },
    {
      "epoch": 0.10485,
      "grad_norm": 1.290202581949648,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 10485
    },
    {
      "epoch": 0.10486,
      "grad_norm": 1.355625022483555,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 10486
    },
    {
      "epoch": 0.10487,
      "grad_norm": 1.2683269217963558,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 10487
    },
    {
      "epoch": 0.10488,
      "grad_norm": 1.0944164469031583,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 10488
    },
    {
      "epoch": 0.10489,
      "grad_norm": 1.2964886991431226,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 10489
    },
    {
      "epoch": 0.1049,
      "grad_norm": 0.8463459006306381,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 10490
    },
    {
      "epoch": 0.10491,
      "grad_norm": 0.8321150941800822,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 10491
    },
    {
      "epoch": 0.10492,
      "grad_norm": 1.0614150475628232,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10492
    },
    {
      "epoch": 0.10493,
      "grad_norm": 1.3288957408778774,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 10493
    },
    {
      "epoch": 0.10494,
      "grad_norm": 1.0381729235961423,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 10494
    },
    {
      "epoch": 0.10495,
      "grad_norm": 1.1504603418116,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 10495
    },
    {
      "epoch": 0.10496,
      "grad_norm": 1.0621219335371417,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 10496
    },
    {
      "epoch": 0.10497,
      "grad_norm": 1.1857329505281322,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 10497
    },
    {
      "epoch": 0.10498,
      "grad_norm": 1.2097394740072545,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 10498
    },
    {
      "epoch": 0.10499,
      "grad_norm": 1.2327086515615175,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 10499
    },
    {
      "epoch": 0.105,
      "grad_norm": 1.0287208162269712,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 10500
    },
    {
      "epoch": 0.10501,
      "grad_norm": 1.3067433688127361,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 10501
    },
    {
      "epoch": 0.10502,
      "grad_norm": 0.8389192771053627,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 10502
    },
    {
      "epoch": 0.10503,
      "grad_norm": 0.8885610051367872,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 10503
    },
    {
      "epoch": 0.10504,
      "grad_norm": 1.129677206792387,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 10504
    },
    {
      "epoch": 0.10505,
      "grad_norm": 1.4582269862849158,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 10505
    },
    {
      "epoch": 0.10506,
      "grad_norm": 0.9728461765979113,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 10506
    },
    {
      "epoch": 0.10507,
      "grad_norm": 1.0183753353440348,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 10507
    },
    {
      "epoch": 0.10508,
      "grad_norm": 1.0906651316555487,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 10508
    },
    {
      "epoch": 0.10509,
      "grad_norm": 1.1755984747198869,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10509
    },
    {
      "epoch": 0.1051,
      "grad_norm": 1.2954430703614785,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 10510
    },
    {
      "epoch": 0.10511,
      "grad_norm": 1.1692617069825422,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 10511
    },
    {
      "epoch": 0.10512,
      "grad_norm": 1.1742433903751759,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 10512
    },
    {
      "epoch": 0.10513,
      "grad_norm": 1.0256581785137762,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 10513
    },
    {
      "epoch": 0.10514,
      "grad_norm": 1.3408802641679947,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 10514
    },
    {
      "epoch": 0.10515,
      "grad_norm": 1.0567380514922469,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 10515
    },
    {
      "epoch": 0.10516,
      "grad_norm": 1.396812036919821,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 10516
    },
    {
      "epoch": 0.10517,
      "grad_norm": 1.2013341558124457,
      "learning_rate": 0.003,
      "loss": 4.1073,
      "step": 10517
    },
    {
      "epoch": 0.10518,
      "grad_norm": 1.1212373729312872,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 10518
    },
    {
      "epoch": 0.10519,
      "grad_norm": 1.0783056687463477,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 10519
    },
    {
      "epoch": 0.1052,
      "grad_norm": 1.3837530732267258,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 10520
    },
    {
      "epoch": 0.10521,
      "grad_norm": 1.201989834218592,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10521
    },
    {
      "epoch": 0.10522,
      "grad_norm": 1.184508196310951,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 10522
    },
    {
      "epoch": 0.10523,
      "grad_norm": 1.1274254742305352,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 10523
    },
    {
      "epoch": 0.10524,
      "grad_norm": 1.0024063475584597,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 10524
    },
    {
      "epoch": 0.10525,
      "grad_norm": 1.2163573465379631,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 10525
    },
    {
      "epoch": 0.10526,
      "grad_norm": 1.0786932134805922,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 10526
    },
    {
      "epoch": 0.10527,
      "grad_norm": 1.0145686562796887,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 10527
    },
    {
      "epoch": 0.10528,
      "grad_norm": 1.1055592765968865,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 10528
    },
    {
      "epoch": 0.10529,
      "grad_norm": 1.1306994783428688,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 10529
    },
    {
      "epoch": 0.1053,
      "grad_norm": 1.2836574349271654,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 10530
    },
    {
      "epoch": 0.10531,
      "grad_norm": 1.1432115850931945,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 10531
    },
    {
      "epoch": 0.10532,
      "grad_norm": 1.2618053347146072,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 10532
    },
    {
      "epoch": 0.10533,
      "grad_norm": 0.981706408047873,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 10533
    },
    {
      "epoch": 0.10534,
      "grad_norm": 1.234979426310262,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 10534
    },
    {
      "epoch": 0.10535,
      "grad_norm": 0.8845202900772148,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 10535
    },
    {
      "epoch": 0.10536,
      "grad_norm": 0.9068303046180969,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 10536
    },
    {
      "epoch": 0.10537,
      "grad_norm": 1.1135901044513736,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 10537
    },
    {
      "epoch": 0.10538,
      "grad_norm": 1.4681092701245535,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 10538
    },
    {
      "epoch": 0.10539,
      "grad_norm": 1.023925708133604,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 10539
    },
    {
      "epoch": 0.1054,
      "grad_norm": 1.3058416784344284,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 10540
    },
    {
      "epoch": 0.10541,
      "grad_norm": 0.8823316840514877,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 10541
    },
    {
      "epoch": 0.10542,
      "grad_norm": 1.0792554022628849,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 10542
    },
    {
      "epoch": 0.10543,
      "grad_norm": 1.1580977814345135,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 10543
    },
    {
      "epoch": 0.10544,
      "grad_norm": 0.9819845632535975,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 10544
    },
    {
      "epoch": 0.10545,
      "grad_norm": 1.2479955950739714,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 10545
    },
    {
      "epoch": 0.10546,
      "grad_norm": 1.1609585761944425,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 10546
    },
    {
      "epoch": 0.10547,
      "grad_norm": 1.3531425668991288,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 10547
    },
    {
      "epoch": 0.10548,
      "grad_norm": 1.0063446389136106,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 10548
    },
    {
      "epoch": 0.10549,
      "grad_norm": 1.0325407798781339,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 10549
    },
    {
      "epoch": 0.1055,
      "grad_norm": 1.1423351291584458,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 10550
    },
    {
      "epoch": 0.10551,
      "grad_norm": 1.3503659758788984,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 10551
    },
    {
      "epoch": 0.10552,
      "grad_norm": 1.032045363172022,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 10552
    },
    {
      "epoch": 0.10553,
      "grad_norm": 1.2490414067587199,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 10553
    },
    {
      "epoch": 0.10554,
      "grad_norm": 1.1538988693639156,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 10554
    },
    {
      "epoch": 0.10555,
      "grad_norm": 1.0159744608616759,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 10555
    },
    {
      "epoch": 0.10556,
      "grad_norm": 1.1185713174374186,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 10556
    },
    {
      "epoch": 0.10557,
      "grad_norm": 1.4202293030936732,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 10557
    },
    {
      "epoch": 0.10558,
      "grad_norm": 1.0266908330606117,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 10558
    },
    {
      "epoch": 0.10559,
      "grad_norm": 1.3782557360428902,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 10559
    },
    {
      "epoch": 0.1056,
      "grad_norm": 0.9892321589682566,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 10560
    },
    {
      "epoch": 0.10561,
      "grad_norm": 1.4029111054013261,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10561
    },
    {
      "epoch": 0.10562,
      "grad_norm": 1.0593993799476236,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 10562
    },
    {
      "epoch": 0.10563,
      "grad_norm": 1.351876246310265,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 10563
    },
    {
      "epoch": 0.10564,
      "grad_norm": 1.164144715391898,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 10564
    },
    {
      "epoch": 0.10565,
      "grad_norm": 1.0477122601932365,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 10565
    },
    {
      "epoch": 0.10566,
      "grad_norm": 1.0978684188474286,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 10566
    },
    {
      "epoch": 0.10567,
      "grad_norm": 1.1015952703676826,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 10567
    },
    {
      "epoch": 0.10568,
      "grad_norm": 1.1622739145924461,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 10568
    },
    {
      "epoch": 0.10569,
      "grad_norm": 1.300806287594485,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 10569
    },
    {
      "epoch": 0.1057,
      "grad_norm": 1.1367400633037008,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 10570
    },
    {
      "epoch": 0.10571,
      "grad_norm": 1.0314579163465747,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 10571
    },
    {
      "epoch": 0.10572,
      "grad_norm": 1.2863399997244334,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 10572
    },
    {
      "epoch": 0.10573,
      "grad_norm": 1.2736178186569311,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 10573
    },
    {
      "epoch": 0.10574,
      "grad_norm": 1.1340756812991262,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 10574
    },
    {
      "epoch": 0.10575,
      "grad_norm": 1.1035950954361586,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 10575
    },
    {
      "epoch": 0.10576,
      "grad_norm": 1.1926319049621437,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 10576
    },
    {
      "epoch": 0.10577,
      "grad_norm": 1.2414399446888533,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 10577
    },
    {
      "epoch": 0.10578,
      "grad_norm": 1.1865528097901785,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10578
    },
    {
      "epoch": 0.10579,
      "grad_norm": 1.03281303097576,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 10579
    },
    {
      "epoch": 0.1058,
      "grad_norm": 1.355791716984305,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 10580
    },
    {
      "epoch": 0.10581,
      "grad_norm": 0.9732650815642186,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 10581
    },
    {
      "epoch": 0.10582,
      "grad_norm": 1.3168947077970985,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 10582
    },
    {
      "epoch": 0.10583,
      "grad_norm": 1.126918732108372,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 10583
    },
    {
      "epoch": 0.10584,
      "grad_norm": 1.236038574060123,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 10584
    },
    {
      "epoch": 0.10585,
      "grad_norm": 0.9764226588991661,
      "learning_rate": 0.003,
      "loss": 4.1164,
      "step": 10585
    },
    {
      "epoch": 0.10586,
      "grad_norm": 1.4613436318029804,
      "learning_rate": 0.003,
      "loss": 4.1013,
      "step": 10586
    },
    {
      "epoch": 0.10587,
      "grad_norm": 0.8912353673808929,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 10587
    },
    {
      "epoch": 0.10588,
      "grad_norm": 1.1660830397020308,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 10588
    },
    {
      "epoch": 0.10589,
      "grad_norm": 1.2040748292723822,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 10589
    },
    {
      "epoch": 0.1059,
      "grad_norm": 1.0023369501968133,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 10590
    },
    {
      "epoch": 0.10591,
      "grad_norm": 1.3963382291928614,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 10591
    },
    {
      "epoch": 0.10592,
      "grad_norm": 1.1181609850538723,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 10592
    },
    {
      "epoch": 0.10593,
      "grad_norm": 1.0780726062707058,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 10593
    },
    {
      "epoch": 0.10594,
      "grad_norm": 1.0189897758129178,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 10594
    },
    {
      "epoch": 0.10595,
      "grad_norm": 1.1796416670163832,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 10595
    },
    {
      "epoch": 0.10596,
      "grad_norm": 1.0541631031801153,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 10596
    },
    {
      "epoch": 0.10597,
      "grad_norm": 1.2244230665198172,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 10597
    },
    {
      "epoch": 0.10598,
      "grad_norm": 1.1989598614086323,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 10598
    },
    {
      "epoch": 0.10599,
      "grad_norm": 1.3731573352295356,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 10599
    },
    {
      "epoch": 0.106,
      "grad_norm": 1.1170959555314157,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 10600
    },
    {
      "epoch": 0.10601,
      "grad_norm": 1.0774495893470961,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 10601
    },
    {
      "epoch": 0.10602,
      "grad_norm": 1.2011499694569694,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 10602
    },
    {
      "epoch": 0.10603,
      "grad_norm": 1.0407850956499687,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 10603
    },
    {
      "epoch": 0.10604,
      "grad_norm": 1.1901650467323437,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 10604
    },
    {
      "epoch": 0.10605,
      "grad_norm": 1.3123752042362158,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10605
    },
    {
      "epoch": 0.10606,
      "grad_norm": 1.1092558216886073,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 10606
    },
    {
      "epoch": 0.10607,
      "grad_norm": 1.4070031238725738,
      "learning_rate": 0.003,
      "loss": 4.117,
      "step": 10607
    },
    {
      "epoch": 0.10608,
      "grad_norm": 0.9636786414342008,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 10608
    },
    {
      "epoch": 0.10609,
      "grad_norm": 1.069934629136634,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 10609
    },
    {
      "epoch": 0.1061,
      "grad_norm": 1.3188057349981706,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 10610
    },
    {
      "epoch": 0.10611,
      "grad_norm": 1.0898838550748384,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 10611
    },
    {
      "epoch": 0.10612,
      "grad_norm": 1.0798870153432583,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 10612
    },
    {
      "epoch": 0.10613,
      "grad_norm": 1.2461876109787824,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 10613
    },
    {
      "epoch": 0.10614,
      "grad_norm": 1.160551479100354,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 10614
    },
    {
      "epoch": 0.10615,
      "grad_norm": 1.0023266443176357,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 10615
    },
    {
      "epoch": 0.10616,
      "grad_norm": 1.454836687976706,
      "learning_rate": 0.003,
      "loss": 4.1119,
      "step": 10616
    },
    {
      "epoch": 0.10617,
      "grad_norm": 1.1059202732456763,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 10617
    },
    {
      "epoch": 0.10618,
      "grad_norm": 1.1665251025931167,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 10618
    },
    {
      "epoch": 0.10619,
      "grad_norm": 1.1209740969721296,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 10619
    },
    {
      "epoch": 0.1062,
      "grad_norm": 1.066063305689871,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 10620
    },
    {
      "epoch": 0.10621,
      "grad_norm": 1.2008845335202172,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 10621
    },
    {
      "epoch": 0.10622,
      "grad_norm": 1.161660223257581,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 10622
    },
    {
      "epoch": 0.10623,
      "grad_norm": 1.6711171820413637,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 10623
    },
    {
      "epoch": 0.10624,
      "grad_norm": 0.8884554897648648,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 10624
    },
    {
      "epoch": 0.10625,
      "grad_norm": 0.8383413684344952,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 10625
    },
    {
      "epoch": 0.10626,
      "grad_norm": 1.2129767496942065,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 10626
    },
    {
      "epoch": 0.10627,
      "grad_norm": 1.186003854674095,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 10627
    },
    {
      "epoch": 0.10628,
      "grad_norm": 1.487670135436451,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 10628
    },
    {
      "epoch": 0.10629,
      "grad_norm": 1.1376454723598795,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 10629
    },
    {
      "epoch": 0.1063,
      "grad_norm": 0.9890545429620673,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 10630
    },
    {
      "epoch": 0.10631,
      "grad_norm": 1.134570590643702,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 10631
    },
    {
      "epoch": 0.10632,
      "grad_norm": 1.049562191577295,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 10632
    },
    {
      "epoch": 0.10633,
      "grad_norm": 1.3046806308376384,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 10633
    },
    {
      "epoch": 0.10634,
      "grad_norm": 1.2510913324277757,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 10634
    },
    {
      "epoch": 0.10635,
      "grad_norm": 1.0580335535557992,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 10635
    },
    {
      "epoch": 0.10636,
      "grad_norm": 1.139536280056406,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 10636
    },
    {
      "epoch": 0.10637,
      "grad_norm": 1.219965449308225,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 10637
    },
    {
      "epoch": 0.10638,
      "grad_norm": 1.0682385047561698,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 10638
    },
    {
      "epoch": 0.10639,
      "grad_norm": 1.014126460631885,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 10639
    },
    {
      "epoch": 0.1064,
      "grad_norm": 1.3296713286352824,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 10640
    },
    {
      "epoch": 0.10641,
      "grad_norm": 0.850836262660311,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 10641
    },
    {
      "epoch": 0.10642,
      "grad_norm": 0.9780187533229948,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 10642
    },
    {
      "epoch": 0.10643,
      "grad_norm": 1.2420781102234009,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 10643
    },
    {
      "epoch": 0.10644,
      "grad_norm": 1.295759972755444,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 10644
    },
    {
      "epoch": 0.10645,
      "grad_norm": 1.4513465623405362,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 10645
    },
    {
      "epoch": 0.10646,
      "grad_norm": 1.20752416924251,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 10646
    },
    {
      "epoch": 0.10647,
      "grad_norm": 1.0423488355215869,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 10647
    },
    {
      "epoch": 0.10648,
      "grad_norm": 1.1509516111491,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 10648
    },
    {
      "epoch": 0.10649,
      "grad_norm": 1.1565147136273588,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10649
    },
    {
      "epoch": 0.1065,
      "grad_norm": 1.0850380169542058,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 10650
    },
    {
      "epoch": 0.10651,
      "grad_norm": 1.2937739072167689,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 10651
    },
    {
      "epoch": 0.10652,
      "grad_norm": 1.1400309243264664,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 10652
    },
    {
      "epoch": 0.10653,
      "grad_norm": 1.3398794194480168,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 10653
    },
    {
      "epoch": 0.10654,
      "grad_norm": 0.900721665595705,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 10654
    },
    {
      "epoch": 0.10655,
      "grad_norm": 0.9738388665304021,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10655
    },
    {
      "epoch": 0.10656,
      "grad_norm": 0.9972631904494703,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 10656
    },
    {
      "epoch": 0.10657,
      "grad_norm": 1.3116692550014106,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 10657
    },
    {
      "epoch": 0.10658,
      "grad_norm": 1.1119940904641739,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 10658
    },
    {
      "epoch": 0.10659,
      "grad_norm": 1.2235179754264862,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 10659
    },
    {
      "epoch": 0.1066,
      "grad_norm": 1.0639452001222656,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 10660
    },
    {
      "epoch": 0.10661,
      "grad_norm": 1.2330959592550323,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 10661
    },
    {
      "epoch": 0.10662,
      "grad_norm": 1.286976857529244,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 10662
    },
    {
      "epoch": 0.10663,
      "grad_norm": 1.2171565929941812,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 10663
    },
    {
      "epoch": 0.10664,
      "grad_norm": 1.0935451659013982,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 10664
    },
    {
      "epoch": 0.10665,
      "grad_norm": 1.2188284949481023,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 10665
    },
    {
      "epoch": 0.10666,
      "grad_norm": 1.397262797478184,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 10666
    },
    {
      "epoch": 0.10667,
      "grad_norm": 1.1459926815948558,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 10667
    },
    {
      "epoch": 0.10668,
      "grad_norm": 1.3576298480008717,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 10668
    },
    {
      "epoch": 0.10669,
      "grad_norm": 0.9923081391132149,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 10669
    },
    {
      "epoch": 0.1067,
      "grad_norm": 1.0964860970428374,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10670
    },
    {
      "epoch": 0.10671,
      "grad_norm": 1.1424381515252007,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 10671
    },
    {
      "epoch": 0.10672,
      "grad_norm": 1.1371330916319933,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 10672
    },
    {
      "epoch": 0.10673,
      "grad_norm": 1.1158052623087227,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 10673
    },
    {
      "epoch": 0.10674,
      "grad_norm": 1.0989876469829485,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 10674
    },
    {
      "epoch": 0.10675,
      "grad_norm": 1.2350446853004926,
      "learning_rate": 0.003,
      "loss": 4.106,
      "step": 10675
    },
    {
      "epoch": 0.10676,
      "grad_norm": 1.091518153578478,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 10676
    },
    {
      "epoch": 0.10677,
      "grad_norm": 1.0887157550467645,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 10677
    },
    {
      "epoch": 0.10678,
      "grad_norm": 1.4079144898312181,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 10678
    },
    {
      "epoch": 0.10679,
      "grad_norm": 1.4385828914523937,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 10679
    },
    {
      "epoch": 0.1068,
      "grad_norm": 1.2632143752877412,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 10680
    },
    {
      "epoch": 0.10681,
      "grad_norm": 1.2711449191855915,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 10681
    },
    {
      "epoch": 0.10682,
      "grad_norm": 1.1502078722655877,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 10682
    },
    {
      "epoch": 0.10683,
      "grad_norm": 1.1689157301365982,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10683
    },
    {
      "epoch": 0.10684,
      "grad_norm": 1.089658606167191,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10684
    },
    {
      "epoch": 0.10685,
      "grad_norm": 1.2028062154824137,
      "learning_rate": 0.003,
      "loss": 4.0997,
      "step": 10685
    },
    {
      "epoch": 0.10686,
      "grad_norm": 1.1532956950445268,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 10686
    },
    {
      "epoch": 0.10687,
      "grad_norm": 1.1798981113852622,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 10687
    },
    {
      "epoch": 0.10688,
      "grad_norm": 1.2487741120499263,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 10688
    },
    {
      "epoch": 0.10689,
      "grad_norm": 1.1511081846376128,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10689
    },
    {
      "epoch": 0.1069,
      "grad_norm": 1.0698421224764414,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 10690
    },
    {
      "epoch": 0.10691,
      "grad_norm": 1.0392026107147678,
      "learning_rate": 0.003,
      "loss": 4.1098,
      "step": 10691
    },
    {
      "epoch": 0.10692,
      "grad_norm": 1.166887026494509,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 10692
    },
    {
      "epoch": 0.10693,
      "grad_norm": 1.2943326893489389,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 10693
    },
    {
      "epoch": 0.10694,
      "grad_norm": 1.188867274604835,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 10694
    },
    {
      "epoch": 0.10695,
      "grad_norm": 1.2966549339357967,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 10695
    },
    {
      "epoch": 0.10696,
      "grad_norm": 1.1063571292520855,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 10696
    },
    {
      "epoch": 0.10697,
      "grad_norm": 1.2644135704444233,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 10697
    },
    {
      "epoch": 0.10698,
      "grad_norm": 0.8829702212715093,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 10698
    },
    {
      "epoch": 0.10699,
      "grad_norm": 1.1161125424172864,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 10699
    },
    {
      "epoch": 0.107,
      "grad_norm": 1.1207907296680133,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 10700
    },
    {
      "epoch": 0.10701,
      "grad_norm": 1.3700136520118256,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 10701
    },
    {
      "epoch": 0.10702,
      "grad_norm": 0.9950362140263944,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 10702
    },
    {
      "epoch": 0.10703,
      "grad_norm": 1.510859550825327,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 10703
    },
    {
      "epoch": 0.10704,
      "grad_norm": 0.7815725976781704,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 10704
    },
    {
      "epoch": 0.10705,
      "grad_norm": 0.9710303251899548,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 10705
    },
    {
      "epoch": 0.10706,
      "grad_norm": 1.3959458546094592,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 10706
    },
    {
      "epoch": 0.10707,
      "grad_norm": 1.5291106936962824,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 10707
    },
    {
      "epoch": 0.10708,
      "grad_norm": 0.8437013934373869,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 10708
    },
    {
      "epoch": 0.10709,
      "grad_norm": 1.1558904845934623,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 10709
    },
    {
      "epoch": 0.1071,
      "grad_norm": 1.29722970020292,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 10710
    },
    {
      "epoch": 0.10711,
      "grad_norm": 0.859426982103532,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10711
    },
    {
      "epoch": 0.10712,
      "grad_norm": 1.0814119668627848,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 10712
    },
    {
      "epoch": 0.10713,
      "grad_norm": 1.0836003319414476,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 10713
    },
    {
      "epoch": 0.10714,
      "grad_norm": 1.2002969744808742,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 10714
    },
    {
      "epoch": 0.10715,
      "grad_norm": 1.235398778887394,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 10715
    },
    {
      "epoch": 0.10716,
      "grad_norm": 1.2848874979455636,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 10716
    },
    {
      "epoch": 0.10717,
      "grad_norm": 1.3187911005800033,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 10717
    },
    {
      "epoch": 0.10718,
      "grad_norm": 1.0754491902530563,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 10718
    },
    {
      "epoch": 0.10719,
      "grad_norm": 1.1780564966095928,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 10719
    },
    {
      "epoch": 0.1072,
      "grad_norm": 0.9569717886019223,
      "learning_rate": 0.003,
      "loss": 4.1111,
      "step": 10720
    },
    {
      "epoch": 0.10721,
      "grad_norm": 1.2052252644508454,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 10721
    },
    {
      "epoch": 0.10722,
      "grad_norm": 1.1518598964675328,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 10722
    },
    {
      "epoch": 0.10723,
      "grad_norm": 1.2581208059738584,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 10723
    },
    {
      "epoch": 0.10724,
      "grad_norm": 0.9900557775494677,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10724
    },
    {
      "epoch": 0.10725,
      "grad_norm": 1.2056121145888294,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 10725
    },
    {
      "epoch": 0.10726,
      "grad_norm": 1.2315129488700127,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 10726
    },
    {
      "epoch": 0.10727,
      "grad_norm": 1.2188154408180971,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 10727
    },
    {
      "epoch": 0.10728,
      "grad_norm": 1.2995915472073434,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 10728
    },
    {
      "epoch": 0.10729,
      "grad_norm": 1.170488160136393,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 10729
    },
    {
      "epoch": 0.1073,
      "grad_norm": 1.23873916978082,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10730
    },
    {
      "epoch": 0.10731,
      "grad_norm": 1.0873846640281402,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 10731
    },
    {
      "epoch": 0.10732,
      "grad_norm": 1.2585084327632297,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 10732
    },
    {
      "epoch": 0.10733,
      "grad_norm": 1.097532457246418,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 10733
    },
    {
      "epoch": 0.10734,
      "grad_norm": 1.426673691228244,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 10734
    },
    {
      "epoch": 0.10735,
      "grad_norm": 1.0017834686251834,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 10735
    },
    {
      "epoch": 0.10736,
      "grad_norm": 1.3745884537168405,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 10736
    },
    {
      "epoch": 0.10737,
      "grad_norm": 1.0946666388549016,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 10737
    },
    {
      "epoch": 0.10738,
      "grad_norm": 1.2950358443757186,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 10738
    },
    {
      "epoch": 0.10739,
      "grad_norm": 1.0666381621479504,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 10739
    },
    {
      "epoch": 0.1074,
      "grad_norm": 0.9465427423268675,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 10740
    },
    {
      "epoch": 0.10741,
      "grad_norm": 1.0827188551900107,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 10741
    },
    {
      "epoch": 0.10742,
      "grad_norm": 1.16078214440933,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 10742
    },
    {
      "epoch": 0.10743,
      "grad_norm": 1.241621485105003,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 10743
    },
    {
      "epoch": 0.10744,
      "grad_norm": 1.1416330699562267,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 10744
    },
    {
      "epoch": 0.10745,
      "grad_norm": 1.1898306550711573,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 10745
    },
    {
      "epoch": 0.10746,
      "grad_norm": 1.1218560056235294,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 10746
    },
    {
      "epoch": 0.10747,
      "grad_norm": 1.3355326489416064,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 10747
    },
    {
      "epoch": 0.10748,
      "grad_norm": 1.2377403051630251,
      "learning_rate": 0.003,
      "loss": 4.1057,
      "step": 10748
    },
    {
      "epoch": 0.10749,
      "grad_norm": 1.1643784959264663,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 10749
    },
    {
      "epoch": 0.1075,
      "grad_norm": 1.0817261939571676,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 10750
    },
    {
      "epoch": 0.10751,
      "grad_norm": 1.071412951472623,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 10751
    },
    {
      "epoch": 0.10752,
      "grad_norm": 1.129314768018961,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 10752
    },
    {
      "epoch": 0.10753,
      "grad_norm": 1.3150908604706937,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 10753
    },
    {
      "epoch": 0.10754,
      "grad_norm": 1.125804848010354,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 10754
    },
    {
      "epoch": 0.10755,
      "grad_norm": 0.9977931603665849,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 10755
    },
    {
      "epoch": 0.10756,
      "grad_norm": 1.2436945170808427,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 10756
    },
    {
      "epoch": 0.10757,
      "grad_norm": 0.8883670690464539,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 10757
    },
    {
      "epoch": 0.10758,
      "grad_norm": 1.0612366629583811,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 10758
    },
    {
      "epoch": 0.10759,
      "grad_norm": 1.2769764962748154,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 10759
    },
    {
      "epoch": 0.1076,
      "grad_norm": 1.060806806921856,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 10760
    },
    {
      "epoch": 0.10761,
      "grad_norm": 1.4172996097599664,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 10761
    },
    {
      "epoch": 0.10762,
      "grad_norm": 0.964200936262678,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 10762
    },
    {
      "epoch": 0.10763,
      "grad_norm": 1.1448760828760276,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 10763
    },
    {
      "epoch": 0.10764,
      "grad_norm": 1.177398561595663,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 10764
    },
    {
      "epoch": 0.10765,
      "grad_norm": 1.3016816642607325,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 10765
    },
    {
      "epoch": 0.10766,
      "grad_norm": 1.2312984478898017,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 10766
    },
    {
      "epoch": 0.10767,
      "grad_norm": 1.0270267866285048,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 10767
    },
    {
      "epoch": 0.10768,
      "grad_norm": 1.337822246381765,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 10768
    },
    {
      "epoch": 0.10769,
      "grad_norm": 1.1389782040182617,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 10769
    },
    {
      "epoch": 0.1077,
      "grad_norm": 1.0710292813632354,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10770
    },
    {
      "epoch": 0.10771,
      "grad_norm": 1.2348971326728477,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 10771
    },
    {
      "epoch": 0.10772,
      "grad_norm": 1.161967747578216,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10772
    },
    {
      "epoch": 0.10773,
      "grad_norm": 1.3634709444585764,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 10773
    },
    {
      "epoch": 0.10774,
      "grad_norm": 1.0806304958698216,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 10774
    },
    {
      "epoch": 0.10775,
      "grad_norm": 1.2438172089730135,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 10775
    },
    {
      "epoch": 0.10776,
      "grad_norm": 0.9949587102464901,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 10776
    },
    {
      "epoch": 0.10777,
      "grad_norm": 1.3006582082099583,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 10777
    },
    {
      "epoch": 0.10778,
      "grad_norm": 1.0692179806323354,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 10778
    },
    {
      "epoch": 0.10779,
      "grad_norm": 1.2464760909389696,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 10779
    },
    {
      "epoch": 0.1078,
      "grad_norm": 1.1691551207648234,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 10780
    },
    {
      "epoch": 0.10781,
      "grad_norm": 1.196080726419258,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 10781
    },
    {
      "epoch": 0.10782,
      "grad_norm": 1.0266639938567623,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 10782
    },
    {
      "epoch": 0.10783,
      "grad_norm": 1.1797973687393533,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10783
    },
    {
      "epoch": 0.10784,
      "grad_norm": 1.2600032626739337,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 10784
    },
    {
      "epoch": 0.10785,
      "grad_norm": 1.3016460761536592,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10785
    },
    {
      "epoch": 0.10786,
      "grad_norm": 1.1906263662241257,
      "learning_rate": 0.003,
      "loss": 4.0992,
      "step": 10786
    },
    {
      "epoch": 0.10787,
      "grad_norm": 1.0763915235301704,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 10787
    },
    {
      "epoch": 0.10788,
      "grad_norm": 1.4651066745900185,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 10788
    },
    {
      "epoch": 0.10789,
      "grad_norm": 0.9201304719705409,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 10789
    },
    {
      "epoch": 0.1079,
      "grad_norm": 1.232011515029564,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 10790
    },
    {
      "epoch": 0.10791,
      "grad_norm": 1.1591988872666497,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 10791
    },
    {
      "epoch": 0.10792,
      "grad_norm": 0.9966425554361823,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 10792
    },
    {
      "epoch": 0.10793,
      "grad_norm": 1.1991131105714399,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 10793
    },
    {
      "epoch": 0.10794,
      "grad_norm": 1.1337112341368891,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 10794
    },
    {
      "epoch": 0.10795,
      "grad_norm": 1.5909269743356649,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 10795
    },
    {
      "epoch": 0.10796,
      "grad_norm": 0.8416057431798984,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 10796
    },
    {
      "epoch": 0.10797,
      "grad_norm": 1.045224639779817,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 10797
    },
    {
      "epoch": 0.10798,
      "grad_norm": 1.2293165519213025,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10798
    },
    {
      "epoch": 0.10799,
      "grad_norm": 1.3020130984066611,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 10799
    },
    {
      "epoch": 0.108,
      "grad_norm": 1.186604828872371,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 10800
    },
    {
      "epoch": 0.10801,
      "grad_norm": 1.097067294795651,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10801
    },
    {
      "epoch": 0.10802,
      "grad_norm": 1.2450394305274315,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 10802
    },
    {
      "epoch": 0.10803,
      "grad_norm": 0.9949904324744665,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 10803
    },
    {
      "epoch": 0.10804,
      "grad_norm": 1.2360068450090314,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 10804
    },
    {
      "epoch": 0.10805,
      "grad_norm": 1.1744858298406828,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 10805
    },
    {
      "epoch": 0.10806,
      "grad_norm": 1.2654260319802026,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 10806
    },
    {
      "epoch": 0.10807,
      "grad_norm": 1.3100632848185885,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 10807
    },
    {
      "epoch": 0.10808,
      "grad_norm": 1.1062041850452753,
      "learning_rate": 0.003,
      "loss": 4.1571,
      "step": 10808
    },
    {
      "epoch": 0.10809,
      "grad_norm": 0.9827220295477982,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 10809
    },
    {
      "epoch": 0.1081,
      "grad_norm": 1.0524862530261532,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 10810
    },
    {
      "epoch": 0.10811,
      "grad_norm": 1.249344963668008,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 10811
    },
    {
      "epoch": 0.10812,
      "grad_norm": 0.9730113013840976,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 10812
    },
    {
      "epoch": 0.10813,
      "grad_norm": 1.2459709042802147,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 10813
    },
    {
      "epoch": 0.10814,
      "grad_norm": 1.1460293966725768,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 10814
    },
    {
      "epoch": 0.10815,
      "grad_norm": 1.019469842228145,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 10815
    },
    {
      "epoch": 0.10816,
      "grad_norm": 1.2233373876851823,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 10816
    },
    {
      "epoch": 0.10817,
      "grad_norm": 1.2413354353726054,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 10817
    },
    {
      "epoch": 0.10818,
      "grad_norm": 1.1987889670937286,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 10818
    },
    {
      "epoch": 0.10819,
      "grad_norm": 1.1933924411078074,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 10819
    },
    {
      "epoch": 0.1082,
      "grad_norm": 1.2359549573144932,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 10820
    },
    {
      "epoch": 0.10821,
      "grad_norm": 1.1880375232350786,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 10821
    },
    {
      "epoch": 0.10822,
      "grad_norm": 1.0189809993081607,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 10822
    },
    {
      "epoch": 0.10823,
      "grad_norm": 1.2875865120552497,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 10823
    },
    {
      "epoch": 0.10824,
      "grad_norm": 1.101554063341351,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 10824
    },
    {
      "epoch": 0.10825,
      "grad_norm": 1.0262557252237896,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 10825
    },
    {
      "epoch": 0.10826,
      "grad_norm": 1.46748660981515,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 10826
    },
    {
      "epoch": 0.10827,
      "grad_norm": 0.9468098890621249,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 10827
    },
    {
      "epoch": 0.10828,
      "grad_norm": 1.1989102371714668,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 10828
    },
    {
      "epoch": 0.10829,
      "grad_norm": 0.9733274516871876,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 10829
    },
    {
      "epoch": 0.1083,
      "grad_norm": 1.521393355716284,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 10830
    },
    {
      "epoch": 0.10831,
      "grad_norm": 1.0648565577154092,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 10831
    },
    {
      "epoch": 0.10832,
      "grad_norm": 1.670795345847375,
      "learning_rate": 0.003,
      "loss": 4.1274,
      "step": 10832
    },
    {
      "epoch": 0.10833,
      "grad_norm": 1.1214446923451595,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 10833
    },
    {
      "epoch": 0.10834,
      "grad_norm": 1.3418726310171458,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 10834
    },
    {
      "epoch": 0.10835,
      "grad_norm": 1.1958076729736424,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 10835
    },
    {
      "epoch": 0.10836,
      "grad_norm": 1.0348374383064476,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 10836
    },
    {
      "epoch": 0.10837,
      "grad_norm": 1.263763456536624,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 10837
    },
    {
      "epoch": 0.10838,
      "grad_norm": 1.079535399309003,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 10838
    },
    {
      "epoch": 0.10839,
      "grad_norm": 1.0280742647698429,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 10839
    },
    {
      "epoch": 0.1084,
      "grad_norm": 1.3474646277249114,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 10840
    },
    {
      "epoch": 0.10841,
      "grad_norm": 1.4499211742939313,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 10841
    },
    {
      "epoch": 0.10842,
      "grad_norm": 1.3290309322316827,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 10842
    },
    {
      "epoch": 0.10843,
      "grad_norm": 1.0528497774700494,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 10843
    },
    {
      "epoch": 0.10844,
      "grad_norm": 1.260209051962453,
      "learning_rate": 0.003,
      "loss": 4.1176,
      "step": 10844
    },
    {
      "epoch": 0.10845,
      "grad_norm": 1.2502627934282764,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 10845
    },
    {
      "epoch": 0.10846,
      "grad_norm": 0.9786619469735314,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 10846
    },
    {
      "epoch": 0.10847,
      "grad_norm": 1.1772139936380883,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 10847
    },
    {
      "epoch": 0.10848,
      "grad_norm": 1.1154749608556416,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 10848
    },
    {
      "epoch": 0.10849,
      "grad_norm": 1.4255927279548497,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10849
    },
    {
      "epoch": 0.1085,
      "grad_norm": 0.9591807418215578,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10850
    },
    {
      "epoch": 0.10851,
      "grad_norm": 1.1500794089470685,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 10851
    },
    {
      "epoch": 0.10852,
      "grad_norm": 1.1733854286996765,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 10852
    },
    {
      "epoch": 0.10853,
      "grad_norm": 1.3167899561992558,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 10853
    },
    {
      "epoch": 0.10854,
      "grad_norm": 1.0983208572180652,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 10854
    },
    {
      "epoch": 0.10855,
      "grad_norm": 1.1066364502124872,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 10855
    },
    {
      "epoch": 0.10856,
      "grad_norm": 1.211760377665726,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 10856
    },
    {
      "epoch": 0.10857,
      "grad_norm": 0.9064617457621427,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 10857
    },
    {
      "epoch": 0.10858,
      "grad_norm": 1.3610637638848726,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 10858
    },
    {
      "epoch": 0.10859,
      "grad_norm": 0.9870487625663688,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 10859
    },
    {
      "epoch": 0.1086,
      "grad_norm": 1.1299893307649607,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 10860
    },
    {
      "epoch": 0.10861,
      "grad_norm": 0.8905455480301787,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 10861
    },
    {
      "epoch": 0.10862,
      "grad_norm": 1.1931426253312616,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 10862
    },
    {
      "epoch": 0.10863,
      "grad_norm": 1.3545454024620263,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 10863
    },
    {
      "epoch": 0.10864,
      "grad_norm": 1.0496058644337216,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 10864
    },
    {
      "epoch": 0.10865,
      "grad_norm": 1.3595045084756816,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 10865
    },
    {
      "epoch": 0.10866,
      "grad_norm": 1.0384779967197104,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 10866
    },
    {
      "epoch": 0.10867,
      "grad_norm": 1.3330568756729129,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 10867
    },
    {
      "epoch": 0.10868,
      "grad_norm": 0.9664414853324744,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 10868
    },
    {
      "epoch": 0.10869,
      "grad_norm": 1.2749960456735239,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10869
    },
    {
      "epoch": 0.1087,
      "grad_norm": 1.109639104543151,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 10870
    },
    {
      "epoch": 0.10871,
      "grad_norm": 1.2792810165388266,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10871
    },
    {
      "epoch": 0.10872,
      "grad_norm": 1.1238748789860165,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 10872
    },
    {
      "epoch": 0.10873,
      "grad_norm": 1.0909837652576535,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 10873
    },
    {
      "epoch": 0.10874,
      "grad_norm": 1.3464645036836225,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 10874
    },
    {
      "epoch": 0.10875,
      "grad_norm": 1.2481986896710018,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 10875
    },
    {
      "epoch": 0.10876,
      "grad_norm": 1.283555944231391,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 10876
    },
    {
      "epoch": 0.10877,
      "grad_norm": 1.141382207544397,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 10877
    },
    {
      "epoch": 0.10878,
      "grad_norm": 1.0151640131393809,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 10878
    },
    {
      "epoch": 0.10879,
      "grad_norm": 1.3476780151695977,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 10879
    },
    {
      "epoch": 0.1088,
      "grad_norm": 1.1041206730298196,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 10880
    },
    {
      "epoch": 0.10881,
      "grad_norm": 1.0387519214779812,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 10881
    },
    {
      "epoch": 0.10882,
      "grad_norm": 1.4934301337368312,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 10882
    },
    {
      "epoch": 0.10883,
      "grad_norm": 1.1675196985897143,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 10883
    },
    {
      "epoch": 0.10884,
      "grad_norm": 1.0064153324913705,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 10884
    },
    {
      "epoch": 0.10885,
      "grad_norm": 1.2330812158547564,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 10885
    },
    {
      "epoch": 0.10886,
      "grad_norm": 1.116359574003855,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 10886
    },
    {
      "epoch": 0.10887,
      "grad_norm": 1.2550204984733917,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 10887
    },
    {
      "epoch": 0.10888,
      "grad_norm": 0.9007148413092196,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 10888
    },
    {
      "epoch": 0.10889,
      "grad_norm": 1.1800673903239454,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 10889
    },
    {
      "epoch": 0.1089,
      "grad_norm": 1.2503116665259741,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 10890
    },
    {
      "epoch": 0.10891,
      "grad_norm": 0.9694142487528438,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 10891
    },
    {
      "epoch": 0.10892,
      "grad_norm": 1.2388945396491844,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 10892
    },
    {
      "epoch": 0.10893,
      "grad_norm": 1.0902349584357809,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 10893
    },
    {
      "epoch": 0.10894,
      "grad_norm": 1.1154048190089856,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 10894
    },
    {
      "epoch": 0.10895,
      "grad_norm": 1.0622891299100115,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 10895
    },
    {
      "epoch": 0.10896,
      "grad_norm": 0.9948833067306971,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 10896
    },
    {
      "epoch": 0.10897,
      "grad_norm": 1.2959590809617756,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 10897
    },
    {
      "epoch": 0.10898,
      "grad_norm": 0.9015807760549023,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 10898
    },
    {
      "epoch": 0.10899,
      "grad_norm": 1.1824188153007684,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 10899
    },
    {
      "epoch": 0.109,
      "grad_norm": 1.1692001862445531,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 10900
    },
    {
      "epoch": 0.10901,
      "grad_norm": 1.0149117448150018,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 10901
    },
    {
      "epoch": 0.10902,
      "grad_norm": 1.1839491756763185,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 10902
    },
    {
      "epoch": 0.10903,
      "grad_norm": 1.0179984934097102,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 10903
    },
    {
      "epoch": 0.10904,
      "grad_norm": 1.4614628598796946,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 10904
    },
    {
      "epoch": 0.10905,
      "grad_norm": 1.1953547562934568,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 10905
    },
    {
      "epoch": 0.10906,
      "grad_norm": 1.636601542722464,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 10906
    },
    {
      "epoch": 0.10907,
      "grad_norm": 0.7528713036455322,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 10907
    },
    {
      "epoch": 0.10908,
      "grad_norm": 1.0389917267494386,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10908
    },
    {
      "epoch": 0.10909,
      "grad_norm": 1.7925209163614575,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 10909
    },
    {
      "epoch": 0.1091,
      "grad_norm": 0.970094220814019,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 10910
    },
    {
      "epoch": 0.10911,
      "grad_norm": 1.179875808883414,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 10911
    },
    {
      "epoch": 0.10912,
      "grad_norm": 1.2181742635726007,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 10912
    },
    {
      "epoch": 0.10913,
      "grad_norm": 1.1496508513916157,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 10913
    },
    {
      "epoch": 0.10914,
      "grad_norm": 1.1005537168343975,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 10914
    },
    {
      "epoch": 0.10915,
      "grad_norm": 1.206635927488834,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 10915
    },
    {
      "epoch": 0.10916,
      "grad_norm": 0.956242769077545,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 10916
    },
    {
      "epoch": 0.10917,
      "grad_norm": 1.041315048532933,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 10917
    },
    {
      "epoch": 0.10918,
      "grad_norm": 1.122023578338097,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 10918
    },
    {
      "epoch": 0.10919,
      "grad_norm": 1.0972674985128863,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 10919
    },
    {
      "epoch": 0.1092,
      "grad_norm": 1.2457136032987424,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 10920
    },
    {
      "epoch": 0.10921,
      "grad_norm": 1.3141612985198625,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 10921
    },
    {
      "epoch": 0.10922,
      "grad_norm": 1.132872008720541,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 10922
    },
    {
      "epoch": 0.10923,
      "grad_norm": 1.1124959252954467,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 10923
    },
    {
      "epoch": 0.10924,
      "grad_norm": 0.9957527052591222,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 10924
    },
    {
      "epoch": 0.10925,
      "grad_norm": 1.2351433504353926,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 10925
    },
    {
      "epoch": 0.10926,
      "grad_norm": 1.1421529222430997,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 10926
    },
    {
      "epoch": 0.10927,
      "grad_norm": 1.100095566570253,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 10927
    },
    {
      "epoch": 0.10928,
      "grad_norm": 1.1198941608177244,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 10928
    },
    {
      "epoch": 0.10929,
      "grad_norm": 1.1434762329489463,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 10929
    },
    {
      "epoch": 0.1093,
      "grad_norm": 1.3132536106629562,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 10930
    },
    {
      "epoch": 0.10931,
      "grad_norm": 1.1417515606575013,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 10931
    },
    {
      "epoch": 0.10932,
      "grad_norm": 1.248357377123286,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 10932
    },
    {
      "epoch": 0.10933,
      "grad_norm": 1.0008989985258727,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 10933
    },
    {
      "epoch": 0.10934,
      "grad_norm": 1.1789044443234111,
      "learning_rate": 0.003,
      "loss": 4.1205,
      "step": 10934
    },
    {
      "epoch": 0.10935,
      "grad_norm": 1.0501842927343423,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 10935
    },
    {
      "epoch": 0.10936,
      "grad_norm": 1.1345208975658883,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 10936
    },
    {
      "epoch": 0.10937,
      "grad_norm": 0.953442335452271,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 10937
    },
    {
      "epoch": 0.10938,
      "grad_norm": 1.1377302461450274,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 10938
    },
    {
      "epoch": 0.10939,
      "grad_norm": 1.443996061455607,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 10939
    },
    {
      "epoch": 0.1094,
      "grad_norm": 1.3938331183018506,
      "learning_rate": 0.003,
      "loss": 4.1125,
      "step": 10940
    },
    {
      "epoch": 0.10941,
      "grad_norm": 0.9701809916187377,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 10941
    },
    {
      "epoch": 0.10942,
      "grad_norm": 1.2668069397873494,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 10942
    },
    {
      "epoch": 0.10943,
      "grad_norm": 1.2083114473027807,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 10943
    },
    {
      "epoch": 0.10944,
      "grad_norm": 0.915154558843135,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 10944
    },
    {
      "epoch": 0.10945,
      "grad_norm": 1.1018952265095763,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 10945
    },
    {
      "epoch": 0.10946,
      "grad_norm": 1.2555065686402815,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 10946
    },
    {
      "epoch": 0.10947,
      "grad_norm": 0.9217730848513727,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 10947
    },
    {
      "epoch": 0.10948,
      "grad_norm": 1.2211889008294665,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 10948
    },
    {
      "epoch": 0.10949,
      "grad_norm": 1.2459604011150405,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 10949
    },
    {
      "epoch": 0.1095,
      "grad_norm": 1.1768814916877195,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 10950
    },
    {
      "epoch": 0.10951,
      "grad_norm": 1.018547814321062,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 10951
    },
    {
      "epoch": 0.10952,
      "grad_norm": 1.3164194264217637,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 10952
    },
    {
      "epoch": 0.10953,
      "grad_norm": 1.1706035941961062,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 10953
    },
    {
      "epoch": 0.10954,
      "grad_norm": 1.337196572028656,
      "learning_rate": 0.003,
      "loss": 4.1074,
      "step": 10954
    },
    {
      "epoch": 0.10955,
      "grad_norm": 1.0738506043022735,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 10955
    },
    {
      "epoch": 0.10956,
      "grad_norm": 1.2189719121937406,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 10956
    },
    {
      "epoch": 0.10957,
      "grad_norm": 0.9514863447924589,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 10957
    },
    {
      "epoch": 0.10958,
      "grad_norm": 1.2891491203795653,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 10958
    },
    {
      "epoch": 0.10959,
      "grad_norm": 1.0703009561907915,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 10959
    },
    {
      "epoch": 0.1096,
      "grad_norm": 1.3554792243974731,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 10960
    },
    {
      "epoch": 0.10961,
      "grad_norm": 1.0350064870118225,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 10961
    },
    {
      "epoch": 0.10962,
      "grad_norm": 1.2722625320372223,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 10962
    },
    {
      "epoch": 0.10963,
      "grad_norm": 1.0687986810017533,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 10963
    },
    {
      "epoch": 0.10964,
      "grad_norm": 1.146695596346475,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 10964
    },
    {
      "epoch": 0.10965,
      "grad_norm": 1.115830007762537,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 10965
    },
    {
      "epoch": 0.10966,
      "grad_norm": 1.089456831868218,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 10966
    },
    {
      "epoch": 0.10967,
      "grad_norm": 1.2396652030097792,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 10967
    },
    {
      "epoch": 0.10968,
      "grad_norm": 1.1821538893519532,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 10968
    },
    {
      "epoch": 0.10969,
      "grad_norm": 1.2414507644573376,
      "learning_rate": 0.003,
      "loss": 4.1028,
      "step": 10969
    },
    {
      "epoch": 0.1097,
      "grad_norm": 1.1096500583314424,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 10970
    },
    {
      "epoch": 0.10971,
      "grad_norm": 1.3266075353839237,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 10971
    },
    {
      "epoch": 0.10972,
      "grad_norm": 1.1288752735161007,
      "learning_rate": 0.003,
      "loss": 4.123,
      "step": 10972
    },
    {
      "epoch": 0.10973,
      "grad_norm": 1.1893059979730722,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 10973
    },
    {
      "epoch": 0.10974,
      "grad_norm": 1.1016118927584362,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 10974
    },
    {
      "epoch": 0.10975,
      "grad_norm": 1.1496691228515794,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 10975
    },
    {
      "epoch": 0.10976,
      "grad_norm": 1.1396501642870858,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 10976
    },
    {
      "epoch": 0.10977,
      "grad_norm": 1.154739766385983,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 10977
    },
    {
      "epoch": 0.10978,
      "grad_norm": 1.2202424954301483,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 10978
    },
    {
      "epoch": 0.10979,
      "grad_norm": 1.024190700243081,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 10979
    },
    {
      "epoch": 0.1098,
      "grad_norm": 1.2310691922569879,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 10980
    },
    {
      "epoch": 0.10981,
      "grad_norm": 1.1718885237175614,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 10981
    },
    {
      "epoch": 0.10982,
      "grad_norm": 1.1343921348884538,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 10982
    },
    {
      "epoch": 0.10983,
      "grad_norm": 1.220869472852959,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 10983
    },
    {
      "epoch": 0.10984,
      "grad_norm": 1.2531776119674387,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 10984
    },
    {
      "epoch": 0.10985,
      "grad_norm": 1.0520148216339635,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 10985
    },
    {
      "epoch": 0.10986,
      "grad_norm": 1.2151773663575407,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 10986
    },
    {
      "epoch": 0.10987,
      "grad_norm": 1.1474346188515292,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 10987
    },
    {
      "epoch": 0.10988,
      "grad_norm": 1.2191252270848074,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 10988
    },
    {
      "epoch": 0.10989,
      "grad_norm": 1.0468863444800465,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 10989
    },
    {
      "epoch": 0.1099,
      "grad_norm": 1.102740386736203,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 10990
    },
    {
      "epoch": 0.10991,
      "grad_norm": 1.0709787382103662,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 10991
    },
    {
      "epoch": 0.10992,
      "grad_norm": 1.3471933663495328,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 10992
    },
    {
      "epoch": 0.10993,
      "grad_norm": 0.8777327978040282,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 10993
    },
    {
      "epoch": 0.10994,
      "grad_norm": 1.0607706676678479,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 10994
    },
    {
      "epoch": 0.10995,
      "grad_norm": 1.2261575756169905,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 10995
    },
    {
      "epoch": 0.10996,
      "grad_norm": 1.0839466931853257,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 10996
    },
    {
      "epoch": 0.10997,
      "grad_norm": 1.200556428480252,
      "learning_rate": 0.003,
      "loss": 4.1083,
      "step": 10997
    },
    {
      "epoch": 0.10998,
      "grad_norm": 1.1891628270375623,
      "learning_rate": 0.003,
      "loss": 4.111,
      "step": 10998
    },
    {
      "epoch": 0.10999,
      "grad_norm": 1.1803068575793494,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 10999
    },
    {
      "epoch": 0.11,
      "grad_norm": 1.216378777563462,
      "learning_rate": 0.003,
      "loss": 4.1113,
      "step": 11000
    },
    {
      "epoch": 0.11001,
      "grad_norm": 1.289656496874306,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 11001
    },
    {
      "epoch": 0.11002,
      "grad_norm": 1.1492884131117849,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 11002
    },
    {
      "epoch": 0.11003,
      "grad_norm": 1.1083998300787332,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 11003
    },
    {
      "epoch": 0.11004,
      "grad_norm": 1.1328457610360267,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 11004
    },
    {
      "epoch": 0.11005,
      "grad_norm": 1.184919916402421,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 11005
    },
    {
      "epoch": 0.11006,
      "grad_norm": 1.2225122116520737,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 11006
    },
    {
      "epoch": 0.11007,
      "grad_norm": 1.2692158192366592,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 11007
    },
    {
      "epoch": 0.11008,
      "grad_norm": 1.2185410602014857,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 11008
    },
    {
      "epoch": 0.11009,
      "grad_norm": 1.1291190266911624,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 11009
    },
    {
      "epoch": 0.1101,
      "grad_norm": 1.033167010262578,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 11010
    },
    {
      "epoch": 0.11011,
      "grad_norm": 1.2126335789070828,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 11011
    },
    {
      "epoch": 0.11012,
      "grad_norm": 1.1834850607714638,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 11012
    },
    {
      "epoch": 0.11013,
      "grad_norm": 1.1749484121679226,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 11013
    },
    {
      "epoch": 0.11014,
      "grad_norm": 1.0223261088307614,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 11014
    },
    {
      "epoch": 0.11015,
      "grad_norm": 1.2933058857253439,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 11015
    },
    {
      "epoch": 0.11016,
      "grad_norm": 1.06013065227872,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 11016
    },
    {
      "epoch": 0.11017,
      "grad_norm": 1.3486859594905953,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11017
    },
    {
      "epoch": 0.11018,
      "grad_norm": 0.986916211339272,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 11018
    },
    {
      "epoch": 0.11019,
      "grad_norm": 1.167384153564792,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 11019
    },
    {
      "epoch": 0.1102,
      "grad_norm": 1.1052461201632215,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 11020
    },
    {
      "epoch": 0.11021,
      "grad_norm": 1.2800251869251464,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 11021
    },
    {
      "epoch": 0.11022,
      "grad_norm": 1.3272037366109815,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 11022
    },
    {
      "epoch": 0.11023,
      "grad_norm": 1.1668592711484271,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 11023
    },
    {
      "epoch": 0.11024,
      "grad_norm": 1.1585458235363237,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 11024
    },
    {
      "epoch": 0.11025,
      "grad_norm": 1.0635883194519844,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 11025
    },
    {
      "epoch": 0.11026,
      "grad_norm": 1.2126176125490782,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 11026
    },
    {
      "epoch": 0.11027,
      "grad_norm": 1.0696229205857228,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 11027
    },
    {
      "epoch": 0.11028,
      "grad_norm": 1.1450257642123574,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11028
    },
    {
      "epoch": 0.11029,
      "grad_norm": 1.126193649245872,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 11029
    },
    {
      "epoch": 0.1103,
      "grad_norm": 1.3229077788056023,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11030
    },
    {
      "epoch": 0.11031,
      "grad_norm": 0.9020140652726835,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 11031
    },
    {
      "epoch": 0.11032,
      "grad_norm": 1.1991614113373614,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 11032
    },
    {
      "epoch": 0.11033,
      "grad_norm": 1.293323738809132,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 11033
    },
    {
      "epoch": 0.11034,
      "grad_norm": 0.9940537745625097,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 11034
    },
    {
      "epoch": 0.11035,
      "grad_norm": 1.1648199986703267,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 11035
    },
    {
      "epoch": 0.11036,
      "grad_norm": 1.2311792624685047,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 11036
    },
    {
      "epoch": 0.11037,
      "grad_norm": 1.031689829336888,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 11037
    },
    {
      "epoch": 0.11038,
      "grad_norm": 1.4327397680815341,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 11038
    },
    {
      "epoch": 0.11039,
      "grad_norm": 1.1444715457989634,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11039
    },
    {
      "epoch": 0.1104,
      "grad_norm": 1.414163564427113,
      "learning_rate": 0.003,
      "loss": 4.1148,
      "step": 11040
    },
    {
      "epoch": 0.11041,
      "grad_norm": 1.2181604281798424,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 11041
    },
    {
      "epoch": 0.11042,
      "grad_norm": 1.049232482218127,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 11042
    },
    {
      "epoch": 0.11043,
      "grad_norm": 1.3071397803017475,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 11043
    },
    {
      "epoch": 0.11044,
      "grad_norm": 0.9255042719347787,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 11044
    },
    {
      "epoch": 0.11045,
      "grad_norm": 1.155005012874666,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11045
    },
    {
      "epoch": 0.11046,
      "grad_norm": 0.9592257000206171,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 11046
    },
    {
      "epoch": 0.11047,
      "grad_norm": 1.1407107616989076,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 11047
    },
    {
      "epoch": 0.11048,
      "grad_norm": 1.2702583077104692,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 11048
    },
    {
      "epoch": 0.11049,
      "grad_norm": 1.1664236578426006,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 11049
    },
    {
      "epoch": 0.1105,
      "grad_norm": 1.1022552350618229,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 11050
    },
    {
      "epoch": 0.11051,
      "grad_norm": 1.1980343626935672,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 11051
    },
    {
      "epoch": 0.11052,
      "grad_norm": 1.1563108049431163,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 11052
    },
    {
      "epoch": 0.11053,
      "grad_norm": 1.363084009888808,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 11053
    },
    {
      "epoch": 0.11054,
      "grad_norm": 1.0707123910939973,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 11054
    },
    {
      "epoch": 0.11055,
      "grad_norm": 1.1497782594709365,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 11055
    },
    {
      "epoch": 0.11056,
      "grad_norm": 1.1540871060383255,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 11056
    },
    {
      "epoch": 0.11057,
      "grad_norm": 1.0737054231011558,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 11057
    },
    {
      "epoch": 0.11058,
      "grad_norm": 1.2385487272101865,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 11058
    },
    {
      "epoch": 0.11059,
      "grad_norm": 1.1050454322125165,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 11059
    },
    {
      "epoch": 0.1106,
      "grad_norm": 1.3081187989296468,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 11060
    },
    {
      "epoch": 0.11061,
      "grad_norm": 0.97080304591906,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 11061
    },
    {
      "epoch": 0.11062,
      "grad_norm": 1.399622532788519,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 11062
    },
    {
      "epoch": 0.11063,
      "grad_norm": 1.0870843815603293,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 11063
    },
    {
      "epoch": 0.11064,
      "grad_norm": 1.1939902003866765,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 11064
    },
    {
      "epoch": 0.11065,
      "grad_norm": 1.1477894467567924,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 11065
    },
    {
      "epoch": 0.11066,
      "grad_norm": 1.0221153810183439,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 11066
    },
    {
      "epoch": 0.11067,
      "grad_norm": 1.1756356134617003,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 11067
    },
    {
      "epoch": 0.11068,
      "grad_norm": 1.0162716761143804,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 11068
    },
    {
      "epoch": 0.11069,
      "grad_norm": 1.0810707049598247,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 11069
    },
    {
      "epoch": 0.1107,
      "grad_norm": 1.1967629587539852,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 11070
    },
    {
      "epoch": 0.11071,
      "grad_norm": 1.200254444855109,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11071
    },
    {
      "epoch": 0.11072,
      "grad_norm": 1.132231184817727,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 11072
    },
    {
      "epoch": 0.11073,
      "grad_norm": 1.165720812170398,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 11073
    },
    {
      "epoch": 0.11074,
      "grad_norm": 1.4490821475131048,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 11074
    },
    {
      "epoch": 0.11075,
      "grad_norm": 1.2757756702169034,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 11075
    },
    {
      "epoch": 0.11076,
      "grad_norm": 0.9824431493913524,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 11076
    },
    {
      "epoch": 0.11077,
      "grad_norm": 1.4698769662624136,
      "learning_rate": 0.003,
      "loss": 4.1345,
      "step": 11077
    },
    {
      "epoch": 0.11078,
      "grad_norm": 1.0045970042994936,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 11078
    },
    {
      "epoch": 0.11079,
      "grad_norm": 1.3847834009536861,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 11079
    },
    {
      "epoch": 0.1108,
      "grad_norm": 0.8797002169260058,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 11080
    },
    {
      "epoch": 0.11081,
      "grad_norm": 1.0901215929148171,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11081
    },
    {
      "epoch": 0.11082,
      "grad_norm": 1.279624718766799,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 11082
    },
    {
      "epoch": 0.11083,
      "grad_norm": 1.167559863023754,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 11083
    },
    {
      "epoch": 0.11084,
      "grad_norm": 1.2821184130733287,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 11084
    },
    {
      "epoch": 0.11085,
      "grad_norm": 0.897330302931402,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 11085
    },
    {
      "epoch": 0.11086,
      "grad_norm": 1.0432451750288771,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 11086
    },
    {
      "epoch": 0.11087,
      "grad_norm": 1.022519793193149,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 11087
    },
    {
      "epoch": 0.11088,
      "grad_norm": 1.0535630318913338,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 11088
    },
    {
      "epoch": 0.11089,
      "grad_norm": 1.3968923566997862,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 11089
    },
    {
      "epoch": 0.1109,
      "grad_norm": 1.3674588309755793,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 11090
    },
    {
      "epoch": 0.11091,
      "grad_norm": 1.1591441198161583,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 11091
    },
    {
      "epoch": 0.11092,
      "grad_norm": 1.1480313147385102,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 11092
    },
    {
      "epoch": 0.11093,
      "grad_norm": 1.1701761236562274,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 11093
    },
    {
      "epoch": 0.11094,
      "grad_norm": 1.189369778108736,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11094
    },
    {
      "epoch": 0.11095,
      "grad_norm": 1.295774630396128,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 11095
    },
    {
      "epoch": 0.11096,
      "grad_norm": 1.2094272273358757,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 11096
    },
    {
      "epoch": 0.11097,
      "grad_norm": 1.1580677336324876,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 11097
    },
    {
      "epoch": 0.11098,
      "grad_norm": 1.2363250718260648,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 11098
    },
    {
      "epoch": 0.11099,
      "grad_norm": 0.833144240622923,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 11099
    },
    {
      "epoch": 0.111,
      "grad_norm": 1.053843619719335,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 11100
    },
    {
      "epoch": 0.11101,
      "grad_norm": 1.146692066640518,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 11101
    },
    {
      "epoch": 0.11102,
      "grad_norm": 1.1457348265065703,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 11102
    },
    {
      "epoch": 0.11103,
      "grad_norm": 1.3834421526344465,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 11103
    },
    {
      "epoch": 0.11104,
      "grad_norm": 0.8438504572465533,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 11104
    },
    {
      "epoch": 0.11105,
      "grad_norm": 0.922149256979776,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 11105
    },
    {
      "epoch": 0.11106,
      "grad_norm": 1.259771858112156,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 11106
    },
    {
      "epoch": 0.11107,
      "grad_norm": 1.0197544280538555,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 11107
    },
    {
      "epoch": 0.11108,
      "grad_norm": 1.274947463280985,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 11108
    },
    {
      "epoch": 0.11109,
      "grad_norm": 1.3399035789882239,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 11109
    },
    {
      "epoch": 0.1111,
      "grad_norm": 1.09604148282697,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 11110
    },
    {
      "epoch": 0.11111,
      "grad_norm": 1.3240783623002959,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 11111
    },
    {
      "epoch": 0.11112,
      "grad_norm": 0.9601801261401435,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 11112
    },
    {
      "epoch": 0.11113,
      "grad_norm": 1.3202629173276619,
      "learning_rate": 0.003,
      "loss": 4.1021,
      "step": 11113
    },
    {
      "epoch": 0.11114,
      "grad_norm": 1.027638567450629,
      "learning_rate": 0.003,
      "loss": 4.1166,
      "step": 11114
    },
    {
      "epoch": 0.11115,
      "grad_norm": 1.2143442400495823,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 11115
    },
    {
      "epoch": 0.11116,
      "grad_norm": 1.104943840701868,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 11116
    },
    {
      "epoch": 0.11117,
      "grad_norm": 1.1174412623468781,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 11117
    },
    {
      "epoch": 0.11118,
      "grad_norm": 1.336239897944748,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 11118
    },
    {
      "epoch": 0.11119,
      "grad_norm": 1.2056116868391393,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 11119
    },
    {
      "epoch": 0.1112,
      "grad_norm": 1.2520239209685728,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 11120
    },
    {
      "epoch": 0.11121,
      "grad_norm": 1.256131905227129,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 11121
    },
    {
      "epoch": 0.11122,
      "grad_norm": 1.0098916379815241,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 11122
    },
    {
      "epoch": 0.11123,
      "grad_norm": 1.3292004460141258,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 11123
    },
    {
      "epoch": 0.11124,
      "grad_norm": 0.9452229243493425,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 11124
    },
    {
      "epoch": 0.11125,
      "grad_norm": 1.6146840509394205,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 11125
    },
    {
      "epoch": 0.11126,
      "grad_norm": 0.9071079822837661,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 11126
    },
    {
      "epoch": 0.11127,
      "grad_norm": 1.1078521847315095,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 11127
    },
    {
      "epoch": 0.11128,
      "grad_norm": 1.159721875586419,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 11128
    },
    {
      "epoch": 0.11129,
      "grad_norm": 1.0396207654106058,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 11129
    },
    {
      "epoch": 0.1113,
      "grad_norm": 1.1290974026996876,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 11130
    },
    {
      "epoch": 0.11131,
      "grad_norm": 1.07403026241833,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 11131
    },
    {
      "epoch": 0.11132,
      "grad_norm": 1.3933698121617073,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 11132
    },
    {
      "epoch": 0.11133,
      "grad_norm": 1.1725955117249347,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 11133
    },
    {
      "epoch": 0.11134,
      "grad_norm": 1.3174881140272372,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 11134
    },
    {
      "epoch": 0.11135,
      "grad_norm": 1.1736131979152669,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 11135
    },
    {
      "epoch": 0.11136,
      "grad_norm": 1.048053300051631,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 11136
    },
    {
      "epoch": 0.11137,
      "grad_norm": 1.102288655476633,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 11137
    },
    {
      "epoch": 0.11138,
      "grad_norm": 1.108742784150663,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 11138
    },
    {
      "epoch": 0.11139,
      "grad_norm": 1.173686326578977,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 11139
    },
    {
      "epoch": 0.1114,
      "grad_norm": 1.2774571058823576,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 11140
    },
    {
      "epoch": 0.11141,
      "grad_norm": 1.0850920923430565,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 11141
    },
    {
      "epoch": 0.11142,
      "grad_norm": 1.2722820226326228,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 11142
    },
    {
      "epoch": 0.11143,
      "grad_norm": 0.9960056462520913,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 11143
    },
    {
      "epoch": 0.11144,
      "grad_norm": 1.1203106074026263,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 11144
    },
    {
      "epoch": 0.11145,
      "grad_norm": 1.1966501459479226,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 11145
    },
    {
      "epoch": 0.11146,
      "grad_norm": 1.1984315813802482,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 11146
    },
    {
      "epoch": 0.11147,
      "grad_norm": 1.0764740147161755,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 11147
    },
    {
      "epoch": 0.11148,
      "grad_norm": 1.3121591191772344,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 11148
    },
    {
      "epoch": 0.11149,
      "grad_norm": 1.1646342972823798,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 11149
    },
    {
      "epoch": 0.1115,
      "grad_norm": 1.331730803049615,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 11150
    },
    {
      "epoch": 0.11151,
      "grad_norm": 0.9860918080871273,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 11151
    },
    {
      "epoch": 0.11152,
      "grad_norm": 1.2815747677037665,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11152
    },
    {
      "epoch": 0.11153,
      "grad_norm": 1.0277548994206205,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 11153
    },
    {
      "epoch": 0.11154,
      "grad_norm": 1.2541170294769153,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 11154
    },
    {
      "epoch": 0.11155,
      "grad_norm": 0.9600444972450275,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 11155
    },
    {
      "epoch": 0.11156,
      "grad_norm": 1.2549006486500096,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 11156
    },
    {
      "epoch": 0.11157,
      "grad_norm": 1.045895721445441,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 11157
    },
    {
      "epoch": 0.11158,
      "grad_norm": 1.281534872473627,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 11158
    },
    {
      "epoch": 0.11159,
      "grad_norm": 0.9835671110843429,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 11159
    },
    {
      "epoch": 0.1116,
      "grad_norm": 1.3993735657738384,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 11160
    },
    {
      "epoch": 0.11161,
      "grad_norm": 1.1542062042669807,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 11161
    },
    {
      "epoch": 0.11162,
      "grad_norm": 1.2233265143236163,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 11162
    },
    {
      "epoch": 0.11163,
      "grad_norm": 1.1421093108971783,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 11163
    },
    {
      "epoch": 0.11164,
      "grad_norm": 1.0930501156203938,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 11164
    },
    {
      "epoch": 0.11165,
      "grad_norm": 1.1391165974539779,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 11165
    },
    {
      "epoch": 0.11166,
      "grad_norm": 1.1192418056700695,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 11166
    },
    {
      "epoch": 0.11167,
      "grad_norm": 1.3010174658966451,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 11167
    },
    {
      "epoch": 0.11168,
      "grad_norm": 1.3028159385520257,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 11168
    },
    {
      "epoch": 0.11169,
      "grad_norm": 0.9401279630622993,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 11169
    },
    {
      "epoch": 0.1117,
      "grad_norm": 0.9373861405274122,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 11170
    },
    {
      "epoch": 0.11171,
      "grad_norm": 1.0889118677948568,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11171
    },
    {
      "epoch": 0.11172,
      "grad_norm": 1.1942612460598918,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 11172
    },
    {
      "epoch": 0.11173,
      "grad_norm": 1.098306651564804,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 11173
    },
    {
      "epoch": 0.11174,
      "grad_norm": 1.3399889218813388,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 11174
    },
    {
      "epoch": 0.11175,
      "grad_norm": 1.1531186410717533,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 11175
    },
    {
      "epoch": 0.11176,
      "grad_norm": 1.209712286758474,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 11176
    },
    {
      "epoch": 0.11177,
      "grad_norm": 0.9782456245193254,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 11177
    },
    {
      "epoch": 0.11178,
      "grad_norm": 1.1558283929711195,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 11178
    },
    {
      "epoch": 0.11179,
      "grad_norm": 1.2772922260185524,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 11179
    },
    {
      "epoch": 0.1118,
      "grad_norm": 0.9989572762152379,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 11180
    },
    {
      "epoch": 0.11181,
      "grad_norm": 1.2637439132331842,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 11181
    },
    {
      "epoch": 0.11182,
      "grad_norm": 1.2297280930942402,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 11182
    },
    {
      "epoch": 0.11183,
      "grad_norm": 1.03577064834795,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 11183
    },
    {
      "epoch": 0.11184,
      "grad_norm": 1.2063110896089673,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 11184
    },
    {
      "epoch": 0.11185,
      "grad_norm": 0.9773849831210153,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 11185
    },
    {
      "epoch": 0.11186,
      "grad_norm": 1.18402790441611,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 11186
    },
    {
      "epoch": 0.11187,
      "grad_norm": 1.4741121362262628,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 11187
    },
    {
      "epoch": 0.11188,
      "grad_norm": 1.2388308800713173,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 11188
    },
    {
      "epoch": 0.11189,
      "grad_norm": 1.2911692078858614,
      "learning_rate": 0.003,
      "loss": 4.1121,
      "step": 11189
    },
    {
      "epoch": 0.1119,
      "grad_norm": 1.0218562441800818,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 11190
    },
    {
      "epoch": 0.11191,
      "grad_norm": 1.2931489036686594,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 11191
    },
    {
      "epoch": 0.11192,
      "grad_norm": 1.0861872408771538,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 11192
    },
    {
      "epoch": 0.11193,
      "grad_norm": 1.114209549974762,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 11193
    },
    {
      "epoch": 0.11194,
      "grad_norm": 1.209019727027277,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 11194
    },
    {
      "epoch": 0.11195,
      "grad_norm": 1.0108126210804838,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11195
    },
    {
      "epoch": 0.11196,
      "grad_norm": 1.20589123068435,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 11196
    },
    {
      "epoch": 0.11197,
      "grad_norm": 1.0243731523960853,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 11197
    },
    {
      "epoch": 0.11198,
      "grad_norm": 1.405136226654536,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 11198
    },
    {
      "epoch": 0.11199,
      "grad_norm": 0.9997432541113855,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 11199
    },
    {
      "epoch": 0.112,
      "grad_norm": 1.2367199253922476,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 11200
    },
    {
      "epoch": 0.11201,
      "grad_norm": 1.0631224885717738,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 11201
    },
    {
      "epoch": 0.11202,
      "grad_norm": 1.1185478468019074,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11202
    },
    {
      "epoch": 0.11203,
      "grad_norm": 1.1490403919062497,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 11203
    },
    {
      "epoch": 0.11204,
      "grad_norm": 0.9717178345845396,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 11204
    },
    {
      "epoch": 0.11205,
      "grad_norm": 1.1687038083031553,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 11205
    },
    {
      "epoch": 0.11206,
      "grad_norm": 1.073448923677053,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 11206
    },
    {
      "epoch": 0.11207,
      "grad_norm": 1.2772128792027855,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 11207
    },
    {
      "epoch": 0.11208,
      "grad_norm": 1.004257416373169,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 11208
    },
    {
      "epoch": 0.11209,
      "grad_norm": 1.414543278622611,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 11209
    },
    {
      "epoch": 0.1121,
      "grad_norm": 1.1533718420414074,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 11210
    },
    {
      "epoch": 0.11211,
      "grad_norm": 1.232881271126248,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 11211
    },
    {
      "epoch": 0.11212,
      "grad_norm": 1.3454643836906734,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 11212
    },
    {
      "epoch": 0.11213,
      "grad_norm": 1.1964442622903273,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 11213
    },
    {
      "epoch": 0.11214,
      "grad_norm": 1.016582264231967,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 11214
    },
    {
      "epoch": 0.11215,
      "grad_norm": 1.2465471638780516,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 11215
    },
    {
      "epoch": 0.11216,
      "grad_norm": 0.9731189335286383,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 11216
    },
    {
      "epoch": 0.11217,
      "grad_norm": 1.2585237890766032,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 11217
    },
    {
      "epoch": 0.11218,
      "grad_norm": 1.375187414405389,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 11218
    },
    {
      "epoch": 0.11219,
      "grad_norm": 1.21500013057869,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 11219
    },
    {
      "epoch": 0.1122,
      "grad_norm": 1.0237848543455739,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 11220
    },
    {
      "epoch": 0.11221,
      "grad_norm": 1.0314896825666124,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 11221
    },
    {
      "epoch": 0.11222,
      "grad_norm": 1.3233867169080658,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 11222
    },
    {
      "epoch": 0.11223,
      "grad_norm": 1.0975586830375574,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 11223
    },
    {
      "epoch": 0.11224,
      "grad_norm": 1.1930854617321562,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 11224
    },
    {
      "epoch": 0.11225,
      "grad_norm": 1.0911558326089497,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 11225
    },
    {
      "epoch": 0.11226,
      "grad_norm": 1.2351625865634561,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 11226
    },
    {
      "epoch": 0.11227,
      "grad_norm": 1.1749371871274827,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 11227
    },
    {
      "epoch": 0.11228,
      "grad_norm": 1.198635523522791,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 11228
    },
    {
      "epoch": 0.11229,
      "grad_norm": 1.1240002402811142,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 11229
    },
    {
      "epoch": 0.1123,
      "grad_norm": 1.2241592922697206,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 11230
    },
    {
      "epoch": 0.11231,
      "grad_norm": 1.1970424174019643,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 11231
    },
    {
      "epoch": 0.11232,
      "grad_norm": 1.1421340748635413,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 11232
    },
    {
      "epoch": 0.11233,
      "grad_norm": 1.1759250032252293,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 11233
    },
    {
      "epoch": 0.11234,
      "grad_norm": 1.1609578732577739,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 11234
    },
    {
      "epoch": 0.11235,
      "grad_norm": 1.1821129395196315,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 11235
    },
    {
      "epoch": 0.11236,
      "grad_norm": 1.1070416228627675,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 11236
    },
    {
      "epoch": 0.11237,
      "grad_norm": 1.3664160052213254,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 11237
    },
    {
      "epoch": 0.11238,
      "grad_norm": 0.9119492467214586,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 11238
    },
    {
      "epoch": 0.11239,
      "grad_norm": 1.0622389881528407,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 11239
    },
    {
      "epoch": 0.1124,
      "grad_norm": 1.1894727674355798,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 11240
    },
    {
      "epoch": 0.11241,
      "grad_norm": 0.9537664095570878,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 11241
    },
    {
      "epoch": 0.11242,
      "grad_norm": 1.2108180500354753,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 11242
    },
    {
      "epoch": 0.11243,
      "grad_norm": 1.0377598646551163,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 11243
    },
    {
      "epoch": 0.11244,
      "grad_norm": 1.2667536423936097,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 11244
    },
    {
      "epoch": 0.11245,
      "grad_norm": 1.0199261735712357,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 11245
    },
    {
      "epoch": 0.11246,
      "grad_norm": 1.380042950316513,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 11246
    },
    {
      "epoch": 0.11247,
      "grad_norm": 1.2233265055955735,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 11247
    },
    {
      "epoch": 0.11248,
      "grad_norm": 1.177520883310014,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 11248
    },
    {
      "epoch": 0.11249,
      "grad_norm": 1.0352426374271515,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 11249
    },
    {
      "epoch": 0.1125,
      "grad_norm": 1.2681183552927422,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 11250
    },
    {
      "epoch": 0.11251,
      "grad_norm": 1.1477755872427362,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 11251
    },
    {
      "epoch": 0.11252,
      "grad_norm": 1.517706889182652,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 11252
    },
    {
      "epoch": 0.11253,
      "grad_norm": 1.1177213037118356,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 11253
    },
    {
      "epoch": 0.11254,
      "grad_norm": 1.3343373703779187,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11254
    },
    {
      "epoch": 0.11255,
      "grad_norm": 1.1567401753985436,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 11255
    },
    {
      "epoch": 0.11256,
      "grad_norm": 1.0806058389994764,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 11256
    },
    {
      "epoch": 0.11257,
      "grad_norm": 1.133158277986818,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 11257
    },
    {
      "epoch": 0.11258,
      "grad_norm": 1.1883695225812174,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 11258
    },
    {
      "epoch": 0.11259,
      "grad_norm": 0.9681430811253013,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 11259
    },
    {
      "epoch": 0.1126,
      "grad_norm": 1.1934967669263257,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 11260
    },
    {
      "epoch": 0.11261,
      "grad_norm": 1.085936306228125,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 11261
    },
    {
      "epoch": 0.11262,
      "grad_norm": 1.28556430915771,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11262
    },
    {
      "epoch": 0.11263,
      "grad_norm": 1.0671406124670375,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 11263
    },
    {
      "epoch": 0.11264,
      "grad_norm": 1.261192487487167,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 11264
    },
    {
      "epoch": 0.11265,
      "grad_norm": 1.0739516983789419,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 11265
    },
    {
      "epoch": 0.11266,
      "grad_norm": 1.2903709025877286,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 11266
    },
    {
      "epoch": 0.11267,
      "grad_norm": 1.1865966885308028,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 11267
    },
    {
      "epoch": 0.11268,
      "grad_norm": 1.2393487002892085,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 11268
    },
    {
      "epoch": 0.11269,
      "grad_norm": 1.131721357284378,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 11269
    },
    {
      "epoch": 0.1127,
      "grad_norm": 1.0623045335870454,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 11270
    },
    {
      "epoch": 0.11271,
      "grad_norm": 1.298629433282519,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 11271
    },
    {
      "epoch": 0.11272,
      "grad_norm": 1.1024801052028652,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 11272
    },
    {
      "epoch": 0.11273,
      "grad_norm": 1.1799581337189502,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 11273
    },
    {
      "epoch": 0.11274,
      "grad_norm": 1.0744504496225524,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 11274
    },
    {
      "epoch": 0.11275,
      "grad_norm": 1.2446994109409262,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 11275
    },
    {
      "epoch": 0.11276,
      "grad_norm": 1.0883069074518403,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 11276
    },
    {
      "epoch": 0.11277,
      "grad_norm": 1.1088581538356497,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 11277
    },
    {
      "epoch": 0.11278,
      "grad_norm": 1.1320400571531242,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 11278
    },
    {
      "epoch": 0.11279,
      "grad_norm": 1.1882710772080816,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 11279
    },
    {
      "epoch": 0.1128,
      "grad_norm": 1.1828816974011485,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 11280
    },
    {
      "epoch": 0.11281,
      "grad_norm": 1.2189576526947539,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 11281
    },
    {
      "epoch": 0.11282,
      "grad_norm": 1.0520825905755233,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 11282
    },
    {
      "epoch": 0.11283,
      "grad_norm": 1.2397599842533094,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 11283
    },
    {
      "epoch": 0.11284,
      "grad_norm": 1.025617880523038,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11284
    },
    {
      "epoch": 0.11285,
      "grad_norm": 1.1821805898447082,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 11285
    },
    {
      "epoch": 0.11286,
      "grad_norm": 1.1165620824376317,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 11286
    },
    {
      "epoch": 0.11287,
      "grad_norm": 1.1072689494019334,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 11287
    },
    {
      "epoch": 0.11288,
      "grad_norm": 1.2825093591266885,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 11288
    },
    {
      "epoch": 0.11289,
      "grad_norm": 1.3094637479991706,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 11289
    },
    {
      "epoch": 0.1129,
      "grad_norm": 1.2293468957577327,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 11290
    },
    {
      "epoch": 0.11291,
      "grad_norm": 1.3881096727893352,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 11291
    },
    {
      "epoch": 0.11292,
      "grad_norm": 1.1828219431674265,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 11292
    },
    {
      "epoch": 0.11293,
      "grad_norm": 1.0432111324738476,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 11293
    },
    {
      "epoch": 0.11294,
      "grad_norm": 1.2028521119807711,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 11294
    },
    {
      "epoch": 0.11295,
      "grad_norm": 1.1560410520532847,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 11295
    },
    {
      "epoch": 0.11296,
      "grad_norm": 1.241080361905138,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 11296
    },
    {
      "epoch": 0.11297,
      "grad_norm": 1.102149499553362,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 11297
    },
    {
      "epoch": 0.11298,
      "grad_norm": 1.4981514137553442,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 11298
    },
    {
      "epoch": 0.11299,
      "grad_norm": 1.2732016237261758,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 11299
    },
    {
      "epoch": 0.113,
      "grad_norm": 1.0855217853652195,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 11300
    },
    {
      "epoch": 0.11301,
      "grad_norm": 1.144721962347034,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 11301
    },
    {
      "epoch": 0.11302,
      "grad_norm": 1.163283114829239,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 11302
    },
    {
      "epoch": 0.11303,
      "grad_norm": 1.5095251442036801,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 11303
    },
    {
      "epoch": 0.11304,
      "grad_norm": 0.8655844076550182,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 11304
    },
    {
      "epoch": 0.11305,
      "grad_norm": 0.9768503913687301,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 11305
    },
    {
      "epoch": 0.11306,
      "grad_norm": 1.1118481212396893,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 11306
    },
    {
      "epoch": 0.11307,
      "grad_norm": 1.2557851707694494,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 11307
    },
    {
      "epoch": 0.11308,
      "grad_norm": 1.0876765492352787,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 11308
    },
    {
      "epoch": 0.11309,
      "grad_norm": 1.3197206224683027,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 11309
    },
    {
      "epoch": 0.1131,
      "grad_norm": 1.1527574893555428,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 11310
    },
    {
      "epoch": 0.11311,
      "grad_norm": 1.0055020429352322,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11311
    },
    {
      "epoch": 0.11312,
      "grad_norm": 1.2278396206335767,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 11312
    },
    {
      "epoch": 0.11313,
      "grad_norm": 1.0076037063843581,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 11313
    },
    {
      "epoch": 0.11314,
      "grad_norm": 1.4336445287934063,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 11314
    },
    {
      "epoch": 0.11315,
      "grad_norm": 0.8419355428055376,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 11315
    },
    {
      "epoch": 0.11316,
      "grad_norm": 0.9841916247201852,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 11316
    },
    {
      "epoch": 0.11317,
      "grad_norm": 1.3680597035869113,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 11317
    },
    {
      "epoch": 0.11318,
      "grad_norm": 1.393252247457501,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 11318
    },
    {
      "epoch": 0.11319,
      "grad_norm": 1.2698890335191317,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 11319
    },
    {
      "epoch": 0.1132,
      "grad_norm": 1.054983155096937,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 11320
    },
    {
      "epoch": 0.11321,
      "grad_norm": 1.1642598738916654,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 11321
    },
    {
      "epoch": 0.11322,
      "grad_norm": 1.2880074580030998,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 11322
    },
    {
      "epoch": 0.11323,
      "grad_norm": 0.9696344981062905,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 11323
    },
    {
      "epoch": 0.11324,
      "grad_norm": 1.2317118080416707,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 11324
    },
    {
      "epoch": 0.11325,
      "grad_norm": 1.0240155450785555,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 11325
    },
    {
      "epoch": 0.11326,
      "grad_norm": 1.1909002991650128,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 11326
    },
    {
      "epoch": 0.11327,
      "grad_norm": 1.1363333235538478,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 11327
    },
    {
      "epoch": 0.11328,
      "grad_norm": 1.153386328873901,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11328
    },
    {
      "epoch": 0.11329,
      "grad_norm": 1.1843625290435011,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 11329
    },
    {
      "epoch": 0.1133,
      "grad_norm": 1.1713278236704114,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11330
    },
    {
      "epoch": 0.11331,
      "grad_norm": 1.4738186710191181,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 11331
    },
    {
      "epoch": 0.11332,
      "grad_norm": 1.1168929903796028,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 11332
    },
    {
      "epoch": 0.11333,
      "grad_norm": 1.4447556787111548,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 11333
    },
    {
      "epoch": 0.11334,
      "grad_norm": 1.0926804158099357,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 11334
    },
    {
      "epoch": 0.11335,
      "grad_norm": 1.2927439581888547,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 11335
    },
    {
      "epoch": 0.11336,
      "grad_norm": 1.077028010839177,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 11336
    },
    {
      "epoch": 0.11337,
      "grad_norm": 1.2918345218299065,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 11337
    },
    {
      "epoch": 0.11338,
      "grad_norm": 1.0598579343301027,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 11338
    },
    {
      "epoch": 0.11339,
      "grad_norm": 1.1257298398963314,
      "learning_rate": 0.003,
      "loss": 4.1108,
      "step": 11339
    },
    {
      "epoch": 0.1134,
      "grad_norm": 1.3162052695908748,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 11340
    },
    {
      "epoch": 0.11341,
      "grad_norm": 1.131065545796142,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 11341
    },
    {
      "epoch": 0.11342,
      "grad_norm": 1.1871289356923436,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 11342
    },
    {
      "epoch": 0.11343,
      "grad_norm": 1.2208326094667281,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 11343
    },
    {
      "epoch": 0.11344,
      "grad_norm": 1.147121675994465,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 11344
    },
    {
      "epoch": 0.11345,
      "grad_norm": 1.110093784860901,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 11345
    },
    {
      "epoch": 0.11346,
      "grad_norm": 1.2018669323129128,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 11346
    },
    {
      "epoch": 0.11347,
      "grad_norm": 0.9854247275018928,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 11347
    },
    {
      "epoch": 0.11348,
      "grad_norm": 1.3540848648186976,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 11348
    },
    {
      "epoch": 0.11349,
      "grad_norm": 0.9209528701178629,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11349
    },
    {
      "epoch": 0.1135,
      "grad_norm": 1.183193945927848,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 11350
    },
    {
      "epoch": 0.11351,
      "grad_norm": 1.2241071788224156,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 11351
    },
    {
      "epoch": 0.11352,
      "grad_norm": 1.264128877176907,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 11352
    },
    {
      "epoch": 0.11353,
      "grad_norm": 1.3375943105466253,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 11353
    },
    {
      "epoch": 0.11354,
      "grad_norm": 0.971453513895793,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 11354
    },
    {
      "epoch": 0.11355,
      "grad_norm": 1.1992359612359424,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 11355
    },
    {
      "epoch": 0.11356,
      "grad_norm": 1.12742305868137,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 11356
    },
    {
      "epoch": 0.11357,
      "grad_norm": 1.057282156982112,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 11357
    },
    {
      "epoch": 0.11358,
      "grad_norm": 1.3540534987747048,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 11358
    },
    {
      "epoch": 0.11359,
      "grad_norm": 1.054180251810891,
      "learning_rate": 0.003,
      "loss": 4.1003,
      "step": 11359
    },
    {
      "epoch": 0.1136,
      "grad_norm": 1.177256362048272,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 11360
    },
    {
      "epoch": 0.11361,
      "grad_norm": 1.1430591013777645,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 11361
    },
    {
      "epoch": 0.11362,
      "grad_norm": 1.2545721554637832,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 11362
    },
    {
      "epoch": 0.11363,
      "grad_norm": 1.1487676491455483,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 11363
    },
    {
      "epoch": 0.11364,
      "grad_norm": 1.174028562853811,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 11364
    },
    {
      "epoch": 0.11365,
      "grad_norm": 1.0524531488709266,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 11365
    },
    {
      "epoch": 0.11366,
      "grad_norm": 1.1127606638729275,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 11366
    },
    {
      "epoch": 0.11367,
      "grad_norm": 1.2889056949638662,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 11367
    },
    {
      "epoch": 0.11368,
      "grad_norm": 1.1945595400318936,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 11368
    },
    {
      "epoch": 0.11369,
      "grad_norm": 1.121435622675818,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 11369
    },
    {
      "epoch": 0.1137,
      "grad_norm": 1.2517077973859847,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 11370
    },
    {
      "epoch": 0.11371,
      "grad_norm": 1.0949921154536846,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11371
    },
    {
      "epoch": 0.11372,
      "grad_norm": 1.247312554142341,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 11372
    },
    {
      "epoch": 0.11373,
      "grad_norm": 1.0455564948736882,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 11373
    },
    {
      "epoch": 0.11374,
      "grad_norm": 1.2309575034906135,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 11374
    },
    {
      "epoch": 0.11375,
      "grad_norm": 1.1997157049677671,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 11375
    },
    {
      "epoch": 0.11376,
      "grad_norm": 1.2922369438044266,
      "learning_rate": 0.003,
      "loss": 4.1163,
      "step": 11376
    },
    {
      "epoch": 0.11377,
      "grad_norm": 1.227221408176285,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 11377
    },
    {
      "epoch": 0.11378,
      "grad_norm": 1.5411142967060618,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 11378
    },
    {
      "epoch": 0.11379,
      "grad_norm": 0.9410473447301275,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 11379
    },
    {
      "epoch": 0.1138,
      "grad_norm": 1.1332538562404029,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 11380
    },
    {
      "epoch": 0.11381,
      "grad_norm": 1.4056385188743654,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 11381
    },
    {
      "epoch": 0.11382,
      "grad_norm": 1.1076341193368915,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 11382
    },
    {
      "epoch": 0.11383,
      "grad_norm": 1.3425460788316952,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 11383
    },
    {
      "epoch": 0.11384,
      "grad_norm": 0.9299747587551522,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 11384
    },
    {
      "epoch": 0.11385,
      "grad_norm": 1.1616465110678822,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 11385
    },
    {
      "epoch": 0.11386,
      "grad_norm": 1.228364230665424,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 11386
    },
    {
      "epoch": 0.11387,
      "grad_norm": 1.026775499618035,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11387
    },
    {
      "epoch": 0.11388,
      "grad_norm": 1.186841490398931,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 11388
    },
    {
      "epoch": 0.11389,
      "grad_norm": 1.113607604079879,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11389
    },
    {
      "epoch": 0.1139,
      "grad_norm": 1.214021323948272,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 11390
    },
    {
      "epoch": 0.11391,
      "grad_norm": 1.0617336666676447,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 11391
    },
    {
      "epoch": 0.11392,
      "grad_norm": 1.4256327524001133,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 11392
    },
    {
      "epoch": 0.11393,
      "grad_norm": 0.9429026567842537,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 11393
    },
    {
      "epoch": 0.11394,
      "grad_norm": 1.0149734142468037,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 11394
    },
    {
      "epoch": 0.11395,
      "grad_norm": 0.9422668319518931,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 11395
    },
    {
      "epoch": 0.11396,
      "grad_norm": 1.1401996080256407,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 11396
    },
    {
      "epoch": 0.11397,
      "grad_norm": 1.177214766567936,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 11397
    },
    {
      "epoch": 0.11398,
      "grad_norm": 1.0110021900524433,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 11398
    },
    {
      "epoch": 0.11399,
      "grad_norm": 1.1768839261262272,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 11399
    },
    {
      "epoch": 0.114,
      "grad_norm": 1.1414020916346157,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 11400
    },
    {
      "epoch": 0.11401,
      "grad_norm": 1.150374703617575,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 11401
    },
    {
      "epoch": 0.11402,
      "grad_norm": 1.215556312236231,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 11402
    },
    {
      "epoch": 0.11403,
      "grad_norm": 1.0440764034627348,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 11403
    },
    {
      "epoch": 0.11404,
      "grad_norm": 1.6023256769426333,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 11404
    },
    {
      "epoch": 0.11405,
      "grad_norm": 1.294742133665827,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 11405
    },
    {
      "epoch": 0.11406,
      "grad_norm": 0.9897834597352938,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 11406
    },
    {
      "epoch": 0.11407,
      "grad_norm": 1.3648306468527558,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 11407
    },
    {
      "epoch": 0.11408,
      "grad_norm": 1.017340568621194,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 11408
    },
    {
      "epoch": 0.11409,
      "grad_norm": 1.3555212966368582,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 11409
    },
    {
      "epoch": 0.1141,
      "grad_norm": 1.0123596831049593,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 11410
    },
    {
      "epoch": 0.11411,
      "grad_norm": 1.4084356670391436,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 11411
    },
    {
      "epoch": 0.11412,
      "grad_norm": 1.0910871007698164,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 11412
    },
    {
      "epoch": 0.11413,
      "grad_norm": 0.9821281307103067,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 11413
    },
    {
      "epoch": 0.11414,
      "grad_norm": 1.0281020978417275,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 11414
    },
    {
      "epoch": 0.11415,
      "grad_norm": 1.27061682180603,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11415
    },
    {
      "epoch": 0.11416,
      "grad_norm": 1.3825399870725403,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 11416
    },
    {
      "epoch": 0.11417,
      "grad_norm": 1.0349060713735456,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 11417
    },
    {
      "epoch": 0.11418,
      "grad_norm": 1.247625981100148,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 11418
    },
    {
      "epoch": 0.11419,
      "grad_norm": 1.076240310475421,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 11419
    },
    {
      "epoch": 0.1142,
      "grad_norm": 1.126178785427172,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 11420
    },
    {
      "epoch": 0.11421,
      "grad_norm": 1.1656854594192045,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 11421
    },
    {
      "epoch": 0.11422,
      "grad_norm": 1.2802536744397197,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 11422
    },
    {
      "epoch": 0.11423,
      "grad_norm": 1.058189762030975,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 11423
    },
    {
      "epoch": 0.11424,
      "grad_norm": 1.254947080462852,
      "learning_rate": 0.003,
      "loss": 4.104,
      "step": 11424
    },
    {
      "epoch": 0.11425,
      "grad_norm": 1.0493001159378452,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 11425
    },
    {
      "epoch": 0.11426,
      "grad_norm": 1.4885665242657737,
      "learning_rate": 0.003,
      "loss": 4.1081,
      "step": 11426
    },
    {
      "epoch": 0.11427,
      "grad_norm": 0.9720233775254423,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 11427
    },
    {
      "epoch": 0.11428,
      "grad_norm": 1.2866999811480075,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 11428
    },
    {
      "epoch": 0.11429,
      "grad_norm": 1.2926212573442324,
      "learning_rate": 0.003,
      "loss": 4.1181,
      "step": 11429
    },
    {
      "epoch": 0.1143,
      "grad_norm": 1.0602107668095546,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 11430
    },
    {
      "epoch": 0.11431,
      "grad_norm": 1.3490347702536993,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 11431
    },
    {
      "epoch": 0.11432,
      "grad_norm": 1.3200116165463798,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 11432
    },
    {
      "epoch": 0.11433,
      "grad_norm": 1.0646935424901862,
      "learning_rate": 0.003,
      "loss": 4.1142,
      "step": 11433
    },
    {
      "epoch": 0.11434,
      "grad_norm": 1.053308075790006,
      "learning_rate": 0.003,
      "loss": 4.1086,
      "step": 11434
    },
    {
      "epoch": 0.11435,
      "grad_norm": 1.0987766943842554,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 11435
    },
    {
      "epoch": 0.11436,
      "grad_norm": 1.078887782161941,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 11436
    },
    {
      "epoch": 0.11437,
      "grad_norm": 1.2236619685979702,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 11437
    },
    {
      "epoch": 0.11438,
      "grad_norm": 0.9965897884378967,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 11438
    },
    {
      "epoch": 0.11439,
      "grad_norm": 1.5171415237819303,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 11439
    },
    {
      "epoch": 0.1144,
      "grad_norm": 0.9254183689955512,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11440
    },
    {
      "epoch": 0.11441,
      "grad_norm": 1.1802455391859032,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 11441
    },
    {
      "epoch": 0.11442,
      "grad_norm": 1.1304408432442188,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 11442
    },
    {
      "epoch": 0.11443,
      "grad_norm": 1.2616223153469455,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 11443
    },
    {
      "epoch": 0.11444,
      "grad_norm": 0.9898632535928127,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 11444
    },
    {
      "epoch": 0.11445,
      "grad_norm": 1.1106024369621383,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 11445
    },
    {
      "epoch": 0.11446,
      "grad_norm": 1.1313665518951734,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 11446
    },
    {
      "epoch": 0.11447,
      "grad_norm": 1.1055046914749922,
      "learning_rate": 0.003,
      "loss": 4.1165,
      "step": 11447
    },
    {
      "epoch": 0.11448,
      "grad_norm": 1.2700027524427357,
      "learning_rate": 0.003,
      "loss": 4.1123,
      "step": 11448
    },
    {
      "epoch": 0.11449,
      "grad_norm": 1.1747973362905308,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11449
    },
    {
      "epoch": 0.1145,
      "grad_norm": 1.2448867650140631,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 11450
    },
    {
      "epoch": 0.11451,
      "grad_norm": 1.4021973572768907,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 11451
    },
    {
      "epoch": 0.11452,
      "grad_norm": 1.2978912346016391,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 11452
    },
    {
      "epoch": 0.11453,
      "grad_norm": 1.2161204587703507,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 11453
    },
    {
      "epoch": 0.11454,
      "grad_norm": 1.347884771284405,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 11454
    },
    {
      "epoch": 0.11455,
      "grad_norm": 1.0701199335942853,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 11455
    },
    {
      "epoch": 0.11456,
      "grad_norm": 1.087637663877047,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 11456
    },
    {
      "epoch": 0.11457,
      "grad_norm": 1.1001486271515806,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 11457
    },
    {
      "epoch": 0.11458,
      "grad_norm": 1.0288611708922306,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 11458
    },
    {
      "epoch": 0.11459,
      "grad_norm": 1.2930415256059284,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 11459
    },
    {
      "epoch": 0.1146,
      "grad_norm": 0.9396094840689607,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 11460
    },
    {
      "epoch": 0.11461,
      "grad_norm": 1.3243731266257484,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 11461
    },
    {
      "epoch": 0.11462,
      "grad_norm": 1.0499960484767916,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 11462
    },
    {
      "epoch": 0.11463,
      "grad_norm": 1.366084137306469,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11463
    },
    {
      "epoch": 0.11464,
      "grad_norm": 0.9389564810897638,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 11464
    },
    {
      "epoch": 0.11465,
      "grad_norm": 1.20440081026079,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 11465
    },
    {
      "epoch": 0.11466,
      "grad_norm": 1.149724890423667,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 11466
    },
    {
      "epoch": 0.11467,
      "grad_norm": 1.2853468225896403,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 11467
    },
    {
      "epoch": 0.11468,
      "grad_norm": 1.1978301632485409,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 11468
    },
    {
      "epoch": 0.11469,
      "grad_norm": 1.0202858294727375,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11469
    },
    {
      "epoch": 0.1147,
      "grad_norm": 1.409755040044635,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 11470
    },
    {
      "epoch": 0.11471,
      "grad_norm": 1.1159553996794154,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 11471
    },
    {
      "epoch": 0.11472,
      "grad_norm": 1.114063611856724,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 11472
    },
    {
      "epoch": 0.11473,
      "grad_norm": 1.2182970670688382,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 11473
    },
    {
      "epoch": 0.11474,
      "grad_norm": 1.0696376544415567,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11474
    },
    {
      "epoch": 0.11475,
      "grad_norm": 1.332890292488662,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 11475
    },
    {
      "epoch": 0.11476,
      "grad_norm": 0.9756801900159172,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 11476
    },
    {
      "epoch": 0.11477,
      "grad_norm": 1.372607682848459,
      "learning_rate": 0.003,
      "loss": 4.1171,
      "step": 11477
    },
    {
      "epoch": 0.11478,
      "grad_norm": 0.8255639146406556,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 11478
    },
    {
      "epoch": 0.11479,
      "grad_norm": 1.1664926036649057,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 11479
    },
    {
      "epoch": 0.1148,
      "grad_norm": 1.294357451851071,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 11480
    },
    {
      "epoch": 0.11481,
      "grad_norm": 1.3833008263992455,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 11481
    },
    {
      "epoch": 0.11482,
      "grad_norm": 1.1468410323877665,
      "learning_rate": 0.003,
      "loss": 4.1107,
      "step": 11482
    },
    {
      "epoch": 0.11483,
      "grad_norm": 0.9887223520013214,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 11483
    },
    {
      "epoch": 0.11484,
      "grad_norm": 1.1103043081093302,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 11484
    },
    {
      "epoch": 0.11485,
      "grad_norm": 1.2539182845087775,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 11485
    },
    {
      "epoch": 0.11486,
      "grad_norm": 1.229758733901623,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 11486
    },
    {
      "epoch": 0.11487,
      "grad_norm": 1.1836079272602194,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 11487
    },
    {
      "epoch": 0.11488,
      "grad_norm": 0.9349775387277866,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 11488
    },
    {
      "epoch": 0.11489,
      "grad_norm": 1.1135837477366786,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 11489
    },
    {
      "epoch": 0.1149,
      "grad_norm": 1.0788610305126276,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 11490
    },
    {
      "epoch": 0.11491,
      "grad_norm": 1.0996128966845133,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 11491
    },
    {
      "epoch": 0.11492,
      "grad_norm": 1.1962298260701187,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 11492
    },
    {
      "epoch": 0.11493,
      "grad_norm": 1.0941724572100726,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 11493
    },
    {
      "epoch": 0.11494,
      "grad_norm": 1.3475798987524423,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11494
    },
    {
      "epoch": 0.11495,
      "grad_norm": 1.1329316783150218,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 11495
    },
    {
      "epoch": 0.11496,
      "grad_norm": 1.1896099564690321,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 11496
    },
    {
      "epoch": 0.11497,
      "grad_norm": 1.0928050136095804,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11497
    },
    {
      "epoch": 0.11498,
      "grad_norm": 1.0963983050897443,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 11498
    },
    {
      "epoch": 0.11499,
      "grad_norm": 1.2008513248037702,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11499
    },
    {
      "epoch": 0.115,
      "grad_norm": 1.308613469975689,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 11500
    },
    {
      "epoch": 0.11501,
      "grad_norm": 1.3175611324257215,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 11501
    },
    {
      "epoch": 0.11502,
      "grad_norm": 1.026242356134211,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 11502
    },
    {
      "epoch": 0.11503,
      "grad_norm": 1.1805651466017921,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 11503
    },
    {
      "epoch": 0.11504,
      "grad_norm": 0.9738010615945621,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 11504
    },
    {
      "epoch": 0.11505,
      "grad_norm": 1.3092225762310343,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 11505
    },
    {
      "epoch": 0.11506,
      "grad_norm": 0.97474226050319,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 11506
    },
    {
      "epoch": 0.11507,
      "grad_norm": 1.1334909402353455,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 11507
    },
    {
      "epoch": 0.11508,
      "grad_norm": 1.2437644494475641,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 11508
    },
    {
      "epoch": 0.11509,
      "grad_norm": 1.127927044502533,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 11509
    },
    {
      "epoch": 0.1151,
      "grad_norm": 1.3443112351268103,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11510
    },
    {
      "epoch": 0.11511,
      "grad_norm": 1.1679143581179199,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 11511
    },
    {
      "epoch": 0.11512,
      "grad_norm": 1.1817297226139298,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 11512
    },
    {
      "epoch": 0.11513,
      "grad_norm": 1.3774151922566873,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 11513
    },
    {
      "epoch": 0.11514,
      "grad_norm": 1.064266101329151,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 11514
    },
    {
      "epoch": 0.11515,
      "grad_norm": 1.3813630987722414,
      "learning_rate": 0.003,
      "loss": 4.1194,
      "step": 11515
    },
    {
      "epoch": 0.11516,
      "grad_norm": 1.0477884800845256,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 11516
    },
    {
      "epoch": 0.11517,
      "grad_norm": 1.321817198455129,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 11517
    },
    {
      "epoch": 0.11518,
      "grad_norm": 0.9354471219736039,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 11518
    },
    {
      "epoch": 0.11519,
      "grad_norm": 1.2091105494550873,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 11519
    },
    {
      "epoch": 0.1152,
      "grad_norm": 1.0806128156529098,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 11520
    },
    {
      "epoch": 0.11521,
      "grad_norm": 1.2408933034442884,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 11521
    },
    {
      "epoch": 0.11522,
      "grad_norm": 1.2467169335127846,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 11522
    },
    {
      "epoch": 0.11523,
      "grad_norm": 1.2856824199643657,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 11523
    },
    {
      "epoch": 0.11524,
      "grad_norm": 1.264360674227239,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 11524
    },
    {
      "epoch": 0.11525,
      "grad_norm": 1.0508319704976452,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 11525
    },
    {
      "epoch": 0.11526,
      "grad_norm": 1.3112540782269941,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 11526
    },
    {
      "epoch": 0.11527,
      "grad_norm": 1.0643098598317824,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 11527
    },
    {
      "epoch": 0.11528,
      "grad_norm": 1.1906495679674107,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11528
    },
    {
      "epoch": 0.11529,
      "grad_norm": 1.2980704217448864,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 11529
    },
    {
      "epoch": 0.1153,
      "grad_norm": 0.8867430435194795,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 11530
    },
    {
      "epoch": 0.11531,
      "grad_norm": 1.1040483708431459,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 11531
    },
    {
      "epoch": 0.11532,
      "grad_norm": 1.1805903226827017,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 11532
    },
    {
      "epoch": 0.11533,
      "grad_norm": 1.1523593683566506,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 11533
    },
    {
      "epoch": 0.11534,
      "grad_norm": 1.2466735154295447,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 11534
    },
    {
      "epoch": 0.11535,
      "grad_norm": 1.1515859901279828,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 11535
    },
    {
      "epoch": 0.11536,
      "grad_norm": 0.9773817166626956,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 11536
    },
    {
      "epoch": 0.11537,
      "grad_norm": 1.353165757233726,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 11537
    },
    {
      "epoch": 0.11538,
      "grad_norm": 1.0286698146467392,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 11538
    },
    {
      "epoch": 0.11539,
      "grad_norm": 1.3615233983899735,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 11539
    },
    {
      "epoch": 0.1154,
      "grad_norm": 0.923404459886176,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 11540
    },
    {
      "epoch": 0.11541,
      "grad_norm": 1.179077597795612,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 11541
    },
    {
      "epoch": 0.11542,
      "grad_norm": 1.0504999776520778,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 11542
    },
    {
      "epoch": 0.11543,
      "grad_norm": 1.266392988607479,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 11543
    },
    {
      "epoch": 0.11544,
      "grad_norm": 1.0690194520990461,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 11544
    },
    {
      "epoch": 0.11545,
      "grad_norm": 1.0187394776585554,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 11545
    },
    {
      "epoch": 0.11546,
      "grad_norm": 1.1881403653169518,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 11546
    },
    {
      "epoch": 0.11547,
      "grad_norm": 0.9061876375465832,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 11547
    },
    {
      "epoch": 0.11548,
      "grad_norm": 1.280760949288439,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11548
    },
    {
      "epoch": 0.11549,
      "grad_norm": 1.6926012022473007,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 11549
    },
    {
      "epoch": 0.1155,
      "grad_norm": 1.3743997496349738,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 11550
    },
    {
      "epoch": 0.11551,
      "grad_norm": 0.9575390298144556,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 11551
    },
    {
      "epoch": 0.11552,
      "grad_norm": 1.3025097000865644,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 11552
    },
    {
      "epoch": 0.11553,
      "grad_norm": 1.174925247280143,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 11553
    },
    {
      "epoch": 0.11554,
      "grad_norm": 1.1507902802997487,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 11554
    },
    {
      "epoch": 0.11555,
      "grad_norm": 1.3723048067418877,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 11555
    },
    {
      "epoch": 0.11556,
      "grad_norm": 1.1434625203555215,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 11556
    },
    {
      "epoch": 0.11557,
      "grad_norm": 1.274192421285598,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 11557
    },
    {
      "epoch": 0.11558,
      "grad_norm": 1.21004073015434,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 11558
    },
    {
      "epoch": 0.11559,
      "grad_norm": 1.0819390577627044,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 11559
    },
    {
      "epoch": 0.1156,
      "grad_norm": 1.2733871597157078,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 11560
    },
    {
      "epoch": 0.11561,
      "grad_norm": 1.1210160300210403,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 11561
    },
    {
      "epoch": 0.11562,
      "grad_norm": 1.341917662056347,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 11562
    },
    {
      "epoch": 0.11563,
      "grad_norm": 0.9899531411900627,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 11563
    },
    {
      "epoch": 0.11564,
      "grad_norm": 1.3331094512680763,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 11564
    },
    {
      "epoch": 0.11565,
      "grad_norm": 1.074378038012777,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 11565
    },
    {
      "epoch": 0.11566,
      "grad_norm": 1.0921225706498394,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 11566
    },
    {
      "epoch": 0.11567,
      "grad_norm": 1.1686265160176652,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 11567
    },
    {
      "epoch": 0.11568,
      "grad_norm": 1.1345061017130718,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 11568
    },
    {
      "epoch": 0.11569,
      "grad_norm": 1.2407044804857985,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 11569
    },
    {
      "epoch": 0.1157,
      "grad_norm": 1.2933894276546998,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 11570
    },
    {
      "epoch": 0.11571,
      "grad_norm": 1.0144524727039599,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 11571
    },
    {
      "epoch": 0.11572,
      "grad_norm": 1.154824048951517,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 11572
    },
    {
      "epoch": 0.11573,
      "grad_norm": 1.3344606188631556,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 11573
    },
    {
      "epoch": 0.11574,
      "grad_norm": 0.9324521525913374,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 11574
    },
    {
      "epoch": 0.11575,
      "grad_norm": 0.9666961252283515,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 11575
    },
    {
      "epoch": 0.11576,
      "grad_norm": 1.0929762780109733,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 11576
    },
    {
      "epoch": 0.11577,
      "grad_norm": 1.359434801271052,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 11577
    },
    {
      "epoch": 0.11578,
      "grad_norm": 1.0266321837498404,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 11578
    },
    {
      "epoch": 0.11579,
      "grad_norm": 1.1097851636303517,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 11579
    },
    {
      "epoch": 0.1158,
      "grad_norm": 1.2531227471152486,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 11580
    },
    {
      "epoch": 0.11581,
      "grad_norm": 1.3107090415682447,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 11581
    },
    {
      "epoch": 0.11582,
      "grad_norm": 1.132608200106952,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 11582
    },
    {
      "epoch": 0.11583,
      "grad_norm": 1.1646643953883302,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 11583
    },
    {
      "epoch": 0.11584,
      "grad_norm": 0.966463751002327,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 11584
    },
    {
      "epoch": 0.11585,
      "grad_norm": 1.1856859379676656,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 11585
    },
    {
      "epoch": 0.11586,
      "grad_norm": 1.086649497913007,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 11586
    },
    {
      "epoch": 0.11587,
      "grad_norm": 1.2690379379929062,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 11587
    },
    {
      "epoch": 0.11588,
      "grad_norm": 0.9935718042905032,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 11588
    },
    {
      "epoch": 0.11589,
      "grad_norm": 1.3313725583096991,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 11589
    },
    {
      "epoch": 0.1159,
      "grad_norm": 1.0259547188310283,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 11590
    },
    {
      "epoch": 0.11591,
      "grad_norm": 1.3133936881217214,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 11591
    },
    {
      "epoch": 0.11592,
      "grad_norm": 1.0213838493879515,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 11592
    },
    {
      "epoch": 0.11593,
      "grad_norm": 1.347456693219393,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 11593
    },
    {
      "epoch": 0.11594,
      "grad_norm": 1.1679330089514939,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 11594
    },
    {
      "epoch": 0.11595,
      "grad_norm": 1.104230185366043,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 11595
    },
    {
      "epoch": 0.11596,
      "grad_norm": 1.3096538728464635,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 11596
    },
    {
      "epoch": 0.11597,
      "grad_norm": 1.0708176834350167,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 11597
    },
    {
      "epoch": 0.11598,
      "grad_norm": 1.419218488468854,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 11598
    },
    {
      "epoch": 0.11599,
      "grad_norm": 1.059367189303041,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 11599
    },
    {
      "epoch": 0.116,
      "grad_norm": 1.4108671475415633,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 11600
    },
    {
      "epoch": 0.11601,
      "grad_norm": 0.9733638184815594,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 11601
    },
    {
      "epoch": 0.11602,
      "grad_norm": 1.0820043730117697,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 11602
    },
    {
      "epoch": 0.11603,
      "grad_norm": 1.2147531231150952,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 11603
    },
    {
      "epoch": 0.11604,
      "grad_norm": 1.123040949818344,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 11604
    },
    {
      "epoch": 0.11605,
      "grad_norm": 1.3485877186705435,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 11605
    },
    {
      "epoch": 0.11606,
      "grad_norm": 1.0159588779281732,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 11606
    },
    {
      "epoch": 0.11607,
      "grad_norm": 1.31908257646201,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 11607
    },
    {
      "epoch": 0.11608,
      "grad_norm": 0.9972867655077241,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 11608
    },
    {
      "epoch": 0.11609,
      "grad_norm": 1.3061558323354592,
      "learning_rate": 0.003,
      "loss": 4.1141,
      "step": 11609
    },
    {
      "epoch": 0.1161,
      "grad_norm": 0.971407556763974,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 11610
    },
    {
      "epoch": 0.11611,
      "grad_norm": 1.2215246724221014,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 11611
    },
    {
      "epoch": 0.11612,
      "grad_norm": 1.1214563125973465,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 11612
    },
    {
      "epoch": 0.11613,
      "grad_norm": 1.3463323837268832,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 11613
    },
    {
      "epoch": 0.11614,
      "grad_norm": 1.005861871950001,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11614
    },
    {
      "epoch": 0.11615,
      "grad_norm": 1.1173298064399497,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 11615
    },
    {
      "epoch": 0.11616,
      "grad_norm": 1.0865141677282626,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 11616
    },
    {
      "epoch": 0.11617,
      "grad_norm": 1.2021344904783986,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 11617
    },
    {
      "epoch": 0.11618,
      "grad_norm": 1.2654706652266698,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 11618
    },
    {
      "epoch": 0.11619,
      "grad_norm": 1.1995899472436178,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 11619
    },
    {
      "epoch": 0.1162,
      "grad_norm": 1.0558948472377412,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 11620
    },
    {
      "epoch": 0.11621,
      "grad_norm": 1.2607446375842715,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 11621
    },
    {
      "epoch": 0.11622,
      "grad_norm": 1.115655721531063,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 11622
    },
    {
      "epoch": 0.11623,
      "grad_norm": 1.2795840516663242,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 11623
    },
    {
      "epoch": 0.11624,
      "grad_norm": 1.1604711054182177,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 11624
    },
    {
      "epoch": 0.11625,
      "grad_norm": 1.1954532583033348,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 11625
    },
    {
      "epoch": 0.11626,
      "grad_norm": 1.020769225323057,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 11626
    },
    {
      "epoch": 0.11627,
      "grad_norm": 1.3014178927477036,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 11627
    },
    {
      "epoch": 0.11628,
      "grad_norm": 1.1723273023585614,
      "learning_rate": 0.003,
      "loss": 4.1149,
      "step": 11628
    },
    {
      "epoch": 0.11629,
      "grad_norm": 1.3072394900620368,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 11629
    },
    {
      "epoch": 0.1163,
      "grad_norm": 1.2846287717965974,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 11630
    },
    {
      "epoch": 0.11631,
      "grad_norm": 0.9780232880461375,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 11631
    },
    {
      "epoch": 0.11632,
      "grad_norm": 1.1660762073465643,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 11632
    },
    {
      "epoch": 0.11633,
      "grad_norm": 1.110005484921728,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 11633
    },
    {
      "epoch": 0.11634,
      "grad_norm": 1.1452153297503211,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 11634
    },
    {
      "epoch": 0.11635,
      "grad_norm": 0.9829449454658096,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 11635
    },
    {
      "epoch": 0.11636,
      "grad_norm": 1.3147897318417492,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 11636
    },
    {
      "epoch": 0.11637,
      "grad_norm": 1.0265219018313658,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 11637
    },
    {
      "epoch": 0.11638,
      "grad_norm": 1.21791936895603,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11638
    },
    {
      "epoch": 0.11639,
      "grad_norm": 1.1939433767765795,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 11639
    },
    {
      "epoch": 0.1164,
      "grad_norm": 1.3414467709428268,
      "learning_rate": 0.003,
      "loss": 4.1041,
      "step": 11640
    },
    {
      "epoch": 0.11641,
      "grad_norm": 1.0941144662922078,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 11641
    },
    {
      "epoch": 0.11642,
      "grad_norm": 1.2308907705165233,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 11642
    },
    {
      "epoch": 0.11643,
      "grad_norm": 0.9691303413285445,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 11643
    },
    {
      "epoch": 0.11644,
      "grad_norm": 1.08208704933791,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 11644
    },
    {
      "epoch": 0.11645,
      "grad_norm": 1.1657566901506018,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 11645
    },
    {
      "epoch": 0.11646,
      "grad_norm": 1.0026554853780012,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 11646
    },
    {
      "epoch": 0.11647,
      "grad_norm": 1.3278869384524574,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 11647
    },
    {
      "epoch": 0.11648,
      "grad_norm": 1.1526164101217382,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 11648
    },
    {
      "epoch": 0.11649,
      "grad_norm": 1.1702419673180477,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 11649
    },
    {
      "epoch": 0.1165,
      "grad_norm": 1.2199436298530248,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 11650
    },
    {
      "epoch": 0.11651,
      "grad_norm": 1.0170923163006491,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 11651
    },
    {
      "epoch": 0.11652,
      "grad_norm": 1.1947281587929344,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 11652
    },
    {
      "epoch": 0.11653,
      "grad_norm": 1.4536369469192147,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 11653
    },
    {
      "epoch": 0.11654,
      "grad_norm": 0.986442918965962,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 11654
    },
    {
      "epoch": 0.11655,
      "grad_norm": 1.3730559055015554,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 11655
    },
    {
      "epoch": 0.11656,
      "grad_norm": 0.987156630639679,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 11656
    },
    {
      "epoch": 0.11657,
      "grad_norm": 1.3704453079105388,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 11657
    },
    {
      "epoch": 0.11658,
      "grad_norm": 1.1986780205189669,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 11658
    },
    {
      "epoch": 0.11659,
      "grad_norm": 1.2081230426683085,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11659
    },
    {
      "epoch": 0.1166,
      "grad_norm": 1.2151928435575177,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 11660
    },
    {
      "epoch": 0.11661,
      "grad_norm": 1.2077636774696505,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 11661
    },
    {
      "epoch": 0.11662,
      "grad_norm": 1.1200976843627546,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 11662
    },
    {
      "epoch": 0.11663,
      "grad_norm": 1.3007439222307542,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 11663
    },
    {
      "epoch": 0.11664,
      "grad_norm": 0.9318794503160776,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 11664
    },
    {
      "epoch": 0.11665,
      "grad_norm": 1.098989623725052,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 11665
    },
    {
      "epoch": 0.11666,
      "grad_norm": 1.1501714554377738,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 11666
    },
    {
      "epoch": 0.11667,
      "grad_norm": 1.2824033405427517,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 11667
    },
    {
      "epoch": 0.11668,
      "grad_norm": 0.9966630289505742,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 11668
    },
    {
      "epoch": 0.11669,
      "grad_norm": 1.3169369638514503,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 11669
    },
    {
      "epoch": 0.1167,
      "grad_norm": 1.0024602739049056,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 11670
    },
    {
      "epoch": 0.11671,
      "grad_norm": 1.4408581457072513,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 11671
    },
    {
      "epoch": 0.11672,
      "grad_norm": 0.8851874480879731,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 11672
    },
    {
      "epoch": 0.11673,
      "grad_norm": 1.1906471160370373,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 11673
    },
    {
      "epoch": 0.11674,
      "grad_norm": 1.1719886567944,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 11674
    },
    {
      "epoch": 0.11675,
      "grad_norm": 1.3878506215850128,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11675
    },
    {
      "epoch": 0.11676,
      "grad_norm": 1.0904085137277049,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 11676
    },
    {
      "epoch": 0.11677,
      "grad_norm": 1.2116152858637508,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 11677
    },
    {
      "epoch": 0.11678,
      "grad_norm": 1.343503928364903,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 11678
    },
    {
      "epoch": 0.11679,
      "grad_norm": 0.937076872190132,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 11679
    },
    {
      "epoch": 0.1168,
      "grad_norm": 1.0519616690909357,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 11680
    },
    {
      "epoch": 0.11681,
      "grad_norm": 1.2549289807301722,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 11681
    },
    {
      "epoch": 0.11682,
      "grad_norm": 0.9579540582874558,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 11682
    },
    {
      "epoch": 0.11683,
      "grad_norm": 1.2352716839598907,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 11683
    },
    {
      "epoch": 0.11684,
      "grad_norm": 1.2156498815663923,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 11684
    },
    {
      "epoch": 0.11685,
      "grad_norm": 1.0363718832082467,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 11685
    },
    {
      "epoch": 0.11686,
      "grad_norm": 1.3470193086825817,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 11686
    },
    {
      "epoch": 0.11687,
      "grad_norm": 1.336107332557728,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 11687
    },
    {
      "epoch": 0.11688,
      "grad_norm": 1.148676523864226,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 11688
    },
    {
      "epoch": 0.11689,
      "grad_norm": 1.2732728738773667,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 11689
    },
    {
      "epoch": 0.1169,
      "grad_norm": 0.9761027416337912,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 11690
    },
    {
      "epoch": 0.11691,
      "grad_norm": 1.194763358774187,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 11691
    },
    {
      "epoch": 0.11692,
      "grad_norm": 1.1155053422367112,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 11692
    },
    {
      "epoch": 0.11693,
      "grad_norm": 1.0844912377130445,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 11693
    },
    {
      "epoch": 0.11694,
      "grad_norm": 1.360219760433983,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 11694
    },
    {
      "epoch": 0.11695,
      "grad_norm": 1.083480128114835,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 11695
    },
    {
      "epoch": 0.11696,
      "grad_norm": 1.209946409459336,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 11696
    },
    {
      "epoch": 0.11697,
      "grad_norm": 1.3042665518425522,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 11697
    },
    {
      "epoch": 0.11698,
      "grad_norm": 1.054254079024093,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 11698
    },
    {
      "epoch": 0.11699,
      "grad_norm": 1.4255708234494604,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 11699
    },
    {
      "epoch": 0.117,
      "grad_norm": 0.9222276852629732,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 11700
    },
    {
      "epoch": 0.11701,
      "grad_norm": 1.0913345591584294,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 11701
    },
    {
      "epoch": 0.11702,
      "grad_norm": 1.2159577552464076,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 11702
    },
    {
      "epoch": 0.11703,
      "grad_norm": 1.2755220220242691,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 11703
    },
    {
      "epoch": 0.11704,
      "grad_norm": 1.1010394356103956,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 11704
    },
    {
      "epoch": 0.11705,
      "grad_norm": 1.181555190974642,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 11705
    },
    {
      "epoch": 0.11706,
      "grad_norm": 1.355731758915925,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 11706
    },
    {
      "epoch": 0.11707,
      "grad_norm": 1.0451505789149136,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 11707
    },
    {
      "epoch": 0.11708,
      "grad_norm": 1.1674014223340736,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 11708
    },
    {
      "epoch": 0.11709,
      "grad_norm": 1.2271039669137753,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 11709
    },
    {
      "epoch": 0.1171,
      "grad_norm": 1.290536541283088,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 11710
    },
    {
      "epoch": 0.11711,
      "grad_norm": 1.192079023146588,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 11711
    },
    {
      "epoch": 0.11712,
      "grad_norm": 0.9538550952961976,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 11712
    },
    {
      "epoch": 0.11713,
      "grad_norm": 1.1694628178896578,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 11713
    },
    {
      "epoch": 0.11714,
      "grad_norm": 1.1266996139387908,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 11714
    },
    {
      "epoch": 0.11715,
      "grad_norm": 1.101302435927173,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11715
    },
    {
      "epoch": 0.11716,
      "grad_norm": 1.416114823275292,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 11716
    },
    {
      "epoch": 0.11717,
      "grad_norm": 0.8770688127299036,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 11717
    },
    {
      "epoch": 0.11718,
      "grad_norm": 0.9247194966585638,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 11718
    },
    {
      "epoch": 0.11719,
      "grad_norm": 1.1420126400773882,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 11719
    },
    {
      "epoch": 0.1172,
      "grad_norm": 1.2430497400012208,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 11720
    },
    {
      "epoch": 0.11721,
      "grad_norm": 1.3437462196938905,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 11721
    },
    {
      "epoch": 0.11722,
      "grad_norm": 1.1611268169534532,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 11722
    },
    {
      "epoch": 0.11723,
      "grad_norm": 1.1589328188926054,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 11723
    },
    {
      "epoch": 0.11724,
      "grad_norm": 1.1234276251597817,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 11724
    },
    {
      "epoch": 0.11725,
      "grad_norm": 1.392314288157773,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 11725
    },
    {
      "epoch": 0.11726,
      "grad_norm": 1.2016337841307816,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 11726
    },
    {
      "epoch": 0.11727,
      "grad_norm": 1.1672506920181773,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 11727
    },
    {
      "epoch": 0.11728,
      "grad_norm": 1.177675554505713,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 11728
    },
    {
      "epoch": 0.11729,
      "grad_norm": 1.1596721790007607,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 11729
    },
    {
      "epoch": 0.1173,
      "grad_norm": 1.2114792058895776,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 11730
    },
    {
      "epoch": 0.11731,
      "grad_norm": 1.2697478329728649,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 11731
    },
    {
      "epoch": 0.11732,
      "grad_norm": 1.2046703193379353,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 11732
    },
    {
      "epoch": 0.11733,
      "grad_norm": 1.1587625630956098,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 11733
    },
    {
      "epoch": 0.11734,
      "grad_norm": 0.9835383110437321,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11734
    },
    {
      "epoch": 0.11735,
      "grad_norm": 1.1354652923916457,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 11735
    },
    {
      "epoch": 0.11736,
      "grad_norm": 1.1795068819900612,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 11736
    },
    {
      "epoch": 0.11737,
      "grad_norm": 0.9590724542826325,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 11737
    },
    {
      "epoch": 0.11738,
      "grad_norm": 1.2529064551602063,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 11738
    },
    {
      "epoch": 0.11739,
      "grad_norm": 0.9692882883964946,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 11739
    },
    {
      "epoch": 0.1174,
      "grad_norm": 1.3363662322865228,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 11740
    },
    {
      "epoch": 0.11741,
      "grad_norm": 1.1388596723511775,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 11741
    },
    {
      "epoch": 0.11742,
      "grad_norm": 1.2061449922738792,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 11742
    },
    {
      "epoch": 0.11743,
      "grad_norm": 1.2673513330553907,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 11743
    },
    {
      "epoch": 0.11744,
      "grad_norm": 0.9992619777221167,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 11744
    },
    {
      "epoch": 0.11745,
      "grad_norm": 1.4369939742498439,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11745
    },
    {
      "epoch": 0.11746,
      "grad_norm": 1.0352605145126343,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 11746
    },
    {
      "epoch": 0.11747,
      "grad_norm": 1.372321493331476,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 11747
    },
    {
      "epoch": 0.11748,
      "grad_norm": 1.391876422983213,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 11748
    },
    {
      "epoch": 0.11749,
      "grad_norm": 1.135363815029667,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 11749
    },
    {
      "epoch": 0.1175,
      "grad_norm": 1.242728276892211,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 11750
    },
    {
      "epoch": 0.11751,
      "grad_norm": 1.1148012860516774,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 11751
    },
    {
      "epoch": 0.11752,
      "grad_norm": 1.3070790394082443,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 11752
    },
    {
      "epoch": 0.11753,
      "grad_norm": 0.9992661935620242,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 11753
    },
    {
      "epoch": 0.11754,
      "grad_norm": 1.134067039387444,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 11754
    },
    {
      "epoch": 0.11755,
      "grad_norm": 0.9589451796726519,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 11755
    },
    {
      "epoch": 0.11756,
      "grad_norm": 1.3568027476997095,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 11756
    },
    {
      "epoch": 0.11757,
      "grad_norm": 1.1644018380445698,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 11757
    },
    {
      "epoch": 0.11758,
      "grad_norm": 1.2216645892129108,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 11758
    },
    {
      "epoch": 0.11759,
      "grad_norm": 1.1649690820165117,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 11759
    },
    {
      "epoch": 0.1176,
      "grad_norm": 1.4111637333354248,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 11760
    },
    {
      "epoch": 0.11761,
      "grad_norm": 1.1386523484876632,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 11761
    },
    {
      "epoch": 0.11762,
      "grad_norm": 1.309097793898003,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 11762
    },
    {
      "epoch": 0.11763,
      "grad_norm": 1.0977987428654228,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 11763
    },
    {
      "epoch": 0.11764,
      "grad_norm": 1.3897224575122786,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 11764
    },
    {
      "epoch": 0.11765,
      "grad_norm": 1.1526270512131422,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 11765
    },
    {
      "epoch": 0.11766,
      "grad_norm": 1.0482334575456738,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 11766
    },
    {
      "epoch": 0.11767,
      "grad_norm": 1.1680362709673526,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 11767
    },
    {
      "epoch": 0.11768,
      "grad_norm": 0.8998671971903655,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 11768
    },
    {
      "epoch": 0.11769,
      "grad_norm": 1.2109642683247706,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 11769
    },
    {
      "epoch": 0.1177,
      "grad_norm": 1.1568339451894927,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 11770
    },
    {
      "epoch": 0.11771,
      "grad_norm": 1.2552008359510307,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 11771
    },
    {
      "epoch": 0.11772,
      "grad_norm": 1.140799491460487,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 11772
    },
    {
      "epoch": 0.11773,
      "grad_norm": 1.3359287045550823,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 11773
    },
    {
      "epoch": 0.11774,
      "grad_norm": 0.9020151031780287,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 11774
    },
    {
      "epoch": 0.11775,
      "grad_norm": 1.039904881152444,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 11775
    },
    {
      "epoch": 0.11776,
      "grad_norm": 1.1772503869017166,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 11776
    },
    {
      "epoch": 0.11777,
      "grad_norm": 1.0978295001721685,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 11777
    },
    {
      "epoch": 0.11778,
      "grad_norm": 1.2932395259715683,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 11778
    },
    {
      "epoch": 0.11779,
      "grad_norm": 1.062639311748465,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 11779
    },
    {
      "epoch": 0.1178,
      "grad_norm": 1.5522142112093957,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 11780
    },
    {
      "epoch": 0.11781,
      "grad_norm": 1.1901849556182456,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 11781
    },
    {
      "epoch": 0.11782,
      "grad_norm": 1.1360741091738087,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 11782
    },
    {
      "epoch": 0.11783,
      "grad_norm": 1.2956834409184492,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 11783
    },
    {
      "epoch": 0.11784,
      "grad_norm": 0.9080434495652585,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 11784
    },
    {
      "epoch": 0.11785,
      "grad_norm": 1.3873971037919273,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 11785
    },
    {
      "epoch": 0.11786,
      "grad_norm": 0.9221289765526292,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 11786
    },
    {
      "epoch": 0.11787,
      "grad_norm": 1.0909130831395086,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 11787
    },
    {
      "epoch": 0.11788,
      "grad_norm": 1.1162084751400934,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 11788
    },
    {
      "epoch": 0.11789,
      "grad_norm": 1.0164636722393448,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 11789
    },
    {
      "epoch": 0.1179,
      "grad_norm": 1.289296214700985,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 11790
    },
    {
      "epoch": 0.11791,
      "grad_norm": 1.197526365357239,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 11791
    },
    {
      "epoch": 0.11792,
      "grad_norm": 1.3794954189831017,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 11792
    },
    {
      "epoch": 0.11793,
      "grad_norm": 1.3379948580447394,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 11793
    },
    {
      "epoch": 0.11794,
      "grad_norm": 0.9973265523988945,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 11794
    },
    {
      "epoch": 0.11795,
      "grad_norm": 1.0920690685918633,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 11795
    },
    {
      "epoch": 0.11796,
      "grad_norm": 1.175601375502935,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 11796
    },
    {
      "epoch": 0.11797,
      "grad_norm": 1.1703415868934268,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 11797
    },
    {
      "epoch": 0.11798,
      "grad_norm": 1.3008008929070882,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 11798
    },
    {
      "epoch": 0.11799,
      "grad_norm": 1.0314930400630682,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 11799
    },
    {
      "epoch": 0.118,
      "grad_norm": 1.1375274780427134,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 11800
    },
    {
      "epoch": 0.11801,
      "grad_norm": 1.0380180520004971,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 11801
    },
    {
      "epoch": 0.11802,
      "grad_norm": 1.2499064995107485,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 11802
    },
    {
      "epoch": 0.11803,
      "grad_norm": 0.9206344380911405,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 11803
    },
    {
      "epoch": 0.11804,
      "grad_norm": 1.0563731974738848,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 11804
    },
    {
      "epoch": 0.11805,
      "grad_norm": 1.2138022475057453,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 11805
    },
    {
      "epoch": 0.11806,
      "grad_norm": 1.2367306427185214,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 11806
    },
    {
      "epoch": 0.11807,
      "grad_norm": 1.2203777945761236,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 11807
    },
    {
      "epoch": 0.11808,
      "grad_norm": 1.0586284436372069,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 11808
    },
    {
      "epoch": 0.11809,
      "grad_norm": 1.1556376689324874,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 11809
    },
    {
      "epoch": 0.1181,
      "grad_norm": 1.0478388340716422,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 11810
    },
    {
      "epoch": 0.11811,
      "grad_norm": 0.9796072523473666,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 11811
    },
    {
      "epoch": 0.11812,
      "grad_norm": 1.3267520540589823,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 11812
    },
    {
      "epoch": 0.11813,
      "grad_norm": 1.0624381801166245,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 11813
    },
    {
      "epoch": 0.11814,
      "grad_norm": 1.4611375132742075,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 11814
    },
    {
      "epoch": 0.11815,
      "grad_norm": 1.5778823206110453,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 11815
    },
    {
      "epoch": 0.11816,
      "grad_norm": 1.2354906198845372,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 11816
    },
    {
      "epoch": 0.11817,
      "grad_norm": 1.1693476173681472,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 11817
    },
    {
      "epoch": 0.11818,
      "grad_norm": 1.1420531165461065,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 11818
    },
    {
      "epoch": 0.11819,
      "grad_norm": 1.2909973798684644,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 11819
    },
    {
      "epoch": 0.1182,
      "grad_norm": 1.2598501868393714,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 11820
    },
    {
      "epoch": 0.11821,
      "grad_norm": 1.0835462615430216,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 11821
    },
    {
      "epoch": 0.11822,
      "grad_norm": 1.2044057463788023,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 11822
    },
    {
      "epoch": 0.11823,
      "grad_norm": 1.0910825742504715,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 11823
    },
    {
      "epoch": 0.11824,
      "grad_norm": 1.2386646366156195,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 11824
    },
    {
      "epoch": 0.11825,
      "grad_norm": 1.0416502202053222,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 11825
    },
    {
      "epoch": 0.11826,
      "grad_norm": 1.2848901046610308,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 11826
    },
    {
      "epoch": 0.11827,
      "grad_norm": 1.2023286965580762,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 11827
    },
    {
      "epoch": 0.11828,
      "grad_norm": 1.2356902108526424,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 11828
    },
    {
      "epoch": 0.11829,
      "grad_norm": 1.088816813835146,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 11829
    },
    {
      "epoch": 0.1183,
      "grad_norm": 1.3892098766726853,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 11830
    },
    {
      "epoch": 0.11831,
      "grad_norm": 1.1409473182564889,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 11831
    },
    {
      "epoch": 0.11832,
      "grad_norm": 0.9492869584044819,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 11832
    },
    {
      "epoch": 0.11833,
      "grad_norm": 1.1940304123684042,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 11833
    },
    {
      "epoch": 0.11834,
      "grad_norm": 1.3422321290193753,
      "learning_rate": 0.003,
      "loss": 4.1046,
      "step": 11834
    },
    {
      "epoch": 0.11835,
      "grad_norm": 0.9584281942908637,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 11835
    },
    {
      "epoch": 0.11836,
      "grad_norm": 1.2123845815097705,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 11836
    },
    {
      "epoch": 0.11837,
      "grad_norm": 1.2698491999443162,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 11837
    },
    {
      "epoch": 0.11838,
      "grad_norm": 1.3122355602083897,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 11838
    },
    {
      "epoch": 0.11839,
      "grad_norm": 1.105366259392173,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 11839
    },
    {
      "epoch": 0.1184,
      "grad_norm": 1.200875820871659,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 11840
    },
    {
      "epoch": 0.11841,
      "grad_norm": 1.1742575594715312,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 11841
    },
    {
      "epoch": 0.11842,
      "grad_norm": 1.0467822742428252,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 11842
    },
    {
      "epoch": 0.11843,
      "grad_norm": 1.2674758216305462,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 11843
    },
    {
      "epoch": 0.11844,
      "grad_norm": 0.8946428012259164,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 11844
    },
    {
      "epoch": 0.11845,
      "grad_norm": 1.5005655593944138,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 11845
    },
    {
      "epoch": 0.11846,
      "grad_norm": 0.985039493861634,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 11846
    },
    {
      "epoch": 0.11847,
      "grad_norm": 1.420682429044482,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 11847
    },
    {
      "epoch": 0.11848,
      "grad_norm": 1.1624676672252878,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 11848
    },
    {
      "epoch": 0.11849,
      "grad_norm": 1.3452780755205915,
      "learning_rate": 0.003,
      "loss": 4.0988,
      "step": 11849
    },
    {
      "epoch": 0.1185,
      "grad_norm": 1.0704095122382435,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11850
    },
    {
      "epoch": 0.11851,
      "grad_norm": 1.0919642793213178,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 11851
    },
    {
      "epoch": 0.11852,
      "grad_norm": 1.4400788622045355,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 11852
    },
    {
      "epoch": 0.11853,
      "grad_norm": 1.1511166394484542,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 11853
    },
    {
      "epoch": 0.11854,
      "grad_norm": 1.2657967835115171,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 11854
    },
    {
      "epoch": 0.11855,
      "grad_norm": 1.061771798870032,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 11855
    },
    {
      "epoch": 0.11856,
      "grad_norm": 1.1199224960482557,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 11856
    },
    {
      "epoch": 0.11857,
      "grad_norm": 1.22166661194193,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 11857
    },
    {
      "epoch": 0.11858,
      "grad_norm": 1.0497049888388341,
      "learning_rate": 0.003,
      "loss": 4.1052,
      "step": 11858
    },
    {
      "epoch": 0.11859,
      "grad_norm": 1.3246403958345156,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 11859
    },
    {
      "epoch": 0.1186,
      "grad_norm": 1.0904610992730863,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 11860
    },
    {
      "epoch": 0.11861,
      "grad_norm": 1.1682035816854246,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 11861
    },
    {
      "epoch": 0.11862,
      "grad_norm": 1.129212585599079,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 11862
    },
    {
      "epoch": 0.11863,
      "grad_norm": 1.0838812912698164,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 11863
    },
    {
      "epoch": 0.11864,
      "grad_norm": 1.0755256728268683,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 11864
    },
    {
      "epoch": 0.11865,
      "grad_norm": 1.2686653387821047,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 11865
    },
    {
      "epoch": 0.11866,
      "grad_norm": 1.082041014866532,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 11866
    },
    {
      "epoch": 0.11867,
      "grad_norm": 1.224353513755153,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11867
    },
    {
      "epoch": 0.11868,
      "grad_norm": 1.3885837099920573,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 11868
    },
    {
      "epoch": 0.11869,
      "grad_norm": 1.1683078148841186,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 11869
    },
    {
      "epoch": 0.1187,
      "grad_norm": 1.464726853426062,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 11870
    },
    {
      "epoch": 0.11871,
      "grad_norm": 0.9331683604512,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 11871
    },
    {
      "epoch": 0.11872,
      "grad_norm": 0.9556768456530206,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 11872
    },
    {
      "epoch": 0.11873,
      "grad_norm": 1.1912510423155762,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 11873
    },
    {
      "epoch": 0.11874,
      "grad_norm": 1.1284462075181023,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 11874
    },
    {
      "epoch": 0.11875,
      "grad_norm": 1.3263536085256413,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 11875
    },
    {
      "epoch": 0.11876,
      "grad_norm": 1.338889444852054,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 11876
    },
    {
      "epoch": 0.11877,
      "grad_norm": 1.2191734588981227,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 11877
    },
    {
      "epoch": 0.11878,
      "grad_norm": 1.1458563136253266,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 11878
    },
    {
      "epoch": 0.11879,
      "grad_norm": 1.0174231879677096,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 11879
    },
    {
      "epoch": 0.1188,
      "grad_norm": 1.2214455142653529,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 11880
    },
    {
      "epoch": 0.11881,
      "grad_norm": 1.006528896792693,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 11881
    },
    {
      "epoch": 0.11882,
      "grad_norm": 1.4040376973932613,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 11882
    },
    {
      "epoch": 0.11883,
      "grad_norm": 1.028507125117693,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 11883
    },
    {
      "epoch": 0.11884,
      "grad_norm": 1.215953435707132,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 11884
    },
    {
      "epoch": 0.11885,
      "grad_norm": 1.1170954606641776,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 11885
    },
    {
      "epoch": 0.11886,
      "grad_norm": 1.0222720334577216,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 11886
    },
    {
      "epoch": 0.11887,
      "grad_norm": 1.1071360827090961,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 11887
    },
    {
      "epoch": 0.11888,
      "grad_norm": 1.3166253130255614,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 11888
    },
    {
      "epoch": 0.11889,
      "grad_norm": 1.2640837305337336,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 11889
    },
    {
      "epoch": 0.1189,
      "grad_norm": 1.0385008572054317,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 11890
    },
    {
      "epoch": 0.11891,
      "grad_norm": 1.2492202823351668,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 11891
    },
    {
      "epoch": 0.11892,
      "grad_norm": 1.2917337053073015,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 11892
    },
    {
      "epoch": 0.11893,
      "grad_norm": 1.133444321773112,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 11893
    },
    {
      "epoch": 0.11894,
      "grad_norm": 1.2816714042210788,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 11894
    },
    {
      "epoch": 0.11895,
      "grad_norm": 1.1511928847349535,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 11895
    },
    {
      "epoch": 0.11896,
      "grad_norm": 1.1723599609058912,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 11896
    },
    {
      "epoch": 0.11897,
      "grad_norm": 1.0912912177129093,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 11897
    },
    {
      "epoch": 0.11898,
      "grad_norm": 1.2583528065403673,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 11898
    },
    {
      "epoch": 0.11899,
      "grad_norm": 1.2696257492886351,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 11899
    },
    {
      "epoch": 0.119,
      "grad_norm": 1.1657896014255376,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 11900
    },
    {
      "epoch": 0.11901,
      "grad_norm": 1.2337083368973147,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 11901
    },
    {
      "epoch": 0.11902,
      "grad_norm": 1.0673704845935785,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 11902
    },
    {
      "epoch": 0.11903,
      "grad_norm": 1.2072025133556539,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 11903
    },
    {
      "epoch": 0.11904,
      "grad_norm": 0.9622886881189869,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 11904
    },
    {
      "epoch": 0.11905,
      "grad_norm": 1.5013454950863534,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 11905
    },
    {
      "epoch": 0.11906,
      "grad_norm": 0.9027233765240715,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 11906
    },
    {
      "epoch": 0.11907,
      "grad_norm": 1.2764946041770708,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 11907
    },
    {
      "epoch": 0.11908,
      "grad_norm": 1.0055649877091324,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 11908
    },
    {
      "epoch": 0.11909,
      "grad_norm": 1.5417677310497,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 11909
    },
    {
      "epoch": 0.1191,
      "grad_norm": 1.1311757539323997,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 11910
    },
    {
      "epoch": 0.11911,
      "grad_norm": 1.1679816226515805,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 11911
    },
    {
      "epoch": 0.11912,
      "grad_norm": 1.0541786047876225,
      "learning_rate": 0.003,
      "loss": 4.1158,
      "step": 11912
    },
    {
      "epoch": 0.11913,
      "grad_norm": 1.2322905506078932,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 11913
    },
    {
      "epoch": 0.11914,
      "grad_norm": 1.3491037394150307,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 11914
    },
    {
      "epoch": 0.11915,
      "grad_norm": 0.8938748705932589,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 11915
    },
    {
      "epoch": 0.11916,
      "grad_norm": 1.0615958706145021,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 11916
    },
    {
      "epoch": 0.11917,
      "grad_norm": 1.3243478475913795,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 11917
    },
    {
      "epoch": 0.11918,
      "grad_norm": 0.9752861321174783,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 11918
    },
    {
      "epoch": 0.11919,
      "grad_norm": 1.1617839452425116,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 11919
    },
    {
      "epoch": 0.1192,
      "grad_norm": 1.2323714936097194,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 11920
    },
    {
      "epoch": 0.11921,
      "grad_norm": 1.2292482817665966,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 11921
    },
    {
      "epoch": 0.11922,
      "grad_norm": 1.2663608044619241,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 11922
    },
    {
      "epoch": 0.11923,
      "grad_norm": 1.0587798818379033,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 11923
    },
    {
      "epoch": 0.11924,
      "grad_norm": 1.0895636196749694,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 11924
    },
    {
      "epoch": 0.11925,
      "grad_norm": 1.1325718267024283,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 11925
    },
    {
      "epoch": 0.11926,
      "grad_norm": 1.3151618929161546,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 11926
    },
    {
      "epoch": 0.11927,
      "grad_norm": 1.001849668904841,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 11927
    },
    {
      "epoch": 0.11928,
      "grad_norm": 1.4619761719316582,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 11928
    },
    {
      "epoch": 0.11929,
      "grad_norm": 0.930796532554024,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 11929
    },
    {
      "epoch": 0.1193,
      "grad_norm": 1.2378138922895818,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 11930
    },
    {
      "epoch": 0.11931,
      "grad_norm": 1.146907374465275,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 11931
    },
    {
      "epoch": 0.11932,
      "grad_norm": 0.928290416437596,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 11932
    },
    {
      "epoch": 0.11933,
      "grad_norm": 1.0338660433415998,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 11933
    },
    {
      "epoch": 0.11934,
      "grad_norm": 1.2049870696278178,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 11934
    },
    {
      "epoch": 0.11935,
      "grad_norm": 1.3410424655125115,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 11935
    },
    {
      "epoch": 0.11936,
      "grad_norm": 1.0913007430410453,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 11936
    },
    {
      "epoch": 0.11937,
      "grad_norm": 1.3956547335981675,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 11937
    },
    {
      "epoch": 0.11938,
      "grad_norm": 1.1526516474361628,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 11938
    },
    {
      "epoch": 0.11939,
      "grad_norm": 1.0661020181841054,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 11939
    },
    {
      "epoch": 0.1194,
      "grad_norm": 1.215621630171941,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 11940
    },
    {
      "epoch": 0.11941,
      "grad_norm": 1.3620176372301322,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 11941
    },
    {
      "epoch": 0.11942,
      "grad_norm": 1.076803965161367,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 11942
    },
    {
      "epoch": 0.11943,
      "grad_norm": 1.3137769906580026,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 11943
    },
    {
      "epoch": 0.11944,
      "grad_norm": 1.2734546183235818,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 11944
    },
    {
      "epoch": 0.11945,
      "grad_norm": 1.240548250296292,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 11945
    },
    {
      "epoch": 0.11946,
      "grad_norm": 1.2086492743738333,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 11946
    },
    {
      "epoch": 0.11947,
      "grad_norm": 1.0632751865289534,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 11947
    },
    {
      "epoch": 0.11948,
      "grad_norm": 1.2100696179246582,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 11948
    },
    {
      "epoch": 0.11949,
      "grad_norm": 1.2304485972274908,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 11949
    },
    {
      "epoch": 0.1195,
      "grad_norm": 1.1614419796519644,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 11950
    },
    {
      "epoch": 0.11951,
      "grad_norm": 0.9685120514135239,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 11951
    },
    {
      "epoch": 0.11952,
      "grad_norm": 1.1708872350226684,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 11952
    },
    {
      "epoch": 0.11953,
      "grad_norm": 1.1972019050800324,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 11953
    },
    {
      "epoch": 0.11954,
      "grad_norm": 1.1990332756494675,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 11954
    },
    {
      "epoch": 0.11955,
      "grad_norm": 1.1167038436736807,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 11955
    },
    {
      "epoch": 0.11956,
      "grad_norm": 1.138929420282013,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 11956
    },
    {
      "epoch": 0.11957,
      "grad_norm": 1.2896652846169314,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 11957
    },
    {
      "epoch": 0.11958,
      "grad_norm": 1.1849380305493635,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 11958
    },
    {
      "epoch": 0.11959,
      "grad_norm": 0.9725598923562588,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 11959
    },
    {
      "epoch": 0.1196,
      "grad_norm": 1.2310019243117136,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 11960
    },
    {
      "epoch": 0.11961,
      "grad_norm": 1.1828789776611688,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 11961
    },
    {
      "epoch": 0.11962,
      "grad_norm": 1.207568363453171,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 11962
    },
    {
      "epoch": 0.11963,
      "grad_norm": 1.23441993888683,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 11963
    },
    {
      "epoch": 0.11964,
      "grad_norm": 1.081282831387257,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 11964
    },
    {
      "epoch": 0.11965,
      "grad_norm": 1.0644562331563587,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 11965
    },
    {
      "epoch": 0.11966,
      "grad_norm": 1.2165695726517882,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 11966
    },
    {
      "epoch": 0.11967,
      "grad_norm": 1.1643580839905228,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 11967
    },
    {
      "epoch": 0.11968,
      "grad_norm": 1.0694960681873658,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 11968
    },
    {
      "epoch": 0.11969,
      "grad_norm": 1.2798763528913761,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 11969
    },
    {
      "epoch": 0.1197,
      "grad_norm": 1.0104359886256886,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 11970
    },
    {
      "epoch": 0.11971,
      "grad_norm": 1.4908390782821748,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 11971
    },
    {
      "epoch": 0.11972,
      "grad_norm": 1.1262404046440087,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 11972
    },
    {
      "epoch": 0.11973,
      "grad_norm": 1.3700582843926272,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 11973
    },
    {
      "epoch": 0.11974,
      "grad_norm": 1.042113991640309,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 11974
    },
    {
      "epoch": 0.11975,
      "grad_norm": 1.3250275421778095,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 11975
    },
    {
      "epoch": 0.11976,
      "grad_norm": 0.8820121746589544,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 11976
    },
    {
      "epoch": 0.11977,
      "grad_norm": 0.9874024178086407,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 11977
    },
    {
      "epoch": 0.11978,
      "grad_norm": 1.2509197830941192,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 11978
    },
    {
      "epoch": 0.11979,
      "grad_norm": 1.4301678995271991,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 11979
    },
    {
      "epoch": 0.1198,
      "grad_norm": 1.2225856918692968,
      "learning_rate": 0.003,
      "loss": 4.109,
      "step": 11980
    },
    {
      "epoch": 0.11981,
      "grad_norm": 0.9750623949395286,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 11981
    },
    {
      "epoch": 0.11982,
      "grad_norm": 1.1936375123088359,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 11982
    },
    {
      "epoch": 0.11983,
      "grad_norm": 1.1280323217958304,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 11983
    },
    {
      "epoch": 0.11984,
      "grad_norm": 1.427743469061658,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 11984
    },
    {
      "epoch": 0.11985,
      "grad_norm": 1.003738628950787,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 11985
    },
    {
      "epoch": 0.11986,
      "grad_norm": 1.4284484153405892,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 11986
    },
    {
      "epoch": 0.11987,
      "grad_norm": 1.0369046324770255,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 11987
    },
    {
      "epoch": 0.11988,
      "grad_norm": 1.0507093817761055,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 11988
    },
    {
      "epoch": 0.11989,
      "grad_norm": 1.2157923701396773,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 11989
    },
    {
      "epoch": 0.1199,
      "grad_norm": 1.163075834187405,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 11990
    },
    {
      "epoch": 0.11991,
      "grad_norm": 1.1397923959795142,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 11991
    },
    {
      "epoch": 0.11992,
      "grad_norm": 1.1696029570490936,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 11992
    },
    {
      "epoch": 0.11993,
      "grad_norm": 1.0055737816247683,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 11993
    },
    {
      "epoch": 0.11994,
      "grad_norm": 1.4353057542413166,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 11994
    },
    {
      "epoch": 0.11995,
      "grad_norm": 1.0107043389029158,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 11995
    },
    {
      "epoch": 0.11996,
      "grad_norm": 1.3901873327466496,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 11996
    },
    {
      "epoch": 0.11997,
      "grad_norm": 1.0343379072321632,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 11997
    },
    {
      "epoch": 0.11998,
      "grad_norm": 1.1937023604692176,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 11998
    },
    {
      "epoch": 0.11999,
      "grad_norm": 1.40236106905162,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 11999
    },
    {
      "epoch": 0.12,
      "grad_norm": 1.026542270252335,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 12000
    },
    {
      "epoch": 0.12001,
      "grad_norm": 1.1423095300856636,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 12001
    },
    {
      "epoch": 0.12002,
      "grad_norm": 1.3492444697177328,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 12002
    },
    {
      "epoch": 0.12003,
      "grad_norm": 1.03747627559624,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 12003
    },
    {
      "epoch": 0.12004,
      "grad_norm": 1.098077915902504,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 12004
    },
    {
      "epoch": 0.12005,
      "grad_norm": 1.1083046035044317,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 12005
    },
    {
      "epoch": 0.12006,
      "grad_norm": 1.2956516804135265,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 12006
    },
    {
      "epoch": 0.12007,
      "grad_norm": 1.218187648921116,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 12007
    },
    {
      "epoch": 0.12008,
      "grad_norm": 1.2671842173164862,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 12008
    },
    {
      "epoch": 0.12009,
      "grad_norm": 1.0001998120171887,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 12009
    },
    {
      "epoch": 0.1201,
      "grad_norm": 1.2404288265445556,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 12010
    },
    {
      "epoch": 0.12011,
      "grad_norm": 1.186709338504087,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 12011
    },
    {
      "epoch": 0.12012,
      "grad_norm": 0.946874831717622,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 12012
    },
    {
      "epoch": 0.12013,
      "grad_norm": 1.2537579467676088,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 12013
    },
    {
      "epoch": 0.12014,
      "grad_norm": 0.9993314650918684,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 12014
    },
    {
      "epoch": 0.12015,
      "grad_norm": 1.4004563110169088,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 12015
    },
    {
      "epoch": 0.12016,
      "grad_norm": 1.156141295287908,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 12016
    },
    {
      "epoch": 0.12017,
      "grad_norm": 1.2964729871447291,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 12017
    },
    {
      "epoch": 0.12018,
      "grad_norm": 1.0999613690530028,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 12018
    },
    {
      "epoch": 0.12019,
      "grad_norm": 1.1599848241675983,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 12019
    },
    {
      "epoch": 0.1202,
      "grad_norm": 1.1798981477348156,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 12020
    },
    {
      "epoch": 0.12021,
      "grad_norm": 1.0775703260675573,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 12021
    },
    {
      "epoch": 0.12022,
      "grad_norm": 1.180706310665044,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 12022
    },
    {
      "epoch": 0.12023,
      "grad_norm": 1.008178597261202,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 12023
    },
    {
      "epoch": 0.12024,
      "grad_norm": 1.22542908123363,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 12024
    },
    {
      "epoch": 0.12025,
      "grad_norm": 0.9947915674056316,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12025
    },
    {
      "epoch": 0.12026,
      "grad_norm": 1.5178238222188867,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 12026
    },
    {
      "epoch": 0.12027,
      "grad_norm": 1.1898559508214746,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12027
    },
    {
      "epoch": 0.12028,
      "grad_norm": 1.423286550054181,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 12028
    },
    {
      "epoch": 0.12029,
      "grad_norm": 1.1405204709773993,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 12029
    },
    {
      "epoch": 0.1203,
      "grad_norm": 1.200577829785475,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 12030
    },
    {
      "epoch": 0.12031,
      "grad_norm": 0.9370161339686052,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 12031
    },
    {
      "epoch": 0.12032,
      "grad_norm": 1.1990570439275474,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12032
    },
    {
      "epoch": 0.12033,
      "grad_norm": 1.0987107400462872,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 12033
    },
    {
      "epoch": 0.12034,
      "grad_norm": 1.1543926616087123,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 12034
    },
    {
      "epoch": 0.12035,
      "grad_norm": 1.0314508365784196,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 12035
    },
    {
      "epoch": 0.12036,
      "grad_norm": 1.1627404611666088,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 12036
    },
    {
      "epoch": 0.12037,
      "grad_norm": 1.3173377590411293,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 12037
    },
    {
      "epoch": 0.12038,
      "grad_norm": 1.513419415873118,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 12038
    },
    {
      "epoch": 0.12039,
      "grad_norm": 0.8914448292836505,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 12039
    },
    {
      "epoch": 0.1204,
      "grad_norm": 1.1146273437131011,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 12040
    },
    {
      "epoch": 0.12041,
      "grad_norm": 1.3983349421031432,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 12041
    },
    {
      "epoch": 0.12042,
      "grad_norm": 1.0342958303049343,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 12042
    },
    {
      "epoch": 0.12043,
      "grad_norm": 1.3699563847933571,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 12043
    },
    {
      "epoch": 0.12044,
      "grad_norm": 1.1499691529560814,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 12044
    },
    {
      "epoch": 0.12045,
      "grad_norm": 1.1554235526088477,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 12045
    },
    {
      "epoch": 0.12046,
      "grad_norm": 1.1070632794794475,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 12046
    },
    {
      "epoch": 0.12047,
      "grad_norm": 1.1989324046303953,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 12047
    },
    {
      "epoch": 0.12048,
      "grad_norm": 1.3734754037694892,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 12048
    },
    {
      "epoch": 0.12049,
      "grad_norm": 0.9346259551884931,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 12049
    },
    {
      "epoch": 0.1205,
      "grad_norm": 1.2286880245496783,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 12050
    },
    {
      "epoch": 0.12051,
      "grad_norm": 1.2931944632957222,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 12051
    },
    {
      "epoch": 0.12052,
      "grad_norm": 1.186952328950266,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 12052
    },
    {
      "epoch": 0.12053,
      "grad_norm": 1.0423335340296913,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 12053
    },
    {
      "epoch": 0.12054,
      "grad_norm": 1.291648787743366,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 12054
    },
    {
      "epoch": 0.12055,
      "grad_norm": 1.0061073650939143,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 12055
    },
    {
      "epoch": 0.12056,
      "grad_norm": 1.3839942243577954,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 12056
    },
    {
      "epoch": 0.12057,
      "grad_norm": 0.9752307709453086,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 12057
    },
    {
      "epoch": 0.12058,
      "grad_norm": 1.2270914306104124,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 12058
    },
    {
      "epoch": 0.12059,
      "grad_norm": 1.0029707921281812,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 12059
    },
    {
      "epoch": 0.1206,
      "grad_norm": 1.1581116882653377,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 12060
    },
    {
      "epoch": 0.12061,
      "grad_norm": 1.16058489656729,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 12061
    },
    {
      "epoch": 0.12062,
      "grad_norm": 1.0890114409959768,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 12062
    },
    {
      "epoch": 0.12063,
      "grad_norm": 1.4169170730642937,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 12063
    },
    {
      "epoch": 0.12064,
      "grad_norm": 1.6320055790805699,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 12064
    },
    {
      "epoch": 0.12065,
      "grad_norm": 1.0678708229046874,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12065
    },
    {
      "epoch": 0.12066,
      "grad_norm": 1.4154088742505124,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 12066
    },
    {
      "epoch": 0.12067,
      "grad_norm": 1.0580579174780345,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 12067
    },
    {
      "epoch": 0.12068,
      "grad_norm": 1.1573381957667936,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 12068
    },
    {
      "epoch": 0.12069,
      "grad_norm": 1.2075686702373492,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 12069
    },
    {
      "epoch": 0.1207,
      "grad_norm": 1.180040557288595,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 12070
    },
    {
      "epoch": 0.12071,
      "grad_norm": 1.3809455658461673,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 12071
    },
    {
      "epoch": 0.12072,
      "grad_norm": 0.959954898153743,
      "learning_rate": 0.003,
      "loss": 4.1134,
      "step": 12072
    },
    {
      "epoch": 0.12073,
      "grad_norm": 1.2776334910268239,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 12073
    },
    {
      "epoch": 0.12074,
      "grad_norm": 1.3060968377539421,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 12074
    },
    {
      "epoch": 0.12075,
      "grad_norm": 0.9904143169299933,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 12075
    },
    {
      "epoch": 0.12076,
      "grad_norm": 1.3380997950361384,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 12076
    },
    {
      "epoch": 0.12077,
      "grad_norm": 1.162997245731524,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 12077
    },
    {
      "epoch": 0.12078,
      "grad_norm": 1.2680231106066544,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 12078
    },
    {
      "epoch": 0.12079,
      "grad_norm": 1.106780471919048,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 12079
    },
    {
      "epoch": 0.1208,
      "grad_norm": 1.3749209213072149,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 12080
    },
    {
      "epoch": 0.12081,
      "grad_norm": 1.0047272466191906,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 12081
    },
    {
      "epoch": 0.12082,
      "grad_norm": 1.2394249155802766,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 12082
    },
    {
      "epoch": 0.12083,
      "grad_norm": 1.1454349115566007,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 12083
    },
    {
      "epoch": 0.12084,
      "grad_norm": 1.2837907309804073,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 12084
    },
    {
      "epoch": 0.12085,
      "grad_norm": 1.1247969860112579,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 12085
    },
    {
      "epoch": 0.12086,
      "grad_norm": 1.281210661018199,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 12086
    },
    {
      "epoch": 0.12087,
      "grad_norm": 1.2889817385160782,
      "learning_rate": 0.003,
      "loss": 4.1129,
      "step": 12087
    },
    {
      "epoch": 0.12088,
      "grad_norm": 0.9718926809926418,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 12088
    },
    {
      "epoch": 0.12089,
      "grad_norm": 1.1986561179093216,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 12089
    },
    {
      "epoch": 0.1209,
      "grad_norm": 1.0917914177599881,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 12090
    },
    {
      "epoch": 0.12091,
      "grad_norm": 1.3732130609897086,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 12091
    },
    {
      "epoch": 0.12092,
      "grad_norm": 0.9824315718137555,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 12092
    },
    {
      "epoch": 0.12093,
      "grad_norm": 1.4181887491487821,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 12093
    },
    {
      "epoch": 0.12094,
      "grad_norm": 1.0585609680051242,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 12094
    },
    {
      "epoch": 0.12095,
      "grad_norm": 1.3200300643697576,
      "learning_rate": 0.003,
      "loss": 4.0915,
      "step": 12095
    },
    {
      "epoch": 0.12096,
      "grad_norm": 1.1183009094442204,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 12096
    },
    {
      "epoch": 0.12097,
      "grad_norm": 1.0695065139721762,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 12097
    },
    {
      "epoch": 0.12098,
      "grad_norm": 1.301116622282068,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 12098
    },
    {
      "epoch": 0.12099,
      "grad_norm": 1.0517241296656699,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 12099
    },
    {
      "epoch": 0.121,
      "grad_norm": 1.124424750696396,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 12100
    },
    {
      "epoch": 0.12101,
      "grad_norm": 1.057831288939637,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 12101
    },
    {
      "epoch": 0.12102,
      "grad_norm": 1.1101237754091218,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 12102
    },
    {
      "epoch": 0.12103,
      "grad_norm": 0.9612959979260564,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12103
    },
    {
      "epoch": 0.12104,
      "grad_norm": 1.2061186388314846,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 12104
    },
    {
      "epoch": 0.12105,
      "grad_norm": 0.9678635059546692,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 12105
    },
    {
      "epoch": 0.12106,
      "grad_norm": 1.3505427330387154,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 12106
    },
    {
      "epoch": 0.12107,
      "grad_norm": 1.1774391224451677,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 12107
    },
    {
      "epoch": 0.12108,
      "grad_norm": 1.2054664808823852,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 12108
    },
    {
      "epoch": 0.12109,
      "grad_norm": 1.3781623075709073,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 12109
    },
    {
      "epoch": 0.1211,
      "grad_norm": 1.2233097675635973,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 12110
    },
    {
      "epoch": 0.12111,
      "grad_norm": 1.2557489541050741,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 12111
    },
    {
      "epoch": 0.12112,
      "grad_norm": 1.110510268205828,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 12112
    },
    {
      "epoch": 0.12113,
      "grad_norm": 1.1942343402586795,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 12113
    },
    {
      "epoch": 0.12114,
      "grad_norm": 1.1603250584499352,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 12114
    },
    {
      "epoch": 0.12115,
      "grad_norm": 1.3816403360973941,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 12115
    },
    {
      "epoch": 0.12116,
      "grad_norm": 0.9444863512485694,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 12116
    },
    {
      "epoch": 0.12117,
      "grad_norm": 1.080848550044189,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 12117
    },
    {
      "epoch": 0.12118,
      "grad_norm": 1.1995298311432099,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 12118
    },
    {
      "epoch": 0.12119,
      "grad_norm": 1.3314668602134827,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 12119
    },
    {
      "epoch": 0.1212,
      "grad_norm": 1.1715889058212041,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 12120
    },
    {
      "epoch": 0.12121,
      "grad_norm": 1.3881009117703809,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 12121
    },
    {
      "epoch": 0.12122,
      "grad_norm": 1.1412877222655404,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 12122
    },
    {
      "epoch": 0.12123,
      "grad_norm": 1.2581104621689234,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 12123
    },
    {
      "epoch": 0.12124,
      "grad_norm": 1.0606037278927645,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 12124
    },
    {
      "epoch": 0.12125,
      "grad_norm": 1.1190145081517746,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 12125
    },
    {
      "epoch": 0.12126,
      "grad_norm": 1.1601406262785494,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 12126
    },
    {
      "epoch": 0.12127,
      "grad_norm": 1.2080466202977465,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 12127
    },
    {
      "epoch": 0.12128,
      "grad_norm": 1.1227257472864887,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 12128
    },
    {
      "epoch": 0.12129,
      "grad_norm": 1.1102649147398123,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 12129
    },
    {
      "epoch": 0.1213,
      "grad_norm": 1.1537725039029854,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 12130
    },
    {
      "epoch": 0.12131,
      "grad_norm": 1.3114867170222135,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 12131
    },
    {
      "epoch": 0.12132,
      "grad_norm": 0.9234569238063531,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 12132
    },
    {
      "epoch": 0.12133,
      "grad_norm": 1.1279748123445532,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 12133
    },
    {
      "epoch": 0.12134,
      "grad_norm": 1.2600065947038894,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 12134
    },
    {
      "epoch": 0.12135,
      "grad_norm": 1.007780976926589,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 12135
    },
    {
      "epoch": 0.12136,
      "grad_norm": 1.1984153877860733,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 12136
    },
    {
      "epoch": 0.12137,
      "grad_norm": 1.423530162485099,
      "learning_rate": 0.003,
      "loss": 4.1151,
      "step": 12137
    },
    {
      "epoch": 0.12138,
      "grad_norm": 1.145701795096461,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 12138
    },
    {
      "epoch": 0.12139,
      "grad_norm": 1.189173877147463,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 12139
    },
    {
      "epoch": 0.1214,
      "grad_norm": 1.1973702326318612,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 12140
    },
    {
      "epoch": 0.12141,
      "grad_norm": 1.167840594810345,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 12141
    },
    {
      "epoch": 0.12142,
      "grad_norm": 1.220199217906067,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 12142
    },
    {
      "epoch": 0.12143,
      "grad_norm": 1.1368354349474534,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 12143
    },
    {
      "epoch": 0.12144,
      "grad_norm": 1.1779110345400967,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 12144
    },
    {
      "epoch": 0.12145,
      "grad_norm": 1.2251180573005898,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 12145
    },
    {
      "epoch": 0.12146,
      "grad_norm": 0.9548607030454273,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 12146
    },
    {
      "epoch": 0.12147,
      "grad_norm": 1.1049973878812982,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 12147
    },
    {
      "epoch": 0.12148,
      "grad_norm": 1.0970442611358633,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 12148
    },
    {
      "epoch": 0.12149,
      "grad_norm": 1.2126640085318225,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 12149
    },
    {
      "epoch": 0.1215,
      "grad_norm": 1.0151653599442538,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 12150
    },
    {
      "epoch": 0.12151,
      "grad_norm": 1.2551528836697066,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 12151
    },
    {
      "epoch": 0.12152,
      "grad_norm": 1.1346936623381856,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 12152
    },
    {
      "epoch": 0.12153,
      "grad_norm": 1.2703669585817352,
      "learning_rate": 0.003,
      "loss": 4.1049,
      "step": 12153
    },
    {
      "epoch": 0.12154,
      "grad_norm": 1.349583252239081,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 12154
    },
    {
      "epoch": 0.12155,
      "grad_norm": 1.304156342689166,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 12155
    },
    {
      "epoch": 0.12156,
      "grad_norm": 1.171188520618665,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 12156
    },
    {
      "epoch": 0.12157,
      "grad_norm": 1.0722828495955472,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 12157
    },
    {
      "epoch": 0.12158,
      "grad_norm": 1.1483576341362236,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 12158
    },
    {
      "epoch": 0.12159,
      "grad_norm": 1.1198251651835405,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 12159
    },
    {
      "epoch": 0.1216,
      "grad_norm": 1.2441043778471383,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 12160
    },
    {
      "epoch": 0.12161,
      "grad_norm": 1.0390582605247063,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 12161
    },
    {
      "epoch": 0.12162,
      "grad_norm": 1.0943988455876759,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 12162
    },
    {
      "epoch": 0.12163,
      "grad_norm": 1.1896450292303948,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 12163
    },
    {
      "epoch": 0.12164,
      "grad_norm": 1.1110737554572037,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 12164
    },
    {
      "epoch": 0.12165,
      "grad_norm": 1.2722228329526097,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 12165
    },
    {
      "epoch": 0.12166,
      "grad_norm": 0.9537638120975451,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 12166
    },
    {
      "epoch": 0.12167,
      "grad_norm": 1.168497152953338,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 12167
    },
    {
      "epoch": 0.12168,
      "grad_norm": 1.1096490497883948,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 12168
    },
    {
      "epoch": 0.12169,
      "grad_norm": 1.3891119687227524,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 12169
    },
    {
      "epoch": 0.1217,
      "grad_norm": 1.2381209928430519,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 12170
    },
    {
      "epoch": 0.12171,
      "grad_norm": 1.075557009585934,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 12171
    },
    {
      "epoch": 0.12172,
      "grad_norm": 1.252001700722133,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 12172
    },
    {
      "epoch": 0.12173,
      "grad_norm": 1.1991927553926924,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 12173
    },
    {
      "epoch": 0.12174,
      "grad_norm": 1.0706837329851342,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 12174
    },
    {
      "epoch": 0.12175,
      "grad_norm": 1.198039780467412,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 12175
    },
    {
      "epoch": 0.12176,
      "grad_norm": 1.3827857084183623,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 12176
    },
    {
      "epoch": 0.12177,
      "grad_norm": 1.0018996448559927,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 12177
    },
    {
      "epoch": 0.12178,
      "grad_norm": 1.2754499630795086,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 12178
    },
    {
      "epoch": 0.12179,
      "grad_norm": 1.141006379500696,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 12179
    },
    {
      "epoch": 0.1218,
      "grad_norm": 1.1554481432840713,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 12180
    },
    {
      "epoch": 0.12181,
      "grad_norm": 1.1287874762838688,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 12181
    },
    {
      "epoch": 0.12182,
      "grad_norm": 1.1406212931173205,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 12182
    },
    {
      "epoch": 0.12183,
      "grad_norm": 1.044135481875383,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 12183
    },
    {
      "epoch": 0.12184,
      "grad_norm": 1.2984165649244284,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 12184
    },
    {
      "epoch": 0.12185,
      "grad_norm": 1.0484508917758921,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 12185
    },
    {
      "epoch": 0.12186,
      "grad_norm": 1.3348507005714336,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 12186
    },
    {
      "epoch": 0.12187,
      "grad_norm": 1.3208776230573516,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 12187
    },
    {
      "epoch": 0.12188,
      "grad_norm": 1.2186571875290615,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 12188
    },
    {
      "epoch": 0.12189,
      "grad_norm": 1.2492837808487232,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 12189
    },
    {
      "epoch": 0.1219,
      "grad_norm": 1.1334787027314297,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 12190
    },
    {
      "epoch": 0.12191,
      "grad_norm": 1.2246108948049745,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 12191
    },
    {
      "epoch": 0.12192,
      "grad_norm": 1.2110322419568589,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 12192
    },
    {
      "epoch": 0.12193,
      "grad_norm": 1.0261731131892984,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 12193
    },
    {
      "epoch": 0.12194,
      "grad_norm": 1.3792295929539276,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 12194
    },
    {
      "epoch": 0.12195,
      "grad_norm": 0.9497590834552457,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12195
    },
    {
      "epoch": 0.12196,
      "grad_norm": 1.1971816454138282,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 12196
    },
    {
      "epoch": 0.12197,
      "grad_norm": 1.346176011354049,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 12197
    },
    {
      "epoch": 0.12198,
      "grad_norm": 0.9845918447685015,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12198
    },
    {
      "epoch": 0.12199,
      "grad_norm": 1.4013619145319482,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 12199
    },
    {
      "epoch": 0.122,
      "grad_norm": 1.3402558991670583,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 12200
    },
    {
      "epoch": 0.12201,
      "grad_norm": 1.0917890609387033,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 12201
    },
    {
      "epoch": 0.12202,
      "grad_norm": 1.3977198473853139,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 12202
    },
    {
      "epoch": 0.12203,
      "grad_norm": 1.1663142701608584,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 12203
    },
    {
      "epoch": 0.12204,
      "grad_norm": 1.3067073939428604,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 12204
    },
    {
      "epoch": 0.12205,
      "grad_norm": 0.9953106606493801,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 12205
    },
    {
      "epoch": 0.12206,
      "grad_norm": 1.1961413384082726,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 12206
    },
    {
      "epoch": 0.12207,
      "grad_norm": 1.1660261913008847,
      "learning_rate": 0.003,
      "loss": 4.1179,
      "step": 12207
    },
    {
      "epoch": 0.12208,
      "grad_norm": 1.3567657303292768,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 12208
    },
    {
      "epoch": 0.12209,
      "grad_norm": 1.1940065501612556,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 12209
    },
    {
      "epoch": 0.1221,
      "grad_norm": 1.3451533385151508,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 12210
    },
    {
      "epoch": 0.12211,
      "grad_norm": 1.0861834534287251,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 12211
    },
    {
      "epoch": 0.12212,
      "grad_norm": 1.1170231539539128,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 12212
    },
    {
      "epoch": 0.12213,
      "grad_norm": 1.1842563818995908,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 12213
    },
    {
      "epoch": 0.12214,
      "grad_norm": 1.2412122957607226,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 12214
    },
    {
      "epoch": 0.12215,
      "grad_norm": 1.281614029600869,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 12215
    },
    {
      "epoch": 0.12216,
      "grad_norm": 1.1905902787785605,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 12216
    },
    {
      "epoch": 0.12217,
      "grad_norm": 1.15855892402375,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 12217
    },
    {
      "epoch": 0.12218,
      "grad_norm": 1.1656183934254463,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 12218
    },
    {
      "epoch": 0.12219,
      "grad_norm": 1.205973966373365,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 12219
    },
    {
      "epoch": 0.1222,
      "grad_norm": 0.9515308914661088,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12220
    },
    {
      "epoch": 0.12221,
      "grad_norm": 1.2099897259709462,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 12221
    },
    {
      "epoch": 0.12222,
      "grad_norm": 1.3622068216886205,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 12222
    },
    {
      "epoch": 0.12223,
      "grad_norm": 1.1289481339729313,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 12223
    },
    {
      "epoch": 0.12224,
      "grad_norm": 1.1788306971210525,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 12224
    },
    {
      "epoch": 0.12225,
      "grad_norm": 1.2714621284495276,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 12225
    },
    {
      "epoch": 0.12226,
      "grad_norm": 1.172042908502254,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 12226
    },
    {
      "epoch": 0.12227,
      "grad_norm": 1.0272185949615782,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 12227
    },
    {
      "epoch": 0.12228,
      "grad_norm": 1.2050509278203998,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 12228
    },
    {
      "epoch": 0.12229,
      "grad_norm": 1.133457829662722,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 12229
    },
    {
      "epoch": 0.1223,
      "grad_norm": 1.1844731245673616,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 12230
    },
    {
      "epoch": 0.12231,
      "grad_norm": 1.1279404374429298,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 12231
    },
    {
      "epoch": 0.12232,
      "grad_norm": 1.2244882868041893,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 12232
    },
    {
      "epoch": 0.12233,
      "grad_norm": 1.0501181368810093,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 12233
    },
    {
      "epoch": 0.12234,
      "grad_norm": 1.3746819695663268,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 12234
    },
    {
      "epoch": 0.12235,
      "grad_norm": 1.1124087609485749,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 12235
    },
    {
      "epoch": 0.12236,
      "grad_norm": 1.1498676818007616,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 12236
    },
    {
      "epoch": 0.12237,
      "grad_norm": 1.2311318127440682,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 12237
    },
    {
      "epoch": 0.12238,
      "grad_norm": 1.6033985780973559,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 12238
    },
    {
      "epoch": 0.12239,
      "grad_norm": 1.072128730565727,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 12239
    },
    {
      "epoch": 0.1224,
      "grad_norm": 1.3035753237150192,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 12240
    },
    {
      "epoch": 0.12241,
      "grad_norm": 1.1989449538493253,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 12241
    },
    {
      "epoch": 0.12242,
      "grad_norm": 1.114035699582397,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 12242
    },
    {
      "epoch": 0.12243,
      "grad_norm": 1.1325432549923347,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 12243
    },
    {
      "epoch": 0.12244,
      "grad_norm": 1.1685657840734007,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 12244
    },
    {
      "epoch": 0.12245,
      "grad_norm": 1.2109037845732278,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 12245
    },
    {
      "epoch": 0.12246,
      "grad_norm": 1.2960492703053292,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 12246
    },
    {
      "epoch": 0.12247,
      "grad_norm": 1.0630001470423178,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 12247
    },
    {
      "epoch": 0.12248,
      "grad_norm": 1.439686551242132,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12248
    },
    {
      "epoch": 0.12249,
      "grad_norm": 0.9360021317225822,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 12249
    },
    {
      "epoch": 0.1225,
      "grad_norm": 1.1608835012568275,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 12250
    },
    {
      "epoch": 0.12251,
      "grad_norm": 1.152778853567974,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 12251
    },
    {
      "epoch": 0.12252,
      "grad_norm": 1.3070190135020505,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 12252
    },
    {
      "epoch": 0.12253,
      "grad_norm": 1.1703555806685912,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 12253
    },
    {
      "epoch": 0.12254,
      "grad_norm": 1.2753604282061932,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12254
    },
    {
      "epoch": 0.12255,
      "grad_norm": 1.2012730702627195,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 12255
    },
    {
      "epoch": 0.12256,
      "grad_norm": 1.2685624175175607,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 12256
    },
    {
      "epoch": 0.12257,
      "grad_norm": 1.1982293227603562,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 12257
    },
    {
      "epoch": 0.12258,
      "grad_norm": 1.3454858034418558,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 12258
    },
    {
      "epoch": 0.12259,
      "grad_norm": 1.2663687933849272,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 12259
    },
    {
      "epoch": 0.1226,
      "grad_norm": 1.2182908134730372,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 12260
    },
    {
      "epoch": 0.12261,
      "grad_norm": 1.2478259449383944,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 12261
    },
    {
      "epoch": 0.12262,
      "grad_norm": 1.0688438965810219,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 12262
    },
    {
      "epoch": 0.12263,
      "grad_norm": 1.2976972823949195,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 12263
    },
    {
      "epoch": 0.12264,
      "grad_norm": 1.148927289043632,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 12264
    },
    {
      "epoch": 0.12265,
      "grad_norm": 0.9320813625265878,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 12265
    },
    {
      "epoch": 0.12266,
      "grad_norm": 1.1132041468172307,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 12266
    },
    {
      "epoch": 0.12267,
      "grad_norm": 1.099340406144865,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 12267
    },
    {
      "epoch": 0.12268,
      "grad_norm": 1.065205149602413,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 12268
    },
    {
      "epoch": 0.12269,
      "grad_norm": 1.1588607244578912,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12269
    },
    {
      "epoch": 0.1227,
      "grad_norm": 1.1704601580263845,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 12270
    },
    {
      "epoch": 0.12271,
      "grad_norm": 1.3898945239488298,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 12271
    },
    {
      "epoch": 0.12272,
      "grad_norm": 0.95609587772018,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 12272
    },
    {
      "epoch": 0.12273,
      "grad_norm": 1.0888462421073244,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 12273
    },
    {
      "epoch": 0.12274,
      "grad_norm": 1.2938651078465015,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 12274
    },
    {
      "epoch": 0.12275,
      "grad_norm": 1.0515210845199563,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12275
    },
    {
      "epoch": 0.12276,
      "grad_norm": 1.1681798074447074,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 12276
    },
    {
      "epoch": 0.12277,
      "grad_norm": 1.0957574398621501,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 12277
    },
    {
      "epoch": 0.12278,
      "grad_norm": 1.2402753483112188,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 12278
    },
    {
      "epoch": 0.12279,
      "grad_norm": 1.3253921086663587,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 12279
    },
    {
      "epoch": 0.1228,
      "grad_norm": 0.9446132722770976,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 12280
    },
    {
      "epoch": 0.12281,
      "grad_norm": 1.2543549659079503,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 12281
    },
    {
      "epoch": 0.12282,
      "grad_norm": 1.044464140315054,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 12282
    },
    {
      "epoch": 0.12283,
      "grad_norm": 1.4448428457498539,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 12283
    },
    {
      "epoch": 0.12284,
      "grad_norm": 0.9323247854530715,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 12284
    },
    {
      "epoch": 0.12285,
      "grad_norm": 1.2183938038468276,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 12285
    },
    {
      "epoch": 0.12286,
      "grad_norm": 1.1168696478309292,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 12286
    },
    {
      "epoch": 0.12287,
      "grad_norm": 1.4118380090895937,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 12287
    },
    {
      "epoch": 0.12288,
      "grad_norm": 1.034995448495016,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 12288
    },
    {
      "epoch": 0.12289,
      "grad_norm": 1.2572160704779352,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 12289
    },
    {
      "epoch": 0.1229,
      "grad_norm": 1.0887530584287088,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 12290
    },
    {
      "epoch": 0.12291,
      "grad_norm": 1.2194821967348717,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 12291
    },
    {
      "epoch": 0.12292,
      "grad_norm": 1.1774785860755754,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 12292
    },
    {
      "epoch": 0.12293,
      "grad_norm": 1.290622218740054,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 12293
    },
    {
      "epoch": 0.12294,
      "grad_norm": 1.2162922983304212,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 12294
    },
    {
      "epoch": 0.12295,
      "grad_norm": 1.1223430685307745,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 12295
    },
    {
      "epoch": 0.12296,
      "grad_norm": 1.3312238598478945,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 12296
    },
    {
      "epoch": 0.12297,
      "grad_norm": 1.2024661509907468,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 12297
    },
    {
      "epoch": 0.12298,
      "grad_norm": 0.9552507213085488,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 12298
    },
    {
      "epoch": 0.12299,
      "grad_norm": 1.107910154506854,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 12299
    },
    {
      "epoch": 0.123,
      "grad_norm": 0.9669575672708685,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 12300
    },
    {
      "epoch": 0.12301,
      "grad_norm": 1.2230981789950965,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 12301
    },
    {
      "epoch": 0.12302,
      "grad_norm": 0.9706414365878976,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 12302
    },
    {
      "epoch": 0.12303,
      "grad_norm": 1.3413519996827952,
      "learning_rate": 0.003,
      "loss": 4.1424,
      "step": 12303
    },
    {
      "epoch": 0.12304,
      "grad_norm": 1.0182058543155155,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 12304
    },
    {
      "epoch": 0.12305,
      "grad_norm": 1.2254298651040276,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 12305
    },
    {
      "epoch": 0.12306,
      "grad_norm": 1.1502601972603381,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 12306
    },
    {
      "epoch": 0.12307,
      "grad_norm": 1.1914233088333748,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 12307
    },
    {
      "epoch": 0.12308,
      "grad_norm": 1.5601911292837407,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 12308
    },
    {
      "epoch": 0.12309,
      "grad_norm": 1.1028187394945874,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 12309
    },
    {
      "epoch": 0.1231,
      "grad_norm": 1.540945453323211,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 12310
    },
    {
      "epoch": 0.12311,
      "grad_norm": 1.060524542842975,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 12311
    },
    {
      "epoch": 0.12312,
      "grad_norm": 1.1688223005429994,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 12312
    },
    {
      "epoch": 0.12313,
      "grad_norm": 0.9494509325263587,
      "learning_rate": 0.003,
      "loss": 4.0978,
      "step": 12313
    },
    {
      "epoch": 0.12314,
      "grad_norm": 1.231785054785133,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12314
    },
    {
      "epoch": 0.12315,
      "grad_norm": 1.2738306714406045,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 12315
    },
    {
      "epoch": 0.12316,
      "grad_norm": 1.1829541714808394,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 12316
    },
    {
      "epoch": 0.12317,
      "grad_norm": 1.4545722081228218,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 12317
    },
    {
      "epoch": 0.12318,
      "grad_norm": 1.1860778929682056,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 12318
    },
    {
      "epoch": 0.12319,
      "grad_norm": 1.105919643488645,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 12319
    },
    {
      "epoch": 0.1232,
      "grad_norm": 1.2407488668861022,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 12320
    },
    {
      "epoch": 0.12321,
      "grad_norm": 1.1156137272992521,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 12321
    },
    {
      "epoch": 0.12322,
      "grad_norm": 1.45072845598213,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 12322
    },
    {
      "epoch": 0.12323,
      "grad_norm": 1.194969851622858,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12323
    },
    {
      "epoch": 0.12324,
      "grad_norm": 1.2640609944605872,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 12324
    },
    {
      "epoch": 0.12325,
      "grad_norm": 0.9388456031153591,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 12325
    },
    {
      "epoch": 0.12326,
      "grad_norm": 1.282589163964992,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 12326
    },
    {
      "epoch": 0.12327,
      "grad_norm": 1.3107507611608642,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 12327
    },
    {
      "epoch": 0.12328,
      "grad_norm": 1.1448524193305218,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 12328
    },
    {
      "epoch": 0.12329,
      "grad_norm": 1.2593319977525492,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 12329
    },
    {
      "epoch": 0.1233,
      "grad_norm": 1.0922788769721852,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 12330
    },
    {
      "epoch": 0.12331,
      "grad_norm": 1.326047175565969,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 12331
    },
    {
      "epoch": 0.12332,
      "grad_norm": 1.0559325620610494,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 12332
    },
    {
      "epoch": 0.12333,
      "grad_norm": 1.333598551232034,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 12333
    },
    {
      "epoch": 0.12334,
      "grad_norm": 1.070779532164327,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 12334
    },
    {
      "epoch": 0.12335,
      "grad_norm": 1.337160726790681,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 12335
    },
    {
      "epoch": 0.12336,
      "grad_norm": 1.045959131867625,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 12336
    },
    {
      "epoch": 0.12337,
      "grad_norm": 1.1652231452234632,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 12337
    },
    {
      "epoch": 0.12338,
      "grad_norm": 1.1153823730527561,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 12338
    },
    {
      "epoch": 0.12339,
      "grad_norm": 1.1801428814861659,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 12339
    },
    {
      "epoch": 0.1234,
      "grad_norm": 1.1411467633416132,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 12340
    },
    {
      "epoch": 0.12341,
      "grad_norm": 1.183971895065385,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 12341
    },
    {
      "epoch": 0.12342,
      "grad_norm": 1.2148932364453693,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 12342
    },
    {
      "epoch": 0.12343,
      "grad_norm": 1.3997236811583327,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 12343
    },
    {
      "epoch": 0.12344,
      "grad_norm": 1.3906783136925027,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 12344
    },
    {
      "epoch": 0.12345,
      "grad_norm": 1.1929763591115197,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 12345
    },
    {
      "epoch": 0.12346,
      "grad_norm": 1.0421233278605289,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 12346
    },
    {
      "epoch": 0.12347,
      "grad_norm": 1.2864293081553977,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 12347
    },
    {
      "epoch": 0.12348,
      "grad_norm": 1.0784610258983045,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 12348
    },
    {
      "epoch": 0.12349,
      "grad_norm": 1.1614271899541546,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12349
    },
    {
      "epoch": 0.1235,
      "grad_norm": 0.9254677323202702,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 12350
    },
    {
      "epoch": 0.12351,
      "grad_norm": 1.2856580170551943,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 12351
    },
    {
      "epoch": 0.12352,
      "grad_norm": 0.9532348430578328,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 12352
    },
    {
      "epoch": 0.12353,
      "grad_norm": 1.3028059426238214,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 12353
    },
    {
      "epoch": 0.12354,
      "grad_norm": 1.1371787413729926,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 12354
    },
    {
      "epoch": 0.12355,
      "grad_norm": 1.4618199369215297,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 12355
    },
    {
      "epoch": 0.12356,
      "grad_norm": 1.066228952067004,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 12356
    },
    {
      "epoch": 0.12357,
      "grad_norm": 1.1062125587234808,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 12357
    },
    {
      "epoch": 0.12358,
      "grad_norm": 1.1954685144933828,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 12358
    },
    {
      "epoch": 0.12359,
      "grad_norm": 1.0600695155564492,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 12359
    },
    {
      "epoch": 0.1236,
      "grad_norm": 1.2525463893335433,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 12360
    },
    {
      "epoch": 0.12361,
      "grad_norm": 1.1782542376101623,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 12361
    },
    {
      "epoch": 0.12362,
      "grad_norm": 1.1600706645438057,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 12362
    },
    {
      "epoch": 0.12363,
      "grad_norm": 1.137380421954321,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 12363
    },
    {
      "epoch": 0.12364,
      "grad_norm": 1.1280376424988865,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 12364
    },
    {
      "epoch": 0.12365,
      "grad_norm": 1.1698311964249937,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 12365
    },
    {
      "epoch": 0.12366,
      "grad_norm": 1.3353646434819473,
      "learning_rate": 0.003,
      "loss": 4.1267,
      "step": 12366
    },
    {
      "epoch": 0.12367,
      "grad_norm": 1.2863268829142191,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 12367
    },
    {
      "epoch": 0.12368,
      "grad_norm": 1.0029004777191795,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 12368
    },
    {
      "epoch": 0.12369,
      "grad_norm": 1.4233370290194403,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 12369
    },
    {
      "epoch": 0.1237,
      "grad_norm": 1.1184490331663481,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 12370
    },
    {
      "epoch": 0.12371,
      "grad_norm": 1.2770423154469277,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 12371
    },
    {
      "epoch": 0.12372,
      "grad_norm": 0.9439049554373123,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 12372
    },
    {
      "epoch": 0.12373,
      "grad_norm": 1.2224924524375946,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 12373
    },
    {
      "epoch": 0.12374,
      "grad_norm": 1.0573524541014572,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 12374
    },
    {
      "epoch": 0.12375,
      "grad_norm": 1.138607119920257,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 12375
    },
    {
      "epoch": 0.12376,
      "grad_norm": 1.0254491056459425,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 12376
    },
    {
      "epoch": 0.12377,
      "grad_norm": 1.1953961864241487,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 12377
    },
    {
      "epoch": 0.12378,
      "grad_norm": 1.239262872294417,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 12378
    },
    {
      "epoch": 0.12379,
      "grad_norm": 1.3245694630712719,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 12379
    },
    {
      "epoch": 0.1238,
      "grad_norm": 1.0423837674916894,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 12380
    },
    {
      "epoch": 0.12381,
      "grad_norm": 1.1667607294537654,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 12381
    },
    {
      "epoch": 0.12382,
      "grad_norm": 1.1767961078546874,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 12382
    },
    {
      "epoch": 0.12383,
      "grad_norm": 1.187882008900835,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 12383
    },
    {
      "epoch": 0.12384,
      "grad_norm": 1.0051234424869664,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 12384
    },
    {
      "epoch": 0.12385,
      "grad_norm": 1.2969760483111112,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 12385
    },
    {
      "epoch": 0.12386,
      "grad_norm": 1.0697051420470138,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 12386
    },
    {
      "epoch": 0.12387,
      "grad_norm": 1.3510261709568954,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 12387
    },
    {
      "epoch": 0.12388,
      "grad_norm": 1.0318070562588793,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 12388
    },
    {
      "epoch": 0.12389,
      "grad_norm": 1.6188747792730196,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 12389
    },
    {
      "epoch": 0.1239,
      "grad_norm": 0.946262468912621,
      "learning_rate": 0.003,
      "loss": 4.1156,
      "step": 12390
    },
    {
      "epoch": 0.12391,
      "grad_norm": 1.3309245924925017,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 12391
    },
    {
      "epoch": 0.12392,
      "grad_norm": 1.2873771647331873,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 12392
    },
    {
      "epoch": 0.12393,
      "grad_norm": 1.157658538183153,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 12393
    },
    {
      "epoch": 0.12394,
      "grad_norm": 1.2264491677633973,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 12394
    },
    {
      "epoch": 0.12395,
      "grad_norm": 0.9912588373523377,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 12395
    },
    {
      "epoch": 0.12396,
      "grad_norm": 1.3829782770017245,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 12396
    },
    {
      "epoch": 0.12397,
      "grad_norm": 0.9969910644951696,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 12397
    },
    {
      "epoch": 0.12398,
      "grad_norm": 1.3826345300514964,
      "learning_rate": 0.003,
      "loss": 4.108,
      "step": 12398
    },
    {
      "epoch": 0.12399,
      "grad_norm": 1.0440178845749568,
      "learning_rate": 0.003,
      "loss": 4.1056,
      "step": 12399
    },
    {
      "epoch": 0.124,
      "grad_norm": 1.349613962975738,
      "learning_rate": 0.003,
      "loss": 4.1233,
      "step": 12400
    },
    {
      "epoch": 0.12401,
      "grad_norm": 1.0142913897995074,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 12401
    },
    {
      "epoch": 0.12402,
      "grad_norm": 1.3609411930461677,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 12402
    },
    {
      "epoch": 0.12403,
      "grad_norm": 1.1827122840577082,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 12403
    },
    {
      "epoch": 0.12404,
      "grad_norm": 1.2215790114579566,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 12404
    },
    {
      "epoch": 0.12405,
      "grad_norm": 1.243523400765216,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 12405
    },
    {
      "epoch": 0.12406,
      "grad_norm": 1.1446590993375974,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 12406
    },
    {
      "epoch": 0.12407,
      "grad_norm": 1.2028933130830788,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 12407
    },
    {
      "epoch": 0.12408,
      "grad_norm": 1.0805977575687995,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 12408
    },
    {
      "epoch": 0.12409,
      "grad_norm": 1.2076277518877603,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 12409
    },
    {
      "epoch": 0.1241,
      "grad_norm": 1.0360928232504336,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 12410
    },
    {
      "epoch": 0.12411,
      "grad_norm": 1.2538038581764603,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 12411
    },
    {
      "epoch": 0.12412,
      "grad_norm": 1.117318743300164,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 12412
    },
    {
      "epoch": 0.12413,
      "grad_norm": 1.2765684340945003,
      "learning_rate": 0.003,
      "loss": 4.0974,
      "step": 12413
    },
    {
      "epoch": 0.12414,
      "grad_norm": 1.1275644577185353,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 12414
    },
    {
      "epoch": 0.12415,
      "grad_norm": 1.1486775459125977,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 12415
    },
    {
      "epoch": 0.12416,
      "grad_norm": 1.116843978464965,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 12416
    },
    {
      "epoch": 0.12417,
      "grad_norm": 1.0780602688508731,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 12417
    },
    {
      "epoch": 0.12418,
      "grad_norm": 1.2625972608543656,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 12418
    },
    {
      "epoch": 0.12419,
      "grad_norm": 1.136511891701055,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 12419
    },
    {
      "epoch": 0.1242,
      "grad_norm": 1.335064760846712,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 12420
    },
    {
      "epoch": 0.12421,
      "grad_norm": 1.1493168734621346,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 12421
    },
    {
      "epoch": 0.12422,
      "grad_norm": 1.2595484150746281,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 12422
    },
    {
      "epoch": 0.12423,
      "grad_norm": 1.1535911104465582,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 12423
    },
    {
      "epoch": 0.12424,
      "grad_norm": 1.1185599234146864,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 12424
    },
    {
      "epoch": 0.12425,
      "grad_norm": 0.9901883288896391,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 12425
    },
    {
      "epoch": 0.12426,
      "grad_norm": 1.2859790206056052,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 12426
    },
    {
      "epoch": 0.12427,
      "grad_norm": 1.2239228314237462,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 12427
    },
    {
      "epoch": 0.12428,
      "grad_norm": 1.223034801205773,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 12428
    },
    {
      "epoch": 0.12429,
      "grad_norm": 1.1267875607961046,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 12429
    },
    {
      "epoch": 0.1243,
      "grad_norm": 1.2112429229194317,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 12430
    },
    {
      "epoch": 0.12431,
      "grad_norm": 1.2009002242700133,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 12431
    },
    {
      "epoch": 0.12432,
      "grad_norm": 1.1621112911264861,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 12432
    },
    {
      "epoch": 0.12433,
      "grad_norm": 1.4528639075118381,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 12433
    },
    {
      "epoch": 0.12434,
      "grad_norm": 0.9828266663691128,
      "learning_rate": 0.003,
      "loss": 4.0999,
      "step": 12434
    },
    {
      "epoch": 0.12435,
      "grad_norm": 1.208413930102256,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 12435
    },
    {
      "epoch": 0.12436,
      "grad_norm": 1.2434917891523287,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 12436
    },
    {
      "epoch": 0.12437,
      "grad_norm": 1.240598712732967,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 12437
    },
    {
      "epoch": 0.12438,
      "grad_norm": 1.029094900595442,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 12438
    },
    {
      "epoch": 0.12439,
      "grad_norm": 1.349558368050492,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 12439
    },
    {
      "epoch": 0.1244,
      "grad_norm": 0.9220520856370474,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 12440
    },
    {
      "epoch": 0.12441,
      "grad_norm": 1.191546944432022,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 12441
    },
    {
      "epoch": 0.12442,
      "grad_norm": 1.0966955815185557,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 12442
    },
    {
      "epoch": 0.12443,
      "grad_norm": 1.181104240860222,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 12443
    },
    {
      "epoch": 0.12444,
      "grad_norm": 1.106511036608127,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 12444
    },
    {
      "epoch": 0.12445,
      "grad_norm": 1.2160044974004254,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 12445
    },
    {
      "epoch": 0.12446,
      "grad_norm": 1.2540860300867798,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12446
    },
    {
      "epoch": 0.12447,
      "grad_norm": 1.5920465287533634,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 12447
    },
    {
      "epoch": 0.12448,
      "grad_norm": 1.3714492953122197,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 12448
    },
    {
      "epoch": 0.12449,
      "grad_norm": 1.1988615033271683,
      "learning_rate": 0.003,
      "loss": 4.107,
      "step": 12449
    },
    {
      "epoch": 0.1245,
      "grad_norm": 1.2820507313484732,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 12450
    },
    {
      "epoch": 0.12451,
      "grad_norm": 1.0123391719593466,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 12451
    },
    {
      "epoch": 0.12452,
      "grad_norm": 1.1708447718933537,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 12452
    },
    {
      "epoch": 0.12453,
      "grad_norm": 1.0224703740085388,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 12453
    },
    {
      "epoch": 0.12454,
      "grad_norm": 1.096286692309235,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 12454
    },
    {
      "epoch": 0.12455,
      "grad_norm": 1.3979018490361652,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 12455
    },
    {
      "epoch": 0.12456,
      "grad_norm": 1.146467679758208,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 12456
    },
    {
      "epoch": 0.12457,
      "grad_norm": 1.5371802869904305,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 12457
    },
    {
      "epoch": 0.12458,
      "grad_norm": 0.9429196785460733,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 12458
    },
    {
      "epoch": 0.12459,
      "grad_norm": 1.126333732451378,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 12459
    },
    {
      "epoch": 0.1246,
      "grad_norm": 1.3427249346932093,
      "learning_rate": 0.003,
      "loss": 4.101,
      "step": 12460
    },
    {
      "epoch": 0.12461,
      "grad_norm": 1.2335821482057865,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 12461
    },
    {
      "epoch": 0.12462,
      "grad_norm": 1.2589350122049023,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 12462
    },
    {
      "epoch": 0.12463,
      "grad_norm": 1.1078088631731713,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 12463
    },
    {
      "epoch": 0.12464,
      "grad_norm": 1.156194221905579,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 12464
    },
    {
      "epoch": 0.12465,
      "grad_norm": 1.1287746508449288,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 12465
    },
    {
      "epoch": 0.12466,
      "grad_norm": 1.1136569055957326,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 12466
    },
    {
      "epoch": 0.12467,
      "grad_norm": 1.0799338044455242,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 12467
    },
    {
      "epoch": 0.12468,
      "grad_norm": 1.3289748461710855,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 12468
    },
    {
      "epoch": 0.12469,
      "grad_norm": 1.1151078645112802,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 12469
    },
    {
      "epoch": 0.1247,
      "grad_norm": 1.1463534088936385,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 12470
    },
    {
      "epoch": 0.12471,
      "grad_norm": 1.1519553387047017,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 12471
    },
    {
      "epoch": 0.12472,
      "grad_norm": 1.2285550661794005,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 12472
    },
    {
      "epoch": 0.12473,
      "grad_norm": 1.1215923088146995,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 12473
    },
    {
      "epoch": 0.12474,
      "grad_norm": 1.231438287323106,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 12474
    },
    {
      "epoch": 0.12475,
      "grad_norm": 1.0679941372411055,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 12475
    },
    {
      "epoch": 0.12476,
      "grad_norm": 1.0239810114957488,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 12476
    },
    {
      "epoch": 0.12477,
      "grad_norm": 1.3657673364526786,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 12477
    },
    {
      "epoch": 0.12478,
      "grad_norm": 1.066101071076436,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 12478
    },
    {
      "epoch": 0.12479,
      "grad_norm": 1.7640966435197194,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 12479
    },
    {
      "epoch": 0.1248,
      "grad_norm": 1.102522439741497,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 12480
    },
    {
      "epoch": 0.12481,
      "grad_norm": 1.241907969502746,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 12481
    },
    {
      "epoch": 0.12482,
      "grad_norm": 1.1094473374941114,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 12482
    },
    {
      "epoch": 0.12483,
      "grad_norm": 1.3305339634481412,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 12483
    },
    {
      "epoch": 0.12484,
      "grad_norm": 1.04860359051801,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 12484
    },
    {
      "epoch": 0.12485,
      "grad_norm": 1.3466472306173978,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 12485
    },
    {
      "epoch": 0.12486,
      "grad_norm": 1.091955860046152,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 12486
    },
    {
      "epoch": 0.12487,
      "grad_norm": 1.1542843548616597,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 12487
    },
    {
      "epoch": 0.12488,
      "grad_norm": 1.2977298649449636,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 12488
    },
    {
      "epoch": 0.12489,
      "grad_norm": 1.1502135615273064,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 12489
    },
    {
      "epoch": 0.1249,
      "grad_norm": 1.1954733894997076,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 12490
    },
    {
      "epoch": 0.12491,
      "grad_norm": 1.1477409651605421,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 12491
    },
    {
      "epoch": 0.12492,
      "grad_norm": 1.0229908159016856,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 12492
    },
    {
      "epoch": 0.12493,
      "grad_norm": 1.397010869196908,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 12493
    },
    {
      "epoch": 0.12494,
      "grad_norm": 1.0552851041740259,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 12494
    },
    {
      "epoch": 0.12495,
      "grad_norm": 1.2568137758291984,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 12495
    },
    {
      "epoch": 0.12496,
      "grad_norm": 1.3314768441604752,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 12496
    },
    {
      "epoch": 0.12497,
      "grad_norm": 1.4693373016661582,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 12497
    },
    {
      "epoch": 0.12498,
      "grad_norm": 0.940180612411933,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 12498
    },
    {
      "epoch": 0.12499,
      "grad_norm": 1.0852382733465447,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 12499
    },
    {
      "epoch": 0.125,
      "grad_norm": 1.5622120700812359,
      "learning_rate": 0.003,
      "loss": 4.124,
      "step": 12500
    },
    {
      "epoch": 0.12501,
      "grad_norm": 0.8739362980405638,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 12501
    },
    {
      "epoch": 0.12502,
      "grad_norm": 1.0636747915337694,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12502
    },
    {
      "epoch": 0.12503,
      "grad_norm": 1.2278657194272873,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 12503
    },
    {
      "epoch": 0.12504,
      "grad_norm": 1.0974608415259097,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 12504
    },
    {
      "epoch": 0.12505,
      "grad_norm": 1.1248766463340456,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 12505
    },
    {
      "epoch": 0.12506,
      "grad_norm": 1.2399103972271988,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 12506
    },
    {
      "epoch": 0.12507,
      "grad_norm": 1.1756145223100918,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 12507
    },
    {
      "epoch": 0.12508,
      "grad_norm": 1.1015940953540795,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 12508
    },
    {
      "epoch": 0.12509,
      "grad_norm": 1.1879648693043818,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 12509
    },
    {
      "epoch": 0.1251,
      "grad_norm": 1.1637957279316695,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 12510
    },
    {
      "epoch": 0.12511,
      "grad_norm": 1.2494606772760055,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 12511
    },
    {
      "epoch": 0.12512,
      "grad_norm": 1.2181112953250357,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 12512
    },
    {
      "epoch": 0.12513,
      "grad_norm": 0.9729638548913785,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 12513
    },
    {
      "epoch": 0.12514,
      "grad_norm": 1.2901127381666835,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 12514
    },
    {
      "epoch": 0.12515,
      "grad_norm": 1.0976102482057357,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12515
    },
    {
      "epoch": 0.12516,
      "grad_norm": 1.336702885012363,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 12516
    },
    {
      "epoch": 0.12517,
      "grad_norm": 1.0272505207107212,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12517
    },
    {
      "epoch": 0.12518,
      "grad_norm": 1.3057449911660892,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 12518
    },
    {
      "epoch": 0.12519,
      "grad_norm": 1.218532581003206,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 12519
    },
    {
      "epoch": 0.1252,
      "grad_norm": 1.0277171173468325,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 12520
    },
    {
      "epoch": 0.12521,
      "grad_norm": 1.48322118713419,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 12521
    },
    {
      "epoch": 0.12522,
      "grad_norm": 1.028286503421473,
      "learning_rate": 0.003,
      "loss": 4.1146,
      "step": 12522
    },
    {
      "epoch": 0.12523,
      "grad_norm": 1.2785485432284658,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 12523
    },
    {
      "epoch": 0.12524,
      "grad_norm": 1.133692151244233,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 12524
    },
    {
      "epoch": 0.12525,
      "grad_norm": 1.3082450711743057,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 12525
    },
    {
      "epoch": 0.12526,
      "grad_norm": 1.2367359920672203,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 12526
    },
    {
      "epoch": 0.12527,
      "grad_norm": 1.1937292021887054,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 12527
    },
    {
      "epoch": 0.12528,
      "grad_norm": 1.2382152188842006,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 12528
    },
    {
      "epoch": 0.12529,
      "grad_norm": 1.3361864008371442,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 12529
    },
    {
      "epoch": 0.1253,
      "grad_norm": 0.9484016556199034,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 12530
    },
    {
      "epoch": 0.12531,
      "grad_norm": 1.1821540840825915,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12531
    },
    {
      "epoch": 0.12532,
      "grad_norm": 1.270341277351583,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 12532
    },
    {
      "epoch": 0.12533,
      "grad_norm": 1.0751666876115358,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 12533
    },
    {
      "epoch": 0.12534,
      "grad_norm": 1.1701969786843704,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 12534
    },
    {
      "epoch": 0.12535,
      "grad_norm": 1.065142223277472,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 12535
    },
    {
      "epoch": 0.12536,
      "grad_norm": 1.269846923699688,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 12536
    },
    {
      "epoch": 0.12537,
      "grad_norm": 1.1512438095355702,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 12537
    },
    {
      "epoch": 0.12538,
      "grad_norm": 1.0280753584333013,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 12538
    },
    {
      "epoch": 0.12539,
      "grad_norm": 1.1432091793641852,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 12539
    },
    {
      "epoch": 0.1254,
      "grad_norm": 1.0965031416949658,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 12540
    },
    {
      "epoch": 0.12541,
      "grad_norm": 1.3324295530793964,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 12541
    },
    {
      "epoch": 0.12542,
      "grad_norm": 1.082712427398565,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 12542
    },
    {
      "epoch": 0.12543,
      "grad_norm": 1.4584132011678437,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 12543
    },
    {
      "epoch": 0.12544,
      "grad_norm": 1.0347081253877564,
      "learning_rate": 0.003,
      "loss": 4.1032,
      "step": 12544
    },
    {
      "epoch": 0.12545,
      "grad_norm": 1.4080593441623668,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 12545
    },
    {
      "epoch": 0.12546,
      "grad_norm": 0.94172210940559,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12546
    },
    {
      "epoch": 0.12547,
      "grad_norm": 1.204417126633597,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 12547
    },
    {
      "epoch": 0.12548,
      "grad_norm": 1.153669862341393,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 12548
    },
    {
      "epoch": 0.12549,
      "grad_norm": 1.1623807046879422,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 12549
    },
    {
      "epoch": 0.1255,
      "grad_norm": 1.2223824905007339,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 12550
    },
    {
      "epoch": 0.12551,
      "grad_norm": 1.401806814284153,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 12551
    },
    {
      "epoch": 0.12552,
      "grad_norm": 1.0804692663089763,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 12552
    },
    {
      "epoch": 0.12553,
      "grad_norm": 1.3223499261165146,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 12553
    },
    {
      "epoch": 0.12554,
      "grad_norm": 1.2048586744932688,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12554
    },
    {
      "epoch": 0.12555,
      "grad_norm": 1.220463489847063,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 12555
    },
    {
      "epoch": 0.12556,
      "grad_norm": 1.2298530213918342,
      "learning_rate": 0.003,
      "loss": 4.103,
      "step": 12556
    },
    {
      "epoch": 0.12557,
      "grad_norm": 1.107477371434991,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 12557
    },
    {
      "epoch": 0.12558,
      "grad_norm": 1.4953971908411758,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 12558
    },
    {
      "epoch": 0.12559,
      "grad_norm": 1.0919959852714403,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 12559
    },
    {
      "epoch": 0.1256,
      "grad_norm": 1.4031108439000246,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12560
    },
    {
      "epoch": 0.12561,
      "grad_norm": 0.9274982786809287,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 12561
    },
    {
      "epoch": 0.12562,
      "grad_norm": 1.148283669382533,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 12562
    },
    {
      "epoch": 0.12563,
      "grad_norm": 1.252732398453347,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 12563
    },
    {
      "epoch": 0.12564,
      "grad_norm": 1.179501544509447,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 12564
    },
    {
      "epoch": 0.12565,
      "grad_norm": 1.2630777174253471,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12565
    },
    {
      "epoch": 0.12566,
      "grad_norm": 1.1104232624786845,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 12566
    },
    {
      "epoch": 0.12567,
      "grad_norm": 1.0172810898817368,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 12567
    },
    {
      "epoch": 0.12568,
      "grad_norm": 1.1101095264680614,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 12568
    },
    {
      "epoch": 0.12569,
      "grad_norm": 1.0660138431796577,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 12569
    },
    {
      "epoch": 0.1257,
      "grad_norm": 1.1675985526701762,
      "learning_rate": 0.003,
      "loss": 4.1186,
      "step": 12570
    },
    {
      "epoch": 0.12571,
      "grad_norm": 1.1076673801819517,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12571
    },
    {
      "epoch": 0.12572,
      "grad_norm": 1.2810009421141986,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12572
    },
    {
      "epoch": 0.12573,
      "grad_norm": 1.3484034745623017,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 12573
    },
    {
      "epoch": 0.12574,
      "grad_norm": 1.3426946654575465,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 12574
    },
    {
      "epoch": 0.12575,
      "grad_norm": 0.9506284253662771,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 12575
    },
    {
      "epoch": 0.12576,
      "grad_norm": 1.3356304158256906,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 12576
    },
    {
      "epoch": 0.12577,
      "grad_norm": 1.3499833136159347,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 12577
    },
    {
      "epoch": 0.12578,
      "grad_norm": 0.9853562356103978,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 12578
    },
    {
      "epoch": 0.12579,
      "grad_norm": 1.1326976276614282,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 12579
    },
    {
      "epoch": 0.1258,
      "grad_norm": 1.0176354143472115,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 12580
    },
    {
      "epoch": 0.12581,
      "grad_norm": 1.4072549274790551,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 12581
    },
    {
      "epoch": 0.12582,
      "grad_norm": 0.7865868508456874,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 12582
    },
    {
      "epoch": 0.12583,
      "grad_norm": 0.9827649168754795,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12583
    },
    {
      "epoch": 0.12584,
      "grad_norm": 1.3261940918286663,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 12584
    },
    {
      "epoch": 0.12585,
      "grad_norm": 1.0096186221785435,
      "learning_rate": 0.003,
      "loss": 4.1009,
      "step": 12585
    },
    {
      "epoch": 0.12586,
      "grad_norm": 1.550758321674492,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 12586
    },
    {
      "epoch": 0.12587,
      "grad_norm": 1.28775237574974,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 12587
    },
    {
      "epoch": 0.12588,
      "grad_norm": 1.212541757461834,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 12588
    },
    {
      "epoch": 0.12589,
      "grad_norm": 1.0041117070411152,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12589
    },
    {
      "epoch": 0.1259,
      "grad_norm": 1.3697511255613661,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 12590
    },
    {
      "epoch": 0.12591,
      "grad_norm": 1.0498516362975538,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 12591
    },
    {
      "epoch": 0.12592,
      "grad_norm": 1.4301823880591849,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 12592
    },
    {
      "epoch": 0.12593,
      "grad_norm": 0.9182078521998195,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 12593
    },
    {
      "epoch": 0.12594,
      "grad_norm": 1.1837839051014718,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 12594
    },
    {
      "epoch": 0.12595,
      "grad_norm": 1.298135818800885,
      "learning_rate": 0.003,
      "loss": 4.1093,
      "step": 12595
    },
    {
      "epoch": 0.12596,
      "grad_norm": 1.041094948367079,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 12596
    },
    {
      "epoch": 0.12597,
      "grad_norm": 1.490598566038195,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 12597
    },
    {
      "epoch": 0.12598,
      "grad_norm": 1.0931752922510034,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 12598
    },
    {
      "epoch": 0.12599,
      "grad_norm": 1.2504967445814124,
      "learning_rate": 0.003,
      "loss": 4.1034,
      "step": 12599
    },
    {
      "epoch": 0.126,
      "grad_norm": 1.1377759522818098,
      "learning_rate": 0.003,
      "loss": 4.0966,
      "step": 12600
    },
    {
      "epoch": 0.12601,
      "grad_norm": 1.15656521401536,
      "learning_rate": 0.003,
      "loss": 4.1128,
      "step": 12601
    },
    {
      "epoch": 0.12602,
      "grad_norm": 0.9867574592186018,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 12602
    },
    {
      "epoch": 0.12603,
      "grad_norm": 1.26686466484779,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 12603
    },
    {
      "epoch": 0.12604,
      "grad_norm": 1.091406672024988,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 12604
    },
    {
      "epoch": 0.12605,
      "grad_norm": 1.1478402161573151,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 12605
    },
    {
      "epoch": 0.12606,
      "grad_norm": 1.2518292789824488,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 12606
    },
    {
      "epoch": 0.12607,
      "grad_norm": 1.2143300345493522,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 12607
    },
    {
      "epoch": 0.12608,
      "grad_norm": 1.1601393056615503,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 12608
    },
    {
      "epoch": 0.12609,
      "grad_norm": 1.2908871565774243,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 12609
    },
    {
      "epoch": 0.1261,
      "grad_norm": 1.0781328575412508,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 12610
    },
    {
      "epoch": 0.12611,
      "grad_norm": 1.3641588513571903,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 12611
    },
    {
      "epoch": 0.12612,
      "grad_norm": 0.9713706238293194,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 12612
    },
    {
      "epoch": 0.12613,
      "grad_norm": 1.2393486901278956,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 12613
    },
    {
      "epoch": 0.12614,
      "grad_norm": 1.1093799942750657,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 12614
    },
    {
      "epoch": 0.12615,
      "grad_norm": 1.270999707985446,
      "learning_rate": 0.003,
      "loss": 4.0987,
      "step": 12615
    },
    {
      "epoch": 0.12616,
      "grad_norm": 1.1626868005815518,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 12616
    },
    {
      "epoch": 0.12617,
      "grad_norm": 1.1501465323341438,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 12617
    },
    {
      "epoch": 0.12618,
      "grad_norm": 1.0542480027114456,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 12618
    },
    {
      "epoch": 0.12619,
      "grad_norm": 1.210139597853682,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 12619
    },
    {
      "epoch": 0.1262,
      "grad_norm": 1.2551499777634003,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12620
    },
    {
      "epoch": 0.12621,
      "grad_norm": 1.1108012833404652,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 12621
    },
    {
      "epoch": 0.12622,
      "grad_norm": 1.0156488990422134,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 12622
    },
    {
      "epoch": 0.12623,
      "grad_norm": 1.1962566895669906,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 12623
    },
    {
      "epoch": 0.12624,
      "grad_norm": 1.0158639537186367,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 12624
    },
    {
      "epoch": 0.12625,
      "grad_norm": 1.3317713116392733,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 12625
    },
    {
      "epoch": 0.12626,
      "grad_norm": 0.9373068860721429,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 12626
    },
    {
      "epoch": 0.12627,
      "grad_norm": 1.2071017440177256,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 12627
    },
    {
      "epoch": 0.12628,
      "grad_norm": 1.2255144596796357,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 12628
    },
    {
      "epoch": 0.12629,
      "grad_norm": 1.199131934289616,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 12629
    },
    {
      "epoch": 0.1263,
      "grad_norm": 1.262215865484067,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 12630
    },
    {
      "epoch": 0.12631,
      "grad_norm": 1.0301353256706713,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 12631
    },
    {
      "epoch": 0.12632,
      "grad_norm": 1.1811291528410854,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 12632
    },
    {
      "epoch": 0.12633,
      "grad_norm": 1.2521403597119023,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 12633
    },
    {
      "epoch": 0.12634,
      "grad_norm": 1.104551521815721,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 12634
    },
    {
      "epoch": 0.12635,
      "grad_norm": 1.1228874241048405,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 12635
    },
    {
      "epoch": 0.12636,
      "grad_norm": 1.217972941351292,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 12636
    },
    {
      "epoch": 0.12637,
      "grad_norm": 1.5946769636593079,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12637
    },
    {
      "epoch": 0.12638,
      "grad_norm": 0.9998925575565182,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 12638
    },
    {
      "epoch": 0.12639,
      "grad_norm": 1.4039675600512114,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 12639
    },
    {
      "epoch": 0.1264,
      "grad_norm": 1.003036680605145,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12640
    },
    {
      "epoch": 0.12641,
      "grad_norm": 1.4049383754523674,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 12641
    },
    {
      "epoch": 0.12642,
      "grad_norm": 1.305155487945457,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 12642
    },
    {
      "epoch": 0.12643,
      "grad_norm": 0.8031734594976118,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 12643
    },
    {
      "epoch": 0.12644,
      "grad_norm": 0.8521750456478882,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 12644
    },
    {
      "epoch": 0.12645,
      "grad_norm": 1.2685751637446443,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 12645
    },
    {
      "epoch": 0.12646,
      "grad_norm": 1.3968989411592685,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 12646
    },
    {
      "epoch": 0.12647,
      "grad_norm": 1.073636686301602,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 12647
    },
    {
      "epoch": 0.12648,
      "grad_norm": 1.1945443103769646,
      "learning_rate": 0.003,
      "loss": 4.1042,
      "step": 12648
    },
    {
      "epoch": 0.12649,
      "grad_norm": 1.190570374890015,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 12649
    },
    {
      "epoch": 0.1265,
      "grad_norm": 1.166870543567209,
      "learning_rate": 0.003,
      "loss": 4.0995,
      "step": 12650
    },
    {
      "epoch": 0.12651,
      "grad_norm": 1.0606374742427602,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 12651
    },
    {
      "epoch": 0.12652,
      "grad_norm": 1.0566125104734068,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 12652
    },
    {
      "epoch": 0.12653,
      "grad_norm": 1.2680473842459052,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 12653
    },
    {
      "epoch": 0.12654,
      "grad_norm": 1.466838845584005,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 12654
    },
    {
      "epoch": 0.12655,
      "grad_norm": 1.1214819051890923,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 12655
    },
    {
      "epoch": 0.12656,
      "grad_norm": 1.0688303461370197,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 12656
    },
    {
      "epoch": 0.12657,
      "grad_norm": 1.259173862912746,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 12657
    },
    {
      "epoch": 0.12658,
      "grad_norm": 1.0776915341427002,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 12658
    },
    {
      "epoch": 0.12659,
      "grad_norm": 1.2629615894034179,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 12659
    },
    {
      "epoch": 0.1266,
      "grad_norm": 1.2892363491649281,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 12660
    },
    {
      "epoch": 0.12661,
      "grad_norm": 1.3285413004792743,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 12661
    },
    {
      "epoch": 0.12662,
      "grad_norm": 1.180371463792909,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 12662
    },
    {
      "epoch": 0.12663,
      "grad_norm": 1.208193650593098,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 12663
    },
    {
      "epoch": 0.12664,
      "grad_norm": 1.0500808440531122,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 12664
    },
    {
      "epoch": 0.12665,
      "grad_norm": 1.1607253819424002,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 12665
    },
    {
      "epoch": 0.12666,
      "grad_norm": 1.022073776534333,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 12666
    },
    {
      "epoch": 0.12667,
      "grad_norm": 1.2749582096083807,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 12667
    },
    {
      "epoch": 0.12668,
      "grad_norm": 1.1541879354072204,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 12668
    },
    {
      "epoch": 0.12669,
      "grad_norm": 1.0729069740078794,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 12669
    },
    {
      "epoch": 0.1267,
      "grad_norm": 1.0752540096809793,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 12670
    },
    {
      "epoch": 0.12671,
      "grad_norm": 1.3556142957785262,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 12671
    },
    {
      "epoch": 0.12672,
      "grad_norm": 1.0754056558863596,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 12672
    },
    {
      "epoch": 0.12673,
      "grad_norm": 1.4216700371204218,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 12673
    },
    {
      "epoch": 0.12674,
      "grad_norm": 1.130987561730525,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12674
    },
    {
      "epoch": 0.12675,
      "grad_norm": 1.3325284780537245,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 12675
    },
    {
      "epoch": 0.12676,
      "grad_norm": 1.1970970779957901,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 12676
    },
    {
      "epoch": 0.12677,
      "grad_norm": 1.0600549073306158,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 12677
    },
    {
      "epoch": 0.12678,
      "grad_norm": 1.1510265499571368,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 12678
    },
    {
      "epoch": 0.12679,
      "grad_norm": 1.1025950728150287,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 12679
    },
    {
      "epoch": 0.1268,
      "grad_norm": 1.5977246430944323,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 12680
    },
    {
      "epoch": 0.12681,
      "grad_norm": 1.2572186245403059,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 12681
    },
    {
      "epoch": 0.12682,
      "grad_norm": 1.0544016504397105,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 12682
    },
    {
      "epoch": 0.12683,
      "grad_norm": 1.277683584940182,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 12683
    },
    {
      "epoch": 0.12684,
      "grad_norm": 1.1984352962734288,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 12684
    },
    {
      "epoch": 0.12685,
      "grad_norm": 0.974734679415916,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 12685
    },
    {
      "epoch": 0.12686,
      "grad_norm": 1.1643936922516476,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 12686
    },
    {
      "epoch": 0.12687,
      "grad_norm": 1.2262435368031364,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 12687
    },
    {
      "epoch": 0.12688,
      "grad_norm": 1.2296156118738668,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 12688
    },
    {
      "epoch": 0.12689,
      "grad_norm": 1.0892106921141844,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 12689
    },
    {
      "epoch": 0.1269,
      "grad_norm": 1.0366641496791478,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 12690
    },
    {
      "epoch": 0.12691,
      "grad_norm": 1.3342956070153213,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 12691
    },
    {
      "epoch": 0.12692,
      "grad_norm": 0.9619591761019171,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 12692
    },
    {
      "epoch": 0.12693,
      "grad_norm": 1.4034682137840007,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12693
    },
    {
      "epoch": 0.12694,
      "grad_norm": 1.0726913636880717,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 12694
    },
    {
      "epoch": 0.12695,
      "grad_norm": 1.6632780527157962,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 12695
    },
    {
      "epoch": 0.12696,
      "grad_norm": 0.8681943038818645,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 12696
    },
    {
      "epoch": 0.12697,
      "grad_norm": 1.0063370084866963,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 12697
    },
    {
      "epoch": 0.12698,
      "grad_norm": 1.3100329034788503,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 12698
    },
    {
      "epoch": 0.12699,
      "grad_norm": 1.133944589846256,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 12699
    },
    {
      "epoch": 0.127,
      "grad_norm": 1.1630734414695099,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12700
    },
    {
      "epoch": 0.12701,
      "grad_norm": 1.09819934090099,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 12701
    },
    {
      "epoch": 0.12702,
      "grad_norm": 1.2941794036977707,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 12702
    },
    {
      "epoch": 0.12703,
      "grad_norm": 1.2328994449759347,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 12703
    },
    {
      "epoch": 0.12704,
      "grad_norm": 1.1596742990374647,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 12704
    },
    {
      "epoch": 0.12705,
      "grad_norm": 1.249626557969983,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 12705
    },
    {
      "epoch": 0.12706,
      "grad_norm": 1.0664166312741485,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 12706
    },
    {
      "epoch": 0.12707,
      "grad_norm": 1.213254277457309,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 12707
    },
    {
      "epoch": 0.12708,
      "grad_norm": 0.9958840449855603,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 12708
    },
    {
      "epoch": 0.12709,
      "grad_norm": 1.3146805427924255,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 12709
    },
    {
      "epoch": 0.1271,
      "grad_norm": 1.0398187077801997,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 12710
    },
    {
      "epoch": 0.12711,
      "grad_norm": 1.1626402744939957,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 12711
    },
    {
      "epoch": 0.12712,
      "grad_norm": 1.1473284230800858,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 12712
    },
    {
      "epoch": 0.12713,
      "grad_norm": 1.0848536100335324,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 12713
    },
    {
      "epoch": 0.12714,
      "grad_norm": 1.3831331715427961,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 12714
    },
    {
      "epoch": 0.12715,
      "grad_norm": 1.1338759456068388,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 12715
    },
    {
      "epoch": 0.12716,
      "grad_norm": 1.1864130921092912,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 12716
    },
    {
      "epoch": 0.12717,
      "grad_norm": 1.201188020850399,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 12717
    },
    {
      "epoch": 0.12718,
      "grad_norm": 1.1889820047256625,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 12718
    },
    {
      "epoch": 0.12719,
      "grad_norm": 1.3267095900035595,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 12719
    },
    {
      "epoch": 0.1272,
      "grad_norm": 1.105599328700241,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 12720
    },
    {
      "epoch": 0.12721,
      "grad_norm": 1.340115460536244,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 12721
    },
    {
      "epoch": 0.12722,
      "grad_norm": 1.206281956639625,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 12722
    },
    {
      "epoch": 0.12723,
      "grad_norm": 1.3126374431751684,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 12723
    },
    {
      "epoch": 0.12724,
      "grad_norm": 1.3455412140019556,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 12724
    },
    {
      "epoch": 0.12725,
      "grad_norm": 1.2989919649637907,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 12725
    },
    {
      "epoch": 0.12726,
      "grad_norm": 1.138944369119727,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 12726
    },
    {
      "epoch": 0.12727,
      "grad_norm": 1.1531095634219064,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 12727
    },
    {
      "epoch": 0.12728,
      "grad_norm": 1.2911306924099242,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 12728
    },
    {
      "epoch": 0.12729,
      "grad_norm": 1.0833222993254583,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 12729
    },
    {
      "epoch": 0.1273,
      "grad_norm": 1.4432297163898637,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 12730
    },
    {
      "epoch": 0.12731,
      "grad_norm": 0.9822744898425566,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 12731
    },
    {
      "epoch": 0.12732,
      "grad_norm": 1.1457318578638307,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 12732
    },
    {
      "epoch": 0.12733,
      "grad_norm": 1.3808953292491515,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 12733
    },
    {
      "epoch": 0.12734,
      "grad_norm": 1.2028234266526825,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 12734
    },
    {
      "epoch": 0.12735,
      "grad_norm": 0.9884754749991274,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 12735
    },
    {
      "epoch": 0.12736,
      "grad_norm": 1.2084403474756913,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 12736
    },
    {
      "epoch": 0.12737,
      "grad_norm": 1.0040944184692593,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 12737
    },
    {
      "epoch": 0.12738,
      "grad_norm": 1.2811809584095792,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 12738
    },
    {
      "epoch": 0.12739,
      "grad_norm": 1.0230493648988197,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 12739
    },
    {
      "epoch": 0.1274,
      "grad_norm": 1.4005792410516809,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 12740
    },
    {
      "epoch": 0.12741,
      "grad_norm": 1.1006908025651114,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 12741
    },
    {
      "epoch": 0.12742,
      "grad_norm": 1.2419269613043473,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 12742
    },
    {
      "epoch": 0.12743,
      "grad_norm": 1.056815602258476,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 12743
    },
    {
      "epoch": 0.12744,
      "grad_norm": 1.0891562763539673,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 12744
    },
    {
      "epoch": 0.12745,
      "grad_norm": 1.2863861344562593,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 12745
    },
    {
      "epoch": 0.12746,
      "grad_norm": 1.1750782483639854,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 12746
    },
    {
      "epoch": 0.12747,
      "grad_norm": 1.0367537228548345,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 12747
    },
    {
      "epoch": 0.12748,
      "grad_norm": 1.0320366689138416,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 12748
    },
    {
      "epoch": 0.12749,
      "grad_norm": 1.3090270380465214,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 12749
    },
    {
      "epoch": 0.1275,
      "grad_norm": 0.9623742918617262,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 12750
    },
    {
      "epoch": 0.12751,
      "grad_norm": 1.4293745653200822,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 12751
    },
    {
      "epoch": 0.12752,
      "grad_norm": 0.9835449704041017,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 12752
    },
    {
      "epoch": 0.12753,
      "grad_norm": 1.1922279569087715,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 12753
    },
    {
      "epoch": 0.12754,
      "grad_norm": 1.2462405781423709,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 12754
    },
    {
      "epoch": 0.12755,
      "grad_norm": 1.2474327637473313,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 12755
    },
    {
      "epoch": 0.12756,
      "grad_norm": 1.2417319598905454,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 12756
    },
    {
      "epoch": 0.12757,
      "grad_norm": 1.0348760164518949,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 12757
    },
    {
      "epoch": 0.12758,
      "grad_norm": 1.2812274353318962,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 12758
    },
    {
      "epoch": 0.12759,
      "grad_norm": 1.259984768621498,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12759
    },
    {
      "epoch": 0.1276,
      "grad_norm": 1.4602602011892685,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 12760
    },
    {
      "epoch": 0.12761,
      "grad_norm": 0.9397846125753508,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 12761
    },
    {
      "epoch": 0.12762,
      "grad_norm": 1.186600256474887,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 12762
    },
    {
      "epoch": 0.12763,
      "grad_norm": 1.073769792838965,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 12763
    },
    {
      "epoch": 0.12764,
      "grad_norm": 1.275205021998926,
      "learning_rate": 0.003,
      "loss": 4.1016,
      "step": 12764
    },
    {
      "epoch": 0.12765,
      "grad_norm": 1.1742251437935147,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 12765
    },
    {
      "epoch": 0.12766,
      "grad_norm": 1.3329399824900785,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 12766
    },
    {
      "epoch": 0.12767,
      "grad_norm": 1.0791021912483265,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 12767
    },
    {
      "epoch": 0.12768,
      "grad_norm": 1.4595623801311226,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 12768
    },
    {
      "epoch": 0.12769,
      "grad_norm": 1.1736033398782468,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 12769
    },
    {
      "epoch": 0.1277,
      "grad_norm": 1.1062460564248988,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 12770
    },
    {
      "epoch": 0.12771,
      "grad_norm": 1.041613507025483,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 12771
    },
    {
      "epoch": 0.12772,
      "grad_norm": 1.3741621938740265,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 12772
    },
    {
      "epoch": 0.12773,
      "grad_norm": 0.9883536324275963,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 12773
    },
    {
      "epoch": 0.12774,
      "grad_norm": 1.409100278621397,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 12774
    },
    {
      "epoch": 0.12775,
      "grad_norm": 0.9143556891914499,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 12775
    },
    {
      "epoch": 0.12776,
      "grad_norm": 1.1985009871078336,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 12776
    },
    {
      "epoch": 0.12777,
      "grad_norm": 1.196130551271495,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 12777
    },
    {
      "epoch": 0.12778,
      "grad_norm": 1.0073557140282612,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 12778
    },
    {
      "epoch": 0.12779,
      "grad_norm": 1.405770620626207,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 12779
    },
    {
      "epoch": 0.1278,
      "grad_norm": 1.0804071655753458,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 12780
    },
    {
      "epoch": 0.12781,
      "grad_norm": 1.0997196151059192,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 12781
    },
    {
      "epoch": 0.12782,
      "grad_norm": 1.1494608347279651,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 12782
    },
    {
      "epoch": 0.12783,
      "grad_norm": 1.0321321835252084,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 12783
    },
    {
      "epoch": 0.12784,
      "grad_norm": 1.1384291428999396,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 12784
    },
    {
      "epoch": 0.12785,
      "grad_norm": 1.288495970579294,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 12785
    },
    {
      "epoch": 0.12786,
      "grad_norm": 1.363284159380646,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 12786
    },
    {
      "epoch": 0.12787,
      "grad_norm": 1.3524790224997603,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 12787
    },
    {
      "epoch": 0.12788,
      "grad_norm": 1.0276998746728125,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 12788
    },
    {
      "epoch": 0.12789,
      "grad_norm": 1.318856149931721,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 12789
    },
    {
      "epoch": 0.1279,
      "grad_norm": 1.2069682807094462,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 12790
    },
    {
      "epoch": 0.12791,
      "grad_norm": 1.1184456166514853,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 12791
    },
    {
      "epoch": 0.12792,
      "grad_norm": 1.1067357652558798,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 12792
    },
    {
      "epoch": 0.12793,
      "grad_norm": 1.4670707203875561,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 12793
    },
    {
      "epoch": 0.12794,
      "grad_norm": 0.8727727545953691,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 12794
    },
    {
      "epoch": 0.12795,
      "grad_norm": 0.9961457976446118,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 12795
    },
    {
      "epoch": 0.12796,
      "grad_norm": 1.254421658016817,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 12796
    },
    {
      "epoch": 0.12797,
      "grad_norm": 1.24442927219961,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 12797
    },
    {
      "epoch": 0.12798,
      "grad_norm": 1.3697473984511896,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 12798
    },
    {
      "epoch": 0.12799,
      "grad_norm": 0.9884266532498901,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 12799
    },
    {
      "epoch": 0.128,
      "grad_norm": 1.1109350516665366,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 12800
    },
    {
      "epoch": 0.12801,
      "grad_norm": 1.3380461520808835,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 12801
    },
    {
      "epoch": 0.12802,
      "grad_norm": 0.9970239959885036,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 12802
    },
    {
      "epoch": 0.12803,
      "grad_norm": 1.3235631353916777,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 12803
    },
    {
      "epoch": 0.12804,
      "grad_norm": 0.8483000630193144,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 12804
    },
    {
      "epoch": 0.12805,
      "grad_norm": 1.1064973927009718,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 12805
    },
    {
      "epoch": 0.12806,
      "grad_norm": 1.2732880584031754,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 12806
    },
    {
      "epoch": 0.12807,
      "grad_norm": 1.2812021949362313,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 12807
    },
    {
      "epoch": 0.12808,
      "grad_norm": 1.1966594964800683,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 12808
    },
    {
      "epoch": 0.12809,
      "grad_norm": 1.16669330019199,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 12809
    },
    {
      "epoch": 0.1281,
      "grad_norm": 1.2485541233691577,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 12810
    },
    {
      "epoch": 0.12811,
      "grad_norm": 1.3432781017291722,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 12811
    },
    {
      "epoch": 0.12812,
      "grad_norm": 1.1129839438989915,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 12812
    },
    {
      "epoch": 0.12813,
      "grad_norm": 1.0945004724599967,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 12813
    },
    {
      "epoch": 0.12814,
      "grad_norm": 1.3305159561639528,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 12814
    },
    {
      "epoch": 0.12815,
      "grad_norm": 0.9993067828031007,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 12815
    },
    {
      "epoch": 0.12816,
      "grad_norm": 1.1632692943418896,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 12816
    },
    {
      "epoch": 0.12817,
      "grad_norm": 1.0089713823630224,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 12817
    },
    {
      "epoch": 0.12818,
      "grad_norm": 1.467243264855757,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 12818
    },
    {
      "epoch": 0.12819,
      "grad_norm": 1.1672674505745604,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 12819
    },
    {
      "epoch": 0.1282,
      "grad_norm": 1.0441202538190926,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 12820
    },
    {
      "epoch": 0.12821,
      "grad_norm": 1.3899115952334344,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 12821
    },
    {
      "epoch": 0.12822,
      "grad_norm": 1.1284507672330641,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 12822
    },
    {
      "epoch": 0.12823,
      "grad_norm": 1.5657702324506915,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 12823
    },
    {
      "epoch": 0.12824,
      "grad_norm": 0.8546350417815805,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 12824
    },
    {
      "epoch": 0.12825,
      "grad_norm": 1.0705539272876288,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 12825
    },
    {
      "epoch": 0.12826,
      "grad_norm": 1.4048058479185337,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 12826
    },
    {
      "epoch": 0.12827,
      "grad_norm": 1.2121907285812725,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 12827
    },
    {
      "epoch": 0.12828,
      "grad_norm": 1.2009641061982406,
      "learning_rate": 0.003,
      "loss": 4.1033,
      "step": 12828
    },
    {
      "epoch": 0.12829,
      "grad_norm": 1.1627578672696073,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 12829
    },
    {
      "epoch": 0.1283,
      "grad_norm": 1.0991143035535262,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 12830
    },
    {
      "epoch": 0.12831,
      "grad_norm": 1.115238829257928,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 12831
    },
    {
      "epoch": 0.12832,
      "grad_norm": 1.1877303958826508,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 12832
    },
    {
      "epoch": 0.12833,
      "grad_norm": 1.2737008439070627,
      "learning_rate": 0.003,
      "loss": 4.1135,
      "step": 12833
    },
    {
      "epoch": 0.12834,
      "grad_norm": 0.9863971781458649,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 12834
    },
    {
      "epoch": 0.12835,
      "grad_norm": 1.217349751553351,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 12835
    },
    {
      "epoch": 0.12836,
      "grad_norm": 0.948173851737352,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 12836
    },
    {
      "epoch": 0.12837,
      "grad_norm": 1.3598810604323968,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 12837
    },
    {
      "epoch": 0.12838,
      "grad_norm": 1.138930648132535,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 12838
    },
    {
      "epoch": 0.12839,
      "grad_norm": 1.0006638436626087,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 12839
    },
    {
      "epoch": 0.1284,
      "grad_norm": 1.3073913436699431,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 12840
    },
    {
      "epoch": 0.12841,
      "grad_norm": 1.3275864218910867,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 12841
    },
    {
      "epoch": 0.12842,
      "grad_norm": 1.1493120075388126,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 12842
    },
    {
      "epoch": 0.12843,
      "grad_norm": 1.2017852678334084,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12843
    },
    {
      "epoch": 0.12844,
      "grad_norm": 1.057771641595308,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 12844
    },
    {
      "epoch": 0.12845,
      "grad_norm": 1.3808357093166381,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 12845
    },
    {
      "epoch": 0.12846,
      "grad_norm": 1.1008203357281807,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 12846
    },
    {
      "epoch": 0.12847,
      "grad_norm": 1.3152387191500043,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 12847
    },
    {
      "epoch": 0.12848,
      "grad_norm": 1.3343889831602418,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 12848
    },
    {
      "epoch": 0.12849,
      "grad_norm": 1.1294171865863156,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 12849
    },
    {
      "epoch": 0.1285,
      "grad_norm": 1.1586576580299826,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 12850
    },
    {
      "epoch": 0.12851,
      "grad_norm": 1.1504795266991583,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 12851
    },
    {
      "epoch": 0.12852,
      "grad_norm": 1.1865678989137314,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 12852
    },
    {
      "epoch": 0.12853,
      "grad_norm": 1.2176250634540564,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 12853
    },
    {
      "epoch": 0.12854,
      "grad_norm": 1.0883767807314606,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 12854
    },
    {
      "epoch": 0.12855,
      "grad_norm": 1.0889257086818527,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 12855
    },
    {
      "epoch": 0.12856,
      "grad_norm": 1.2076962938762292,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 12856
    },
    {
      "epoch": 0.12857,
      "grad_norm": 1.0516746139614412,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 12857
    },
    {
      "epoch": 0.12858,
      "grad_norm": 1.4350109548800867,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 12858
    },
    {
      "epoch": 0.12859,
      "grad_norm": 0.9591249658291462,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 12859
    },
    {
      "epoch": 0.1286,
      "grad_norm": 1.5555432130245146,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 12860
    },
    {
      "epoch": 0.12861,
      "grad_norm": 1.1690526647953225,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12861
    },
    {
      "epoch": 0.12862,
      "grad_norm": 1.2929818536041606,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 12862
    },
    {
      "epoch": 0.12863,
      "grad_norm": 1.0584395412571626,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 12863
    },
    {
      "epoch": 0.12864,
      "grad_norm": 1.1070018585167076,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 12864
    },
    {
      "epoch": 0.12865,
      "grad_norm": 1.1516901687191021,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 12865
    },
    {
      "epoch": 0.12866,
      "grad_norm": 1.171564827066514,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 12866
    },
    {
      "epoch": 0.12867,
      "grad_norm": 1.0159836410101402,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 12867
    },
    {
      "epoch": 0.12868,
      "grad_norm": 1.2280536990300008,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 12868
    },
    {
      "epoch": 0.12869,
      "grad_norm": 1.4569571047621208,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 12869
    },
    {
      "epoch": 0.1287,
      "grad_norm": 1.2578352058524276,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 12870
    },
    {
      "epoch": 0.12871,
      "grad_norm": 1.159664751084178,
      "learning_rate": 0.003,
      "loss": 4.1094,
      "step": 12871
    },
    {
      "epoch": 0.12872,
      "grad_norm": 1.04319532085096,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 12872
    },
    {
      "epoch": 0.12873,
      "grad_norm": 1.0788402168566222,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 12873
    },
    {
      "epoch": 0.12874,
      "grad_norm": 1.068336320457033,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 12874
    },
    {
      "epoch": 0.12875,
      "grad_norm": 1.3543454544421982,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 12875
    },
    {
      "epoch": 0.12876,
      "grad_norm": 1.3794616530476864,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 12876
    },
    {
      "epoch": 0.12877,
      "grad_norm": 1.044127299764906,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 12877
    },
    {
      "epoch": 0.12878,
      "grad_norm": 1.0555971510389346,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 12878
    },
    {
      "epoch": 0.12879,
      "grad_norm": 1.049314355759846,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 12879
    },
    {
      "epoch": 0.1288,
      "grad_norm": 1.3798266253275924,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 12880
    },
    {
      "epoch": 0.12881,
      "grad_norm": 0.9047032299045279,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 12881
    },
    {
      "epoch": 0.12882,
      "grad_norm": 1.237667655709752,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12882
    },
    {
      "epoch": 0.12883,
      "grad_norm": 1.0513962810178894,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 12883
    },
    {
      "epoch": 0.12884,
      "grad_norm": 1.1654304128154607,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 12884
    },
    {
      "epoch": 0.12885,
      "grad_norm": 1.223231574979754,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 12885
    },
    {
      "epoch": 0.12886,
      "grad_norm": 1.3370106029329338,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 12886
    },
    {
      "epoch": 0.12887,
      "grad_norm": 1.2360740240021264,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 12887
    },
    {
      "epoch": 0.12888,
      "grad_norm": 1.2319839349362194,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 12888
    },
    {
      "epoch": 0.12889,
      "grad_norm": 1.3882447262004995,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 12889
    },
    {
      "epoch": 0.1289,
      "grad_norm": 0.9886714804166923,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12890
    },
    {
      "epoch": 0.12891,
      "grad_norm": 1.277581788852025,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 12891
    },
    {
      "epoch": 0.12892,
      "grad_norm": 0.9658180522667765,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 12892
    },
    {
      "epoch": 0.12893,
      "grad_norm": 1.071268157458069,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 12893
    },
    {
      "epoch": 0.12894,
      "grad_norm": 1.352899796848344,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 12894
    },
    {
      "epoch": 0.12895,
      "grad_norm": 1.0521509767290662,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12895
    },
    {
      "epoch": 0.12896,
      "grad_norm": 1.5389168201105246,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 12896
    },
    {
      "epoch": 0.12897,
      "grad_norm": 1.0686414383777203,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 12897
    },
    {
      "epoch": 0.12898,
      "grad_norm": 1.2169757972025348,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 12898
    },
    {
      "epoch": 0.12899,
      "grad_norm": 1.308036689238183,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 12899
    },
    {
      "epoch": 0.129,
      "grad_norm": 1.2307611483435221,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 12900
    },
    {
      "epoch": 0.12901,
      "grad_norm": 1.0337449423782072,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 12901
    },
    {
      "epoch": 0.12902,
      "grad_norm": 1.2459216760216396,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 12902
    },
    {
      "epoch": 0.12903,
      "grad_norm": 1.3137652546230592,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 12903
    },
    {
      "epoch": 0.12904,
      "grad_norm": 1.2224418199363158,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 12904
    },
    {
      "epoch": 0.12905,
      "grad_norm": 1.0773334695030383,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 12905
    },
    {
      "epoch": 0.12906,
      "grad_norm": 1.1445121047428575,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 12906
    },
    {
      "epoch": 0.12907,
      "grad_norm": 1.1651006592214215,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 12907
    },
    {
      "epoch": 0.12908,
      "grad_norm": 1.235356870985851,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 12908
    },
    {
      "epoch": 0.12909,
      "grad_norm": 1.0979832086059393,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 12909
    },
    {
      "epoch": 0.1291,
      "grad_norm": 1.099055647655412,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 12910
    },
    {
      "epoch": 0.12911,
      "grad_norm": 1.3345734515203431,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12911
    },
    {
      "epoch": 0.12912,
      "grad_norm": 1.2055307153529464,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 12912
    },
    {
      "epoch": 0.12913,
      "grad_norm": 0.9378615333877254,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 12913
    },
    {
      "epoch": 0.12914,
      "grad_norm": 1.2759949524150114,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 12914
    },
    {
      "epoch": 0.12915,
      "grad_norm": 1.4032020521873743,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 12915
    },
    {
      "epoch": 0.12916,
      "grad_norm": 1.4120384567652084,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 12916
    },
    {
      "epoch": 0.12917,
      "grad_norm": 1.1960338866006393,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 12917
    },
    {
      "epoch": 0.12918,
      "grad_norm": 1.0388195462652365,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 12918
    },
    {
      "epoch": 0.12919,
      "grad_norm": 1.303206159930564,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 12919
    },
    {
      "epoch": 0.1292,
      "grad_norm": 1.2077059437923325,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 12920
    },
    {
      "epoch": 0.12921,
      "grad_norm": 1.0952104661367275,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 12921
    },
    {
      "epoch": 0.12922,
      "grad_norm": 1.344907820003931,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 12922
    },
    {
      "epoch": 0.12923,
      "grad_norm": 1.155637762860284,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 12923
    },
    {
      "epoch": 0.12924,
      "grad_norm": 1.3436189844739663,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 12924
    },
    {
      "epoch": 0.12925,
      "grad_norm": 1.0353235582247116,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 12925
    },
    {
      "epoch": 0.12926,
      "grad_norm": 1.2229537570230653,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 12926
    },
    {
      "epoch": 0.12927,
      "grad_norm": 1.0059810201492685,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 12927
    },
    {
      "epoch": 0.12928,
      "grad_norm": 1.3482657563536515,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 12928
    },
    {
      "epoch": 0.12929,
      "grad_norm": 1.0034702187552473,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 12929
    },
    {
      "epoch": 0.1293,
      "grad_norm": 1.237325267234494,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 12930
    },
    {
      "epoch": 0.12931,
      "grad_norm": 1.217620116486049,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 12931
    },
    {
      "epoch": 0.12932,
      "grad_norm": 1.0866115666571554,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 12932
    },
    {
      "epoch": 0.12933,
      "grad_norm": 1.196918851275972,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 12933
    },
    {
      "epoch": 0.12934,
      "grad_norm": 1.118890903625114,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 12934
    },
    {
      "epoch": 0.12935,
      "grad_norm": 1.2573756682627455,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 12935
    },
    {
      "epoch": 0.12936,
      "grad_norm": 0.9737075399054111,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 12936
    },
    {
      "epoch": 0.12937,
      "grad_norm": 1.213197985342146,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 12937
    },
    {
      "epoch": 0.12938,
      "grad_norm": 1.2727782592637276,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 12938
    },
    {
      "epoch": 0.12939,
      "grad_norm": 1.0975732780763439,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 12939
    },
    {
      "epoch": 0.1294,
      "grad_norm": 1.2000156851268204,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 12940
    },
    {
      "epoch": 0.12941,
      "grad_norm": 1.1312255444762633,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 12941
    },
    {
      "epoch": 0.12942,
      "grad_norm": 1.2019313243678091,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 12942
    },
    {
      "epoch": 0.12943,
      "grad_norm": 1.1117822536192914,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 12943
    },
    {
      "epoch": 0.12944,
      "grad_norm": 1.1989374864817226,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 12944
    },
    {
      "epoch": 0.12945,
      "grad_norm": 1.15357346799443,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 12945
    },
    {
      "epoch": 0.12946,
      "grad_norm": 1.3358287172664458,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 12946
    },
    {
      "epoch": 0.12947,
      "grad_norm": 1.2171654215607066,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 12947
    },
    {
      "epoch": 0.12948,
      "grad_norm": 1.1992249878175494,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 12948
    },
    {
      "epoch": 0.12949,
      "grad_norm": 1.2569764949972773,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 12949
    },
    {
      "epoch": 0.1295,
      "grad_norm": 1.2619205520569914,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 12950
    },
    {
      "epoch": 0.12951,
      "grad_norm": 1.1394939514320945,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 12951
    },
    {
      "epoch": 0.12952,
      "grad_norm": 1.155842315189784,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 12952
    },
    {
      "epoch": 0.12953,
      "grad_norm": 1.1707677310635063,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 12953
    },
    {
      "epoch": 0.12954,
      "grad_norm": 1.0933718841461246,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 12954
    },
    {
      "epoch": 0.12955,
      "grad_norm": 1.1535013083902386,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 12955
    },
    {
      "epoch": 0.12956,
      "grad_norm": 1.2597637233126602,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 12956
    },
    {
      "epoch": 0.12957,
      "grad_norm": 1.2129477062167615,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 12957
    },
    {
      "epoch": 0.12958,
      "grad_norm": 1.0259513574852872,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 12958
    },
    {
      "epoch": 0.12959,
      "grad_norm": 1.3098981298516374,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 12959
    },
    {
      "epoch": 0.1296,
      "grad_norm": 1.0705475071474728,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 12960
    },
    {
      "epoch": 0.12961,
      "grad_norm": 1.2082787123272298,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 12961
    },
    {
      "epoch": 0.12962,
      "grad_norm": 0.9752946105506849,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 12962
    },
    {
      "epoch": 0.12963,
      "grad_norm": 1.427170678310923,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 12963
    },
    {
      "epoch": 0.12964,
      "grad_norm": 0.9801046548980611,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 12964
    },
    {
      "epoch": 0.12965,
      "grad_norm": 1.4189584638318544,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 12965
    },
    {
      "epoch": 0.12966,
      "grad_norm": 0.9692416621823341,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 12966
    },
    {
      "epoch": 0.12967,
      "grad_norm": 1.2840081091300894,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 12967
    },
    {
      "epoch": 0.12968,
      "grad_norm": 1.0219463558374213,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 12968
    },
    {
      "epoch": 0.12969,
      "grad_norm": 1.3432349293723256,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 12969
    },
    {
      "epoch": 0.1297,
      "grad_norm": 0.9788760612331872,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 12970
    },
    {
      "epoch": 0.12971,
      "grad_norm": 1.1150769051237421,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12971
    },
    {
      "epoch": 0.12972,
      "grad_norm": 1.5177506434977075,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 12972
    },
    {
      "epoch": 0.12973,
      "grad_norm": 1.0099978634477083,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 12973
    },
    {
      "epoch": 0.12974,
      "grad_norm": 1.1922998133660232,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 12974
    },
    {
      "epoch": 0.12975,
      "grad_norm": 1.2232258790717931,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 12975
    },
    {
      "epoch": 0.12976,
      "grad_norm": 1.101427794606135,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 12976
    },
    {
      "epoch": 0.12977,
      "grad_norm": 1.304895856234629,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 12977
    },
    {
      "epoch": 0.12978,
      "grad_norm": 1.2628731587815114,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 12978
    },
    {
      "epoch": 0.12979,
      "grad_norm": 1.6235453994304816,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 12979
    },
    {
      "epoch": 0.1298,
      "grad_norm": 1.2048853555128978,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 12980
    },
    {
      "epoch": 0.12981,
      "grad_norm": 1.2375469153589247,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 12981
    },
    {
      "epoch": 0.12982,
      "grad_norm": 1.0542275359671747,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 12982
    },
    {
      "epoch": 0.12983,
      "grad_norm": 1.2239658195260248,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 12983
    },
    {
      "epoch": 0.12984,
      "grad_norm": 1.4114467551974932,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 12984
    },
    {
      "epoch": 0.12985,
      "grad_norm": 1.0277566905987274,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 12985
    },
    {
      "epoch": 0.12986,
      "grad_norm": 1.2165699490367765,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 12986
    },
    {
      "epoch": 0.12987,
      "grad_norm": 1.0726865407475268,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 12987
    },
    {
      "epoch": 0.12988,
      "grad_norm": 1.3509187805816179,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 12988
    },
    {
      "epoch": 0.12989,
      "grad_norm": 0.9369681453881235,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 12989
    },
    {
      "epoch": 0.1299,
      "grad_norm": 1.0227497888593235,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 12990
    },
    {
      "epoch": 0.12991,
      "grad_norm": 1.2447174630530253,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 12991
    },
    {
      "epoch": 0.12992,
      "grad_norm": 1.2955141038836735,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 12992
    },
    {
      "epoch": 0.12993,
      "grad_norm": 1.0410231425868315,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 12993
    },
    {
      "epoch": 0.12994,
      "grad_norm": 1.2099567963574025,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 12994
    },
    {
      "epoch": 0.12995,
      "grad_norm": 1.154147808313667,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 12995
    },
    {
      "epoch": 0.12996,
      "grad_norm": 1.2765151728742004,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 12996
    },
    {
      "epoch": 0.12997,
      "grad_norm": 0.9682271673833246,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 12997
    },
    {
      "epoch": 0.12998,
      "grad_norm": 1.1146858882428259,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 12998
    },
    {
      "epoch": 0.12999,
      "grad_norm": 1.1852155319301692,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 12999
    },
    {
      "epoch": 0.13,
      "grad_norm": 1.0976339654389153,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 13000
    },
    {
      "epoch": 0.13001,
      "grad_norm": 1.1719840711361706,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 13001
    },
    {
      "epoch": 0.13002,
      "grad_norm": 1.2395325556126913,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 13002
    },
    {
      "epoch": 0.13003,
      "grad_norm": 1.2171854677821783,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 13003
    },
    {
      "epoch": 0.13004,
      "grad_norm": 1.2637731269041053,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 13004
    },
    {
      "epoch": 0.13005,
      "grad_norm": 1.1017358129235937,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 13005
    },
    {
      "epoch": 0.13006,
      "grad_norm": 1.2406184371706632,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 13006
    },
    {
      "epoch": 0.13007,
      "grad_norm": 1.1825684407984873,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 13007
    },
    {
      "epoch": 0.13008,
      "grad_norm": 1.3488824485332012,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 13008
    },
    {
      "epoch": 0.13009,
      "grad_norm": 1.2234427376099881,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 13009
    },
    {
      "epoch": 0.1301,
      "grad_norm": 1.08591708301902,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 13010
    },
    {
      "epoch": 0.13011,
      "grad_norm": 1.3407717742473235,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 13011
    },
    {
      "epoch": 0.13012,
      "grad_norm": 1.2155971810878954,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 13012
    },
    {
      "epoch": 0.13013,
      "grad_norm": 1.0879271257155427,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 13013
    },
    {
      "epoch": 0.13014,
      "grad_norm": 1.1851693890243746,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 13014
    },
    {
      "epoch": 0.13015,
      "grad_norm": 1.199017353126804,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 13015
    },
    {
      "epoch": 0.13016,
      "grad_norm": 1.1776438621405436,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 13016
    },
    {
      "epoch": 0.13017,
      "grad_norm": 0.9992607017416343,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 13017
    },
    {
      "epoch": 0.13018,
      "grad_norm": 1.3721850549640682,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 13018
    },
    {
      "epoch": 0.13019,
      "grad_norm": 1.1592138262428602,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 13019
    },
    {
      "epoch": 0.1302,
      "grad_norm": 1.3905239030194738,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 13020
    },
    {
      "epoch": 0.13021,
      "grad_norm": 1.0426943295167048,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 13021
    },
    {
      "epoch": 0.13022,
      "grad_norm": 1.337761810483605,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 13022
    },
    {
      "epoch": 0.13023,
      "grad_norm": 1.3428463476352528,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 13023
    },
    {
      "epoch": 0.13024,
      "grad_norm": 1.0184689002960445,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 13024
    },
    {
      "epoch": 0.13025,
      "grad_norm": 1.567374367389213,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 13025
    },
    {
      "epoch": 0.13026,
      "grad_norm": 0.9682298144928962,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 13026
    },
    {
      "epoch": 0.13027,
      "grad_norm": 1.2481611156368244,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 13027
    },
    {
      "epoch": 0.13028,
      "grad_norm": 1.1118337049905826,
      "learning_rate": 0.003,
      "loss": 4.1026,
      "step": 13028
    },
    {
      "epoch": 0.13029,
      "grad_norm": 1.0678002232745039,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13029
    },
    {
      "epoch": 0.1303,
      "grad_norm": 1.3028948228940662,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 13030
    },
    {
      "epoch": 0.13031,
      "grad_norm": 1.1345815629618317,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 13031
    },
    {
      "epoch": 0.13032,
      "grad_norm": 1.2764060035685951,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13032
    },
    {
      "epoch": 0.13033,
      "grad_norm": 1.0880223457154283,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 13033
    },
    {
      "epoch": 0.13034,
      "grad_norm": 1.2493974225524753,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 13034
    },
    {
      "epoch": 0.13035,
      "grad_norm": 1.192649822548278,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 13035
    },
    {
      "epoch": 0.13036,
      "grad_norm": 1.1621841236091006,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 13036
    },
    {
      "epoch": 0.13037,
      "grad_norm": 1.1555538619464063,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13037
    },
    {
      "epoch": 0.13038,
      "grad_norm": 1.1040581259118034,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 13038
    },
    {
      "epoch": 0.13039,
      "grad_norm": 1.2449707400894137,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 13039
    },
    {
      "epoch": 0.1304,
      "grad_norm": 1.1186950779568379,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 13040
    },
    {
      "epoch": 0.13041,
      "grad_norm": 1.2095185684633825,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 13041
    },
    {
      "epoch": 0.13042,
      "grad_norm": 1.0596708796928151,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 13042
    },
    {
      "epoch": 0.13043,
      "grad_norm": 1.327168194888388,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 13043
    },
    {
      "epoch": 0.13044,
      "grad_norm": 1.0088888183450886,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 13044
    },
    {
      "epoch": 0.13045,
      "grad_norm": 1.373055801348955,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 13045
    },
    {
      "epoch": 0.13046,
      "grad_norm": 1.039158109370196,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13046
    },
    {
      "epoch": 0.13047,
      "grad_norm": 1.254604689950872,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 13047
    },
    {
      "epoch": 0.13048,
      "grad_norm": 1.303497464207566,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 13048
    },
    {
      "epoch": 0.13049,
      "grad_norm": 1.1142815098414798,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 13049
    },
    {
      "epoch": 0.1305,
      "grad_norm": 1.0922039155513472,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 13050
    },
    {
      "epoch": 0.13051,
      "grad_norm": 1.2383441895824563,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 13051
    },
    {
      "epoch": 0.13052,
      "grad_norm": 0.871361235889519,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 13052
    },
    {
      "epoch": 0.13053,
      "grad_norm": 1.20706424684798,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 13053
    },
    {
      "epoch": 0.13054,
      "grad_norm": 1.1275445937801618,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 13054
    },
    {
      "epoch": 0.13055,
      "grad_norm": 1.3058414663434768,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 13055
    },
    {
      "epoch": 0.13056,
      "grad_norm": 1.1606410345404876,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 13056
    },
    {
      "epoch": 0.13057,
      "grad_norm": 1.1159909777718091,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 13057
    },
    {
      "epoch": 0.13058,
      "grad_norm": 1.3532554025376324,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 13058
    },
    {
      "epoch": 0.13059,
      "grad_norm": 0.9671037931541167,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 13059
    },
    {
      "epoch": 0.1306,
      "grad_norm": 1.3555157417557786,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 13060
    },
    {
      "epoch": 0.13061,
      "grad_norm": 1.1339550840622015,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 13061
    },
    {
      "epoch": 0.13062,
      "grad_norm": 1.3155454591800106,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 13062
    },
    {
      "epoch": 0.13063,
      "grad_norm": 0.9497245187691132,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 13063
    },
    {
      "epoch": 0.13064,
      "grad_norm": 1.1112703813027285,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 13064
    },
    {
      "epoch": 0.13065,
      "grad_norm": 1.3094402100725666,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13065
    },
    {
      "epoch": 0.13066,
      "grad_norm": 1.207723488602815,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 13066
    },
    {
      "epoch": 0.13067,
      "grad_norm": 1.138535241928582,
      "learning_rate": 0.003,
      "loss": 4.1175,
      "step": 13067
    },
    {
      "epoch": 0.13068,
      "grad_norm": 1.125570761311579,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 13068
    },
    {
      "epoch": 0.13069,
      "grad_norm": 1.1380573095984057,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 13069
    },
    {
      "epoch": 0.1307,
      "grad_norm": 1.3993482428247788,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 13070
    },
    {
      "epoch": 0.13071,
      "grad_norm": 1.0460800989467518,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 13071
    },
    {
      "epoch": 0.13072,
      "grad_norm": 1.3210821096496814,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 13072
    },
    {
      "epoch": 0.13073,
      "grad_norm": 1.2782443653088875,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 13073
    },
    {
      "epoch": 0.13074,
      "grad_norm": 1.0264864205435482,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 13074
    },
    {
      "epoch": 0.13075,
      "grad_norm": 1.4539548194948315,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 13075
    },
    {
      "epoch": 0.13076,
      "grad_norm": 1.0571972127694433,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 13076
    },
    {
      "epoch": 0.13077,
      "grad_norm": 1.4052143426558386,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 13077
    },
    {
      "epoch": 0.13078,
      "grad_norm": 1.0168093488205356,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 13078
    },
    {
      "epoch": 0.13079,
      "grad_norm": 1.0372755501789055,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 13079
    },
    {
      "epoch": 0.1308,
      "grad_norm": 1.1618249789461472,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 13080
    },
    {
      "epoch": 0.13081,
      "grad_norm": 1.0624452352346405,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 13081
    },
    {
      "epoch": 0.13082,
      "grad_norm": 1.531261452855752,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 13082
    },
    {
      "epoch": 0.13083,
      "grad_norm": 0.9416551787171488,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 13083
    },
    {
      "epoch": 0.13084,
      "grad_norm": 1.0731902948525347,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 13084
    },
    {
      "epoch": 0.13085,
      "grad_norm": 1.3053143377615222,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 13085
    },
    {
      "epoch": 0.13086,
      "grad_norm": 1.268815155355026,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 13086
    },
    {
      "epoch": 0.13087,
      "grad_norm": 1.0761772837233665,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 13087
    },
    {
      "epoch": 0.13088,
      "grad_norm": 1.274969036571922,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 13088
    },
    {
      "epoch": 0.13089,
      "grad_norm": 1.324164809138917,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13089
    },
    {
      "epoch": 0.1309,
      "grad_norm": 1.016357532614868,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 13090
    },
    {
      "epoch": 0.13091,
      "grad_norm": 1.2212894941990593,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 13091
    },
    {
      "epoch": 0.13092,
      "grad_norm": 1.1327866585335682,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 13092
    },
    {
      "epoch": 0.13093,
      "grad_norm": 1.2464559565085225,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 13093
    },
    {
      "epoch": 0.13094,
      "grad_norm": 1.1703567866448106,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 13094
    },
    {
      "epoch": 0.13095,
      "grad_norm": 1.1254181586581946,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 13095
    },
    {
      "epoch": 0.13096,
      "grad_norm": 1.4898858598531313,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 13096
    },
    {
      "epoch": 0.13097,
      "grad_norm": 1.0993970060752314,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 13097
    },
    {
      "epoch": 0.13098,
      "grad_norm": 1.2468958183914973,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 13098
    },
    {
      "epoch": 0.13099,
      "grad_norm": 1.1356271282152646,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 13099
    },
    {
      "epoch": 0.131,
      "grad_norm": 1.164359375650946,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13100
    },
    {
      "epoch": 0.13101,
      "grad_norm": 1.2026038979244935,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 13101
    },
    {
      "epoch": 0.13102,
      "grad_norm": 1.239000477800128,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 13102
    },
    {
      "epoch": 0.13103,
      "grad_norm": 1.0921048185721713,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 13103
    },
    {
      "epoch": 0.13104,
      "grad_norm": 1.1912772826367217,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13104
    },
    {
      "epoch": 0.13105,
      "grad_norm": 1.2496469503092782,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 13105
    },
    {
      "epoch": 0.13106,
      "grad_norm": 1.1050279696637815,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 13106
    },
    {
      "epoch": 0.13107,
      "grad_norm": 1.0460931373647986,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 13107
    },
    {
      "epoch": 0.13108,
      "grad_norm": 1.15041158164609,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 13108
    },
    {
      "epoch": 0.13109,
      "grad_norm": 1.202937178450559,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 13109
    },
    {
      "epoch": 0.1311,
      "grad_norm": 1.3292885960129865,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 13110
    },
    {
      "epoch": 0.13111,
      "grad_norm": 1.037369122357519,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 13111
    },
    {
      "epoch": 0.13112,
      "grad_norm": 1.2547884682511412,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 13112
    },
    {
      "epoch": 0.13113,
      "grad_norm": 1.1019106389585718,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 13113
    },
    {
      "epoch": 0.13114,
      "grad_norm": 1.203936727523638,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 13114
    },
    {
      "epoch": 0.13115,
      "grad_norm": 1.162395306383368,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 13115
    },
    {
      "epoch": 0.13116,
      "grad_norm": 1.2454704246661696,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13116
    },
    {
      "epoch": 0.13117,
      "grad_norm": 1.1536892102972363,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 13117
    },
    {
      "epoch": 0.13118,
      "grad_norm": 1.236290395505284,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 13118
    },
    {
      "epoch": 0.13119,
      "grad_norm": 1.1220643138722115,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 13119
    },
    {
      "epoch": 0.1312,
      "grad_norm": 1.2630931565665404,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 13120
    },
    {
      "epoch": 0.13121,
      "grad_norm": 1.0232700484349586,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 13121
    },
    {
      "epoch": 0.13122,
      "grad_norm": 1.3329053565683004,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13122
    },
    {
      "epoch": 0.13123,
      "grad_norm": 1.1981636851001758,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 13123
    },
    {
      "epoch": 0.13124,
      "grad_norm": 1.122876647482152,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 13124
    },
    {
      "epoch": 0.13125,
      "grad_norm": 1.0983788389474274,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 13125
    },
    {
      "epoch": 0.13126,
      "grad_norm": 1.189172427531487,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 13126
    },
    {
      "epoch": 0.13127,
      "grad_norm": 1.2610958209663654,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 13127
    },
    {
      "epoch": 0.13128,
      "grad_norm": 1.177518134913952,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 13128
    },
    {
      "epoch": 0.13129,
      "grad_norm": 1.2070916925420854,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 13129
    },
    {
      "epoch": 0.1313,
      "grad_norm": 1.021589725438157,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 13130
    },
    {
      "epoch": 0.13131,
      "grad_norm": 1.416014144166172,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 13131
    },
    {
      "epoch": 0.13132,
      "grad_norm": 1.1578426046991976,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 13132
    },
    {
      "epoch": 0.13133,
      "grad_norm": 1.1432614142886794,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 13133
    },
    {
      "epoch": 0.13134,
      "grad_norm": 1.1401084280990508,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 13134
    },
    {
      "epoch": 0.13135,
      "grad_norm": 1.2440658002569782,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 13135
    },
    {
      "epoch": 0.13136,
      "grad_norm": 1.052418668704034,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 13136
    },
    {
      "epoch": 0.13137,
      "grad_norm": 1.303485320067905,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 13137
    },
    {
      "epoch": 0.13138,
      "grad_norm": 1.2187999745892937,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 13138
    },
    {
      "epoch": 0.13139,
      "grad_norm": 1.2058533722431775,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13139
    },
    {
      "epoch": 0.1314,
      "grad_norm": 1.0408781113472696,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 13140
    },
    {
      "epoch": 0.13141,
      "grad_norm": 1.1411681778399358,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 13141
    },
    {
      "epoch": 0.13142,
      "grad_norm": 1.2319320845876192,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 13142
    },
    {
      "epoch": 0.13143,
      "grad_norm": 1.085798298116153,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13143
    },
    {
      "epoch": 0.13144,
      "grad_norm": 1.3545254009295953,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 13144
    },
    {
      "epoch": 0.13145,
      "grad_norm": 0.9143757032556452,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 13145
    },
    {
      "epoch": 0.13146,
      "grad_norm": 1.4720284030388406,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 13146
    },
    {
      "epoch": 0.13147,
      "grad_norm": 1.1731835884741624,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 13147
    },
    {
      "epoch": 0.13148,
      "grad_norm": 1.6066667376482742,
      "learning_rate": 0.003,
      "loss": 4.099,
      "step": 13148
    },
    {
      "epoch": 0.13149,
      "grad_norm": 0.9410727696498126,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 13149
    },
    {
      "epoch": 0.1315,
      "grad_norm": 1.1983778287162503,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13150
    },
    {
      "epoch": 0.13151,
      "grad_norm": 1.0307962254410346,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 13151
    },
    {
      "epoch": 0.13152,
      "grad_norm": 1.43694201892184,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 13152
    },
    {
      "epoch": 0.13153,
      "grad_norm": 1.0677993579040252,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 13153
    },
    {
      "epoch": 0.13154,
      "grad_norm": 1.268461574067591,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 13154
    },
    {
      "epoch": 0.13155,
      "grad_norm": 1.3489756824284524,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 13155
    },
    {
      "epoch": 0.13156,
      "grad_norm": 1.2563872234790574,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13156
    },
    {
      "epoch": 0.13157,
      "grad_norm": 1.1144314498494128,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 13157
    },
    {
      "epoch": 0.13158,
      "grad_norm": 1.1621371252015023,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 13158
    },
    {
      "epoch": 0.13159,
      "grad_norm": 1.1953764462629082,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 13159
    },
    {
      "epoch": 0.1316,
      "grad_norm": 1.3534961536701053,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 13160
    },
    {
      "epoch": 0.13161,
      "grad_norm": 1.242098722764693,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 13161
    },
    {
      "epoch": 0.13162,
      "grad_norm": 1.1870910181990675,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 13162
    },
    {
      "epoch": 0.13163,
      "grad_norm": 1.2544164663661443,
      "learning_rate": 0.003,
      "loss": 4.1104,
      "step": 13163
    },
    {
      "epoch": 0.13164,
      "grad_norm": 0.9468907101214062,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13164
    },
    {
      "epoch": 0.13165,
      "grad_norm": 1.147766455124515,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 13165
    },
    {
      "epoch": 0.13166,
      "grad_norm": 1.173114027596755,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 13166
    },
    {
      "epoch": 0.13167,
      "grad_norm": 1.3007314649436021,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 13167
    },
    {
      "epoch": 0.13168,
      "grad_norm": 1.2323954828024348,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 13168
    },
    {
      "epoch": 0.13169,
      "grad_norm": 1.1484314440408978,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 13169
    },
    {
      "epoch": 0.1317,
      "grad_norm": 1.2450412020280535,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 13170
    },
    {
      "epoch": 0.13171,
      "grad_norm": 1.0654001535175357,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 13171
    },
    {
      "epoch": 0.13172,
      "grad_norm": 1.2522723227148644,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 13172
    },
    {
      "epoch": 0.13173,
      "grad_norm": 1.2519635803570286,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 13173
    },
    {
      "epoch": 0.13174,
      "grad_norm": 1.3000796495416944,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 13174
    },
    {
      "epoch": 0.13175,
      "grad_norm": 1.1218033849114384,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 13175
    },
    {
      "epoch": 0.13176,
      "grad_norm": 1.40399595321406,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 13176
    },
    {
      "epoch": 0.13177,
      "grad_norm": 1.034644584531117,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 13177
    },
    {
      "epoch": 0.13178,
      "grad_norm": 1.2692271154725556,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 13178
    },
    {
      "epoch": 0.13179,
      "grad_norm": 1.0060631692783792,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 13179
    },
    {
      "epoch": 0.1318,
      "grad_norm": 1.2995122597023647,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 13180
    },
    {
      "epoch": 0.13181,
      "grad_norm": 1.0182110416638797,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 13181
    },
    {
      "epoch": 0.13182,
      "grad_norm": 1.3172261033849268,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 13182
    },
    {
      "epoch": 0.13183,
      "grad_norm": 1.1460498770288394,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 13183
    },
    {
      "epoch": 0.13184,
      "grad_norm": 1.4216084949568217,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 13184
    },
    {
      "epoch": 0.13185,
      "grad_norm": 1.147631551120462,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 13185
    },
    {
      "epoch": 0.13186,
      "grad_norm": 1.1662040796241357,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 13186
    },
    {
      "epoch": 0.13187,
      "grad_norm": 1.0624324039471014,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 13187
    },
    {
      "epoch": 0.13188,
      "grad_norm": 1.4689423539802247,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 13188
    },
    {
      "epoch": 0.13189,
      "grad_norm": 1.2125059062651504,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 13189
    },
    {
      "epoch": 0.1319,
      "grad_norm": 1.294366777943818,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 13190
    },
    {
      "epoch": 0.13191,
      "grad_norm": 1.2504632282545622,
      "learning_rate": 0.003,
      "loss": 4.1012,
      "step": 13191
    },
    {
      "epoch": 0.13192,
      "grad_norm": 1.2500300419282089,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 13192
    },
    {
      "epoch": 0.13193,
      "grad_norm": 1.188141854015912,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 13193
    },
    {
      "epoch": 0.13194,
      "grad_norm": 1.1643243566980073,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 13194
    },
    {
      "epoch": 0.13195,
      "grad_norm": 1.2181816479686556,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 13195
    },
    {
      "epoch": 0.13196,
      "grad_norm": 1.3827877725312507,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 13196
    },
    {
      "epoch": 0.13197,
      "grad_norm": 1.1016038971829898,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 13197
    },
    {
      "epoch": 0.13198,
      "grad_norm": 1.1855740411996358,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 13198
    },
    {
      "epoch": 0.13199,
      "grad_norm": 1.2171685480656886,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 13199
    },
    {
      "epoch": 0.132,
      "grad_norm": 1.1741359782355432,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 13200
    },
    {
      "epoch": 0.13201,
      "grad_norm": 1.0440591109644022,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 13201
    },
    {
      "epoch": 0.13202,
      "grad_norm": 1.274471138754622,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 13202
    },
    {
      "epoch": 0.13203,
      "grad_norm": 1.0243281102287765,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 13203
    },
    {
      "epoch": 0.13204,
      "grad_norm": 1.2860425256286774,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 13204
    },
    {
      "epoch": 0.13205,
      "grad_norm": 1.1348776907669282,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 13205
    },
    {
      "epoch": 0.13206,
      "grad_norm": 1.1730696996692072,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 13206
    },
    {
      "epoch": 0.13207,
      "grad_norm": 1.1865654537115866,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 13207
    },
    {
      "epoch": 0.13208,
      "grad_norm": 1.171465135259456,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 13208
    },
    {
      "epoch": 0.13209,
      "grad_norm": 1.167747396466667,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 13209
    },
    {
      "epoch": 0.1321,
      "grad_norm": 1.3028876053170206,
      "learning_rate": 0.003,
      "loss": 4.1076,
      "step": 13210
    },
    {
      "epoch": 0.13211,
      "grad_norm": 1.0152159430030967,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 13211
    },
    {
      "epoch": 0.13212,
      "grad_norm": 1.3183786319105706,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 13212
    },
    {
      "epoch": 0.13213,
      "grad_norm": 1.0998785599181296,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 13213
    },
    {
      "epoch": 0.13214,
      "grad_norm": 1.3318000426642578,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 13214
    },
    {
      "epoch": 0.13215,
      "grad_norm": 1.0581639289197025,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 13215
    },
    {
      "epoch": 0.13216,
      "grad_norm": 1.2798620619411303,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 13216
    },
    {
      "epoch": 0.13217,
      "grad_norm": 1.1679712848756147,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 13217
    },
    {
      "epoch": 0.13218,
      "grad_norm": 1.0639680271963092,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 13218
    },
    {
      "epoch": 0.13219,
      "grad_norm": 1.080567226953388,
      "learning_rate": 0.003,
      "loss": 4.116,
      "step": 13219
    },
    {
      "epoch": 0.1322,
      "grad_norm": 1.126118010598661,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 13220
    },
    {
      "epoch": 0.13221,
      "grad_norm": 1.3291393029914746,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 13221
    },
    {
      "epoch": 0.13222,
      "grad_norm": 1.1108425950802159,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 13222
    },
    {
      "epoch": 0.13223,
      "grad_norm": 1.2382546339313611,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 13223
    },
    {
      "epoch": 0.13224,
      "grad_norm": 1.1371082689527137,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 13224
    },
    {
      "epoch": 0.13225,
      "grad_norm": 1.3903131092309453,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 13225
    },
    {
      "epoch": 0.13226,
      "grad_norm": 1.1597561076975533,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 13226
    },
    {
      "epoch": 0.13227,
      "grad_norm": 1.042148751678186,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 13227
    },
    {
      "epoch": 0.13228,
      "grad_norm": 1.2992142654925891,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13228
    },
    {
      "epoch": 0.13229,
      "grad_norm": 0.8828692668277073,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13229
    },
    {
      "epoch": 0.1323,
      "grad_norm": 1.3203625380909305,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 13230
    },
    {
      "epoch": 0.13231,
      "grad_norm": 1.0816150876660933,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 13231
    },
    {
      "epoch": 0.13232,
      "grad_norm": 1.0725194297395733,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 13232
    },
    {
      "epoch": 0.13233,
      "grad_norm": 1.1413962335441905,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 13233
    },
    {
      "epoch": 0.13234,
      "grad_norm": 1.2134453387384976,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 13234
    },
    {
      "epoch": 0.13235,
      "grad_norm": 1.247755762674242,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 13235
    },
    {
      "epoch": 0.13236,
      "grad_norm": 1.0189650430636072,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 13236
    },
    {
      "epoch": 0.13237,
      "grad_norm": 1.4715555159366667,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 13237
    },
    {
      "epoch": 0.13238,
      "grad_norm": 1.0375021199817416,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 13238
    },
    {
      "epoch": 0.13239,
      "grad_norm": 1.4209193503839939,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 13239
    },
    {
      "epoch": 0.1324,
      "grad_norm": 1.1020368731645678,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13240
    },
    {
      "epoch": 0.13241,
      "grad_norm": 1.324787650406456,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 13241
    },
    {
      "epoch": 0.13242,
      "grad_norm": 1.2556118004452719,
      "learning_rate": 0.003,
      "loss": 4.0961,
      "step": 13242
    },
    {
      "epoch": 0.13243,
      "grad_norm": 1.0985048995218638,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 13243
    },
    {
      "epoch": 0.13244,
      "grad_norm": 1.2957851512955199,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 13244
    },
    {
      "epoch": 0.13245,
      "grad_norm": 1.0214523845328929,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 13245
    },
    {
      "epoch": 0.13246,
      "grad_norm": 1.1986691549585764,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 13246
    },
    {
      "epoch": 0.13247,
      "grad_norm": 1.4288685128409502,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 13247
    },
    {
      "epoch": 0.13248,
      "grad_norm": 1.0236478696036977,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 13248
    },
    {
      "epoch": 0.13249,
      "grad_norm": 1.292811480699596,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 13249
    },
    {
      "epoch": 0.1325,
      "grad_norm": 0.9843660778914611,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 13250
    },
    {
      "epoch": 0.13251,
      "grad_norm": 1.2233385186478687,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 13251
    },
    {
      "epoch": 0.13252,
      "grad_norm": 1.1244979257071788,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 13252
    },
    {
      "epoch": 0.13253,
      "grad_norm": 1.3177861997563658,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 13253
    },
    {
      "epoch": 0.13254,
      "grad_norm": 1.0244642998401052,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 13254
    },
    {
      "epoch": 0.13255,
      "grad_norm": 1.2270378253803171,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 13255
    },
    {
      "epoch": 0.13256,
      "grad_norm": 1.2548833887046018,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 13256
    },
    {
      "epoch": 0.13257,
      "grad_norm": 1.2879983769150376,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13257
    },
    {
      "epoch": 0.13258,
      "grad_norm": 1.0310468271487272,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 13258
    },
    {
      "epoch": 0.13259,
      "grad_norm": 1.2970101383442554,
      "learning_rate": 0.003,
      "loss": 4.092,
      "step": 13259
    },
    {
      "epoch": 0.1326,
      "grad_norm": 1.2461330317886523,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 13260
    },
    {
      "epoch": 0.13261,
      "grad_norm": 1.1113100025589566,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 13261
    },
    {
      "epoch": 0.13262,
      "grad_norm": 1.3786132650631386,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 13262
    },
    {
      "epoch": 0.13263,
      "grad_norm": 1.0032645606950847,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 13263
    },
    {
      "epoch": 0.13264,
      "grad_norm": 1.3399680984267557,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 13264
    },
    {
      "epoch": 0.13265,
      "grad_norm": 1.017522080857777,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13265
    },
    {
      "epoch": 0.13266,
      "grad_norm": 1.64686624277984,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 13266
    },
    {
      "epoch": 0.13267,
      "grad_norm": 0.8593432409687214,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 13267
    },
    {
      "epoch": 0.13268,
      "grad_norm": 1.1885456279714617,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 13268
    },
    {
      "epoch": 0.13269,
      "grad_norm": 1.1230075687986711,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 13269
    },
    {
      "epoch": 0.1327,
      "grad_norm": 1.3107914102384386,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 13270
    },
    {
      "epoch": 0.13271,
      "grad_norm": 1.1820667544165129,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 13271
    },
    {
      "epoch": 0.13272,
      "grad_norm": 1.3241595677319673,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 13272
    },
    {
      "epoch": 0.13273,
      "grad_norm": 1.115483229779608,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 13273
    },
    {
      "epoch": 0.13274,
      "grad_norm": 1.0905548862218508,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 13274
    },
    {
      "epoch": 0.13275,
      "grad_norm": 1.1888232473964362,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 13275
    },
    {
      "epoch": 0.13276,
      "grad_norm": 1.176879818014123,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 13276
    },
    {
      "epoch": 0.13277,
      "grad_norm": 1.1947106153364264,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 13277
    },
    {
      "epoch": 0.13278,
      "grad_norm": 1.341444256794462,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 13278
    },
    {
      "epoch": 0.13279,
      "grad_norm": 0.945105729131143,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13279
    },
    {
      "epoch": 0.1328,
      "grad_norm": 1.0371380072191345,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 13280
    },
    {
      "epoch": 0.13281,
      "grad_norm": 1.3667997773870617,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 13281
    },
    {
      "epoch": 0.13282,
      "grad_norm": 1.1626626485048392,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 13282
    },
    {
      "epoch": 0.13283,
      "grad_norm": 1.1427129167806978,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 13283
    },
    {
      "epoch": 0.13284,
      "grad_norm": 1.101227744769405,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 13284
    },
    {
      "epoch": 0.13285,
      "grad_norm": 1.149143830642704,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 13285
    },
    {
      "epoch": 0.13286,
      "grad_norm": 1.1520063370772313,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 13286
    },
    {
      "epoch": 0.13287,
      "grad_norm": 1.2596745982290913,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 13287
    },
    {
      "epoch": 0.13288,
      "grad_norm": 1.3171987184882712,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 13288
    },
    {
      "epoch": 0.13289,
      "grad_norm": 1.0863020590124632,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 13289
    },
    {
      "epoch": 0.1329,
      "grad_norm": 1.2508202256063077,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 13290
    },
    {
      "epoch": 0.13291,
      "grad_norm": 1.1495342256166174,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 13291
    },
    {
      "epoch": 0.13292,
      "grad_norm": 1.0969413588048633,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 13292
    },
    {
      "epoch": 0.13293,
      "grad_norm": 1.2066481663915818,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 13293
    },
    {
      "epoch": 0.13294,
      "grad_norm": 1.310751051697473,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 13294
    },
    {
      "epoch": 0.13295,
      "grad_norm": 1.1537848448961812,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 13295
    },
    {
      "epoch": 0.13296,
      "grad_norm": 1.2863255538835128,
      "learning_rate": 0.003,
      "loss": 4.1097,
      "step": 13296
    },
    {
      "epoch": 0.13297,
      "grad_norm": 1.1418414868931317,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 13297
    },
    {
      "epoch": 0.13298,
      "grad_norm": 1.4867602662556318,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 13298
    },
    {
      "epoch": 0.13299,
      "grad_norm": 0.9831475534156756,
      "learning_rate": 0.003,
      "loss": 4.1155,
      "step": 13299
    },
    {
      "epoch": 0.133,
      "grad_norm": 1.2855457081631143,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 13300
    },
    {
      "epoch": 0.13301,
      "grad_norm": 0.9964339126410291,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 13301
    },
    {
      "epoch": 0.13302,
      "grad_norm": 1.4105926735675485,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 13302
    },
    {
      "epoch": 0.13303,
      "grad_norm": 1.124060560334596,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 13303
    },
    {
      "epoch": 0.13304,
      "grad_norm": 1.1347947355935488,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13304
    },
    {
      "epoch": 0.13305,
      "grad_norm": 1.1243413098280783,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 13305
    },
    {
      "epoch": 0.13306,
      "grad_norm": 1.2185377831088966,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 13306
    },
    {
      "epoch": 0.13307,
      "grad_norm": 1.1696030730137574,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 13307
    },
    {
      "epoch": 0.13308,
      "grad_norm": 1.2161013629889335,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 13308
    },
    {
      "epoch": 0.13309,
      "grad_norm": 1.4482005394663107,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 13309
    },
    {
      "epoch": 0.1331,
      "grad_norm": 1.2558887517933217,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 13310
    },
    {
      "epoch": 0.13311,
      "grad_norm": 0.9864618715968552,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 13311
    },
    {
      "epoch": 0.13312,
      "grad_norm": 1.2173793332268772,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 13312
    },
    {
      "epoch": 0.13313,
      "grad_norm": 1.2436277242839906,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 13313
    },
    {
      "epoch": 0.13314,
      "grad_norm": 1.3875133554853671,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 13314
    },
    {
      "epoch": 0.13315,
      "grad_norm": 0.9751721952947197,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 13315
    },
    {
      "epoch": 0.13316,
      "grad_norm": 1.278534885777062,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 13316
    },
    {
      "epoch": 0.13317,
      "grad_norm": 1.01636143477108,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 13317
    },
    {
      "epoch": 0.13318,
      "grad_norm": 1.3649983840486495,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 13318
    },
    {
      "epoch": 0.13319,
      "grad_norm": 0.9565038889965672,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 13319
    },
    {
      "epoch": 0.1332,
      "grad_norm": 1.262266957645931,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 13320
    },
    {
      "epoch": 0.13321,
      "grad_norm": 1.1835388605510213,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 13321
    },
    {
      "epoch": 0.13322,
      "grad_norm": 1.2606599407501045,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 13322
    },
    {
      "epoch": 0.13323,
      "grad_norm": 1.552563990129336,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 13323
    },
    {
      "epoch": 0.13324,
      "grad_norm": 1.046510957547619,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 13324
    },
    {
      "epoch": 0.13325,
      "grad_norm": 1.110530797228649,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13325
    },
    {
      "epoch": 0.13326,
      "grad_norm": 1.0935804074363407,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 13326
    },
    {
      "epoch": 0.13327,
      "grad_norm": 1.2812629789819532,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 13327
    },
    {
      "epoch": 0.13328,
      "grad_norm": 0.937706571734598,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 13328
    },
    {
      "epoch": 0.13329,
      "grad_norm": 1.504345922785764,
      "learning_rate": 0.003,
      "loss": 4.1024,
      "step": 13329
    },
    {
      "epoch": 0.1333,
      "grad_norm": 0.942919545368339,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 13330
    },
    {
      "epoch": 0.13331,
      "grad_norm": 1.3860701408176805,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 13331
    },
    {
      "epoch": 0.13332,
      "grad_norm": 1.1649499190977812,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 13332
    },
    {
      "epoch": 0.13333,
      "grad_norm": 1.150537119298711,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 13333
    },
    {
      "epoch": 0.13334,
      "grad_norm": 1.1921957935384304,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 13334
    },
    {
      "epoch": 0.13335,
      "grad_norm": 1.2058355277788635,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 13335
    },
    {
      "epoch": 0.13336,
      "grad_norm": 1.14521866323671,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 13336
    },
    {
      "epoch": 0.13337,
      "grad_norm": 1.1264333972428362,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 13337
    },
    {
      "epoch": 0.13338,
      "grad_norm": 1.4305065758976705,
      "learning_rate": 0.003,
      "loss": 4.1101,
      "step": 13338
    },
    {
      "epoch": 0.13339,
      "grad_norm": 1.3457442251358365,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 13339
    },
    {
      "epoch": 0.1334,
      "grad_norm": 1.0819564548054041,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 13340
    },
    {
      "epoch": 0.13341,
      "grad_norm": 1.1564015759857842,
      "learning_rate": 0.003,
      "loss": 4.1099,
      "step": 13341
    },
    {
      "epoch": 0.13342,
      "grad_norm": 1.2673644877503936,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 13342
    },
    {
      "epoch": 0.13343,
      "grad_norm": 1.2180577385368219,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 13343
    },
    {
      "epoch": 0.13344,
      "grad_norm": 1.0635239571968256,
      "learning_rate": 0.003,
      "loss": 4.1039,
      "step": 13344
    },
    {
      "epoch": 0.13345,
      "grad_norm": 1.259260532413804,
      "learning_rate": 0.003,
      "loss": 4.1126,
      "step": 13345
    },
    {
      "epoch": 0.13346,
      "grad_norm": 1.0411929273126528,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 13346
    },
    {
      "epoch": 0.13347,
      "grad_norm": 1.1397615490181243,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 13347
    },
    {
      "epoch": 0.13348,
      "grad_norm": 1.259020935793382,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 13348
    },
    {
      "epoch": 0.13349,
      "grad_norm": 1.1319109432970802,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 13349
    },
    {
      "epoch": 0.1335,
      "grad_norm": 1.2067967507361204,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 13350
    },
    {
      "epoch": 0.13351,
      "grad_norm": 1.1658964781212067,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 13351
    },
    {
      "epoch": 0.13352,
      "grad_norm": 1.3254913743020704,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 13352
    },
    {
      "epoch": 0.13353,
      "grad_norm": 1.1830811251199571,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 13353
    },
    {
      "epoch": 0.13354,
      "grad_norm": 1.3095382839092642,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 13354
    },
    {
      "epoch": 0.13355,
      "grad_norm": 1.4355120103751222,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 13355
    },
    {
      "epoch": 0.13356,
      "grad_norm": 0.882950527780688,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 13356
    },
    {
      "epoch": 0.13357,
      "grad_norm": 0.992528936217066,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 13357
    },
    {
      "epoch": 0.13358,
      "grad_norm": 1.3482841530983989,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 13358
    },
    {
      "epoch": 0.13359,
      "grad_norm": 1.0200084021979765,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 13359
    },
    {
      "epoch": 0.1336,
      "grad_norm": 1.3198266502439855,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 13360
    },
    {
      "epoch": 0.13361,
      "grad_norm": 0.9318948648450732,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 13361
    },
    {
      "epoch": 0.13362,
      "grad_norm": 1.156104443980015,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 13362
    },
    {
      "epoch": 0.13363,
      "grad_norm": 1.1348916514353162,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 13363
    },
    {
      "epoch": 0.13364,
      "grad_norm": 0.9545398557086426,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13364
    },
    {
      "epoch": 0.13365,
      "grad_norm": 1.0430415277592333,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 13365
    },
    {
      "epoch": 0.13366,
      "grad_norm": 1.2973396135907518,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 13366
    },
    {
      "epoch": 0.13367,
      "grad_norm": 1.377584331147787,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13367
    },
    {
      "epoch": 0.13368,
      "grad_norm": 1.087111394889965,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 13368
    },
    {
      "epoch": 0.13369,
      "grad_norm": 1.1399775172501416,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 13369
    },
    {
      "epoch": 0.1337,
      "grad_norm": 1.2782889127256056,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 13370
    },
    {
      "epoch": 0.13371,
      "grad_norm": 1.2107464720288288,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 13371
    },
    {
      "epoch": 0.13372,
      "grad_norm": 1.1235269728784218,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 13372
    },
    {
      "epoch": 0.13373,
      "grad_norm": 1.171530183194923,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13373
    },
    {
      "epoch": 0.13374,
      "grad_norm": 1.0460985774606695,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 13374
    },
    {
      "epoch": 0.13375,
      "grad_norm": 1.2202546470687783,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 13375
    },
    {
      "epoch": 0.13376,
      "grad_norm": 1.2928630380745547,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 13376
    },
    {
      "epoch": 0.13377,
      "grad_norm": 1.1104800307893876,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 13377
    },
    {
      "epoch": 0.13378,
      "grad_norm": 1.3133149278619125,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 13378
    },
    {
      "epoch": 0.13379,
      "grad_norm": 1.192621777611117,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 13379
    },
    {
      "epoch": 0.1338,
      "grad_norm": 1.6324962573798982,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 13380
    },
    {
      "epoch": 0.13381,
      "grad_norm": 0.9608650701463821,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 13381
    },
    {
      "epoch": 0.13382,
      "grad_norm": 1.309755125207003,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 13382
    },
    {
      "epoch": 0.13383,
      "grad_norm": 1.2184602263012025,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 13383
    },
    {
      "epoch": 0.13384,
      "grad_norm": 0.9707313098371623,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 13384
    },
    {
      "epoch": 0.13385,
      "grad_norm": 1.3512516690476681,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 13385
    },
    {
      "epoch": 0.13386,
      "grad_norm": 1.0809748766020124,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 13386
    },
    {
      "epoch": 0.13387,
      "grad_norm": 1.2485092151247863,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 13387
    },
    {
      "epoch": 0.13388,
      "grad_norm": 1.3101065144013673,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 13388
    },
    {
      "epoch": 0.13389,
      "grad_norm": 1.084908616260319,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 13389
    },
    {
      "epoch": 0.1339,
      "grad_norm": 1.22079628713085,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 13390
    },
    {
      "epoch": 0.13391,
      "grad_norm": 1.0202426905455475,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 13391
    },
    {
      "epoch": 0.13392,
      "grad_norm": 1.2992335676237536,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 13392
    },
    {
      "epoch": 0.13393,
      "grad_norm": 1.092089891274764,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 13393
    },
    {
      "epoch": 0.13394,
      "grad_norm": 1.3867392115270731,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 13394
    },
    {
      "epoch": 0.13395,
      "grad_norm": 0.9820590058309596,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 13395
    },
    {
      "epoch": 0.13396,
      "grad_norm": 1.3005697214378344,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 13396
    },
    {
      "epoch": 0.13397,
      "grad_norm": 1.279191656902853,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13397
    },
    {
      "epoch": 0.13398,
      "grad_norm": 1.299460212780781,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13398
    },
    {
      "epoch": 0.13399,
      "grad_norm": 1.123847793349164,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 13399
    },
    {
      "epoch": 0.134,
      "grad_norm": 1.254970037907744,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 13400
    },
    {
      "epoch": 0.13401,
      "grad_norm": 1.2751182808912296,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 13401
    },
    {
      "epoch": 0.13402,
      "grad_norm": 1.2877165752219555,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 13402
    },
    {
      "epoch": 0.13403,
      "grad_norm": 1.0003900002349755,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 13403
    },
    {
      "epoch": 0.13404,
      "grad_norm": 1.0931429176530132,
      "learning_rate": 0.003,
      "loss": 4.0973,
      "step": 13404
    },
    {
      "epoch": 0.13405,
      "grad_norm": 1.2238882247414367,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 13405
    },
    {
      "epoch": 0.13406,
      "grad_norm": 1.226888309982652,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13406
    },
    {
      "epoch": 0.13407,
      "grad_norm": 1.5015375172841177,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 13407
    },
    {
      "epoch": 0.13408,
      "grad_norm": 1.0494091926556364,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 13408
    },
    {
      "epoch": 0.13409,
      "grad_norm": 1.2981650249555947,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 13409
    },
    {
      "epoch": 0.1341,
      "grad_norm": 1.139531561438358,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 13410
    },
    {
      "epoch": 0.13411,
      "grad_norm": 1.107127473358016,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 13411
    },
    {
      "epoch": 0.13412,
      "grad_norm": 1.1067004651124173,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 13412
    },
    {
      "epoch": 0.13413,
      "grad_norm": 1.2818189360675667,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13413
    },
    {
      "epoch": 0.13414,
      "grad_norm": 1.0920613489076882,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 13414
    },
    {
      "epoch": 0.13415,
      "grad_norm": 1.323974517486657,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 13415
    },
    {
      "epoch": 0.13416,
      "grad_norm": 1.1510418258436874,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 13416
    },
    {
      "epoch": 0.13417,
      "grad_norm": 1.3101953345153434,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 13417
    },
    {
      "epoch": 0.13418,
      "grad_norm": 1.2484858107402523,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 13418
    },
    {
      "epoch": 0.13419,
      "grad_norm": 1.2008316192186337,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 13419
    },
    {
      "epoch": 0.1342,
      "grad_norm": 1.1571115511829444,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 13420
    },
    {
      "epoch": 0.13421,
      "grad_norm": 1.3922202445034606,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 13421
    },
    {
      "epoch": 0.13422,
      "grad_norm": 1.0939348589277045,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 13422
    },
    {
      "epoch": 0.13423,
      "grad_norm": 1.3721463765694832,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 13423
    },
    {
      "epoch": 0.13424,
      "grad_norm": 0.9950380943555217,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 13424
    },
    {
      "epoch": 0.13425,
      "grad_norm": 1.4568519919068281,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 13425
    },
    {
      "epoch": 0.13426,
      "grad_norm": 0.8845714068799299,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 13426
    },
    {
      "epoch": 0.13427,
      "grad_norm": 0.961609139691443,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 13427
    },
    {
      "epoch": 0.13428,
      "grad_norm": 1.3462502809623975,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 13428
    },
    {
      "epoch": 0.13429,
      "grad_norm": 1.1194356350584982,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 13429
    },
    {
      "epoch": 0.1343,
      "grad_norm": 1.4920878363470662,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 13430
    },
    {
      "epoch": 0.13431,
      "grad_norm": 0.8483834854771911,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13431
    },
    {
      "epoch": 0.13432,
      "grad_norm": 0.9112242246450506,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 13432
    },
    {
      "epoch": 0.13433,
      "grad_norm": 1.2665685261460575,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 13433
    },
    {
      "epoch": 0.13434,
      "grad_norm": 1.0610030221051627,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 13434
    },
    {
      "epoch": 0.13435,
      "grad_norm": 1.321751025696581,
      "learning_rate": 0.003,
      "loss": 4.105,
      "step": 13435
    },
    {
      "epoch": 0.13436,
      "grad_norm": 0.9682699838920557,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 13436
    },
    {
      "epoch": 0.13437,
      "grad_norm": 1.3736444982158735,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 13437
    },
    {
      "epoch": 0.13438,
      "grad_norm": 1.2448295072637303,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 13438
    },
    {
      "epoch": 0.13439,
      "grad_norm": 1.2473219844564283,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 13439
    },
    {
      "epoch": 0.1344,
      "grad_norm": 1.3130340499660091,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 13440
    },
    {
      "epoch": 0.13441,
      "grad_norm": 1.240881935789378,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 13441
    },
    {
      "epoch": 0.13442,
      "grad_norm": 1.269013493595987,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 13442
    },
    {
      "epoch": 0.13443,
      "grad_norm": 1.2069173559517443,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 13443
    },
    {
      "epoch": 0.13444,
      "grad_norm": 1.2916260889649558,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 13444
    },
    {
      "epoch": 0.13445,
      "grad_norm": 1.021728453694052,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 13445
    },
    {
      "epoch": 0.13446,
      "grad_norm": 1.1582896508308027,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 13446
    },
    {
      "epoch": 0.13447,
      "grad_norm": 1.2018218800263953,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 13447
    },
    {
      "epoch": 0.13448,
      "grad_norm": 1.1793134666029423,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 13448
    },
    {
      "epoch": 0.13449,
      "grad_norm": 1.113403756290677,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 13449
    },
    {
      "epoch": 0.1345,
      "grad_norm": 1.2679419571020423,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 13450
    },
    {
      "epoch": 0.13451,
      "grad_norm": 1.0504262222413014,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 13451
    },
    {
      "epoch": 0.13452,
      "grad_norm": 1.6461457947162657,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 13452
    },
    {
      "epoch": 0.13453,
      "grad_norm": 0.8415228653785007,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13453
    },
    {
      "epoch": 0.13454,
      "grad_norm": 0.9853824307930039,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13454
    },
    {
      "epoch": 0.13455,
      "grad_norm": 1.3711398037232354,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 13455
    },
    {
      "epoch": 0.13456,
      "grad_norm": 0.9299209880239284,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 13456
    },
    {
      "epoch": 0.13457,
      "grad_norm": 1.2201305236616733,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 13457
    },
    {
      "epoch": 0.13458,
      "grad_norm": 1.042117831521888,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 13458
    },
    {
      "epoch": 0.13459,
      "grad_norm": 1.2885917369252293,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 13459
    },
    {
      "epoch": 0.1346,
      "grad_norm": 1.1186516456657598,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 13460
    },
    {
      "epoch": 0.13461,
      "grad_norm": 1.1909986315329697,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 13461
    },
    {
      "epoch": 0.13462,
      "grad_norm": 1.2576997493640354,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 13462
    },
    {
      "epoch": 0.13463,
      "grad_norm": 1.155181532510466,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 13463
    },
    {
      "epoch": 0.13464,
      "grad_norm": 1.5427338937478448,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 13464
    },
    {
      "epoch": 0.13465,
      "grad_norm": 1.1015368366752862,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 13465
    },
    {
      "epoch": 0.13466,
      "grad_norm": 1.328116987003986,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 13466
    },
    {
      "epoch": 0.13467,
      "grad_norm": 1.4210595457619721,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 13467
    },
    {
      "epoch": 0.13468,
      "grad_norm": 1.0154008116906554,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13468
    },
    {
      "epoch": 0.13469,
      "grad_norm": 1.3831223411143163,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 13469
    },
    {
      "epoch": 0.1347,
      "grad_norm": 1.1187264304497462,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 13470
    },
    {
      "epoch": 0.13471,
      "grad_norm": 1.3256152725200183,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 13471
    },
    {
      "epoch": 0.13472,
      "grad_norm": 1.0436438983185985,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13472
    },
    {
      "epoch": 0.13473,
      "grad_norm": 1.3890226571349744,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 13473
    },
    {
      "epoch": 0.13474,
      "grad_norm": 0.9523790124680513,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 13474
    },
    {
      "epoch": 0.13475,
      "grad_norm": 1.2705342694225594,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 13475
    },
    {
      "epoch": 0.13476,
      "grad_norm": 1.0416515369439097,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 13476
    },
    {
      "epoch": 0.13477,
      "grad_norm": 1.213808961134788,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 13477
    },
    {
      "epoch": 0.13478,
      "grad_norm": 1.2567203891908283,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 13478
    },
    {
      "epoch": 0.13479,
      "grad_norm": 1.0627266915077016,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 13479
    },
    {
      "epoch": 0.1348,
      "grad_norm": 1.1942181777864018,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 13480
    },
    {
      "epoch": 0.13481,
      "grad_norm": 0.9086917010183528,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 13481
    },
    {
      "epoch": 0.13482,
      "grad_norm": 1.0518581902163413,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 13482
    },
    {
      "epoch": 0.13483,
      "grad_norm": 1.1908018688437683,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 13483
    },
    {
      "epoch": 0.13484,
      "grad_norm": 1.1141590588201575,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 13484
    },
    {
      "epoch": 0.13485,
      "grad_norm": 1.254355944042286,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 13485
    },
    {
      "epoch": 0.13486,
      "grad_norm": 1.3334461860370763,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 13486
    },
    {
      "epoch": 0.13487,
      "grad_norm": 1.2665983361640198,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 13487
    },
    {
      "epoch": 0.13488,
      "grad_norm": 1.0654884519609067,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 13488
    },
    {
      "epoch": 0.13489,
      "grad_norm": 1.7471630662376938,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 13489
    },
    {
      "epoch": 0.1349,
      "grad_norm": 1.08610677249902,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 13490
    },
    {
      "epoch": 0.13491,
      "grad_norm": 1.6699300578111145,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 13491
    },
    {
      "epoch": 0.13492,
      "grad_norm": 1.0051323094271105,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13492
    },
    {
      "epoch": 0.13493,
      "grad_norm": 1.1366130593463755,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 13493
    },
    {
      "epoch": 0.13494,
      "grad_norm": 1.0985346160202802,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 13494
    },
    {
      "epoch": 0.13495,
      "grad_norm": 1.3406522988660494,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 13495
    },
    {
      "epoch": 0.13496,
      "grad_norm": 1.28654771486778,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 13496
    },
    {
      "epoch": 0.13497,
      "grad_norm": 1.0071406929819673,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 13497
    },
    {
      "epoch": 0.13498,
      "grad_norm": 1.4246452225409638,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 13498
    },
    {
      "epoch": 0.13499,
      "grad_norm": 0.9972509121748382,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 13499
    },
    {
      "epoch": 0.135,
      "grad_norm": 1.189967531780217,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13500
    },
    {
      "epoch": 0.13501,
      "grad_norm": 1.1493270847412234,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 13501
    },
    {
      "epoch": 0.13502,
      "grad_norm": 1.189309815156667,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 13502
    },
    {
      "epoch": 0.13503,
      "grad_norm": 1.2288407869403217,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 13503
    },
    {
      "epoch": 0.13504,
      "grad_norm": 1.1103535353418132,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13504
    },
    {
      "epoch": 0.13505,
      "grad_norm": 1.4194568999011024,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 13505
    },
    {
      "epoch": 0.13506,
      "grad_norm": 1.220666174798732,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 13506
    },
    {
      "epoch": 0.13507,
      "grad_norm": 1.1415944662630442,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 13507
    },
    {
      "epoch": 0.13508,
      "grad_norm": 1.1661369922608253,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 13508
    },
    {
      "epoch": 0.13509,
      "grad_norm": 1.3463481120750507,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 13509
    },
    {
      "epoch": 0.1351,
      "grad_norm": 1.0168206869032022,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 13510
    },
    {
      "epoch": 0.13511,
      "grad_norm": 1.1550104408646904,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 13511
    },
    {
      "epoch": 0.13512,
      "grad_norm": 1.4461693850672317,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 13512
    },
    {
      "epoch": 0.13513,
      "grad_norm": 1.0399061773826157,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 13513
    },
    {
      "epoch": 0.13514,
      "grad_norm": 1.342808826553646,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 13514
    },
    {
      "epoch": 0.13515,
      "grad_norm": 1.032416494086337,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 13515
    },
    {
      "epoch": 0.13516,
      "grad_norm": 1.3650543147519953,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 13516
    },
    {
      "epoch": 0.13517,
      "grad_norm": 1.0965216566801397,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 13517
    },
    {
      "epoch": 0.13518,
      "grad_norm": 1.099857419661087,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 13518
    },
    {
      "epoch": 0.13519,
      "grad_norm": 1.1411897913570885,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 13519
    },
    {
      "epoch": 0.1352,
      "grad_norm": 1.2794171237422962,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 13520
    },
    {
      "epoch": 0.13521,
      "grad_norm": 0.9811240347362448,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 13521
    },
    {
      "epoch": 0.13522,
      "grad_norm": 1.268850909392575,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 13522
    },
    {
      "epoch": 0.13523,
      "grad_norm": 1.1586115250772768,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 13523
    },
    {
      "epoch": 0.13524,
      "grad_norm": 0.9997886279609471,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 13524
    },
    {
      "epoch": 0.13525,
      "grad_norm": 1.2578207296594535,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 13525
    },
    {
      "epoch": 0.13526,
      "grad_norm": 1.1211549330059851,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 13526
    },
    {
      "epoch": 0.13527,
      "grad_norm": 1.3489510791240398,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 13527
    },
    {
      "epoch": 0.13528,
      "grad_norm": 0.9549824449680652,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 13528
    },
    {
      "epoch": 0.13529,
      "grad_norm": 1.402123824992118,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 13529
    },
    {
      "epoch": 0.1353,
      "grad_norm": 0.9658857549632947,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 13530
    },
    {
      "epoch": 0.13531,
      "grad_norm": 1.2385811105356495,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 13531
    },
    {
      "epoch": 0.13532,
      "grad_norm": 1.3384536701437566,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 13532
    },
    {
      "epoch": 0.13533,
      "grad_norm": 1.1271439594468355,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 13533
    },
    {
      "epoch": 0.13534,
      "grad_norm": 1.4053236824732496,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 13534
    },
    {
      "epoch": 0.13535,
      "grad_norm": 1.4210254217327944,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 13535
    },
    {
      "epoch": 0.13536,
      "grad_norm": 1.1757317921279626,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 13536
    },
    {
      "epoch": 0.13537,
      "grad_norm": 1.1826886943855999,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 13537
    },
    {
      "epoch": 0.13538,
      "grad_norm": 1.3867148362658737,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 13538
    },
    {
      "epoch": 0.13539,
      "grad_norm": 1.339948436143428,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 13539
    },
    {
      "epoch": 0.1354,
      "grad_norm": 1.0840175756391561,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 13540
    },
    {
      "epoch": 0.13541,
      "grad_norm": 1.1034967651255367,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 13541
    },
    {
      "epoch": 0.13542,
      "grad_norm": 1.495915605936551,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 13542
    },
    {
      "epoch": 0.13543,
      "grad_norm": 1.0025020971735863,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 13543
    },
    {
      "epoch": 0.13544,
      "grad_norm": 1.4015261150506153,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 13544
    },
    {
      "epoch": 0.13545,
      "grad_norm": 1.0777302094105228,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 13545
    },
    {
      "epoch": 0.13546,
      "grad_norm": 1.0075832217606906,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 13546
    },
    {
      "epoch": 0.13547,
      "grad_norm": 1.4568098120896713,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 13547
    },
    {
      "epoch": 0.13548,
      "grad_norm": 1.0073230103121065,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 13548
    },
    {
      "epoch": 0.13549,
      "grad_norm": 1.1906217853371375,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 13549
    },
    {
      "epoch": 0.1355,
      "grad_norm": 0.9433059001387746,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13550
    },
    {
      "epoch": 0.13551,
      "grad_norm": 1.1362882888979027,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 13551
    },
    {
      "epoch": 0.13552,
      "grad_norm": 1.1704663789178789,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 13552
    },
    {
      "epoch": 0.13553,
      "grad_norm": 1.2410924551269613,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 13553
    },
    {
      "epoch": 0.13554,
      "grad_norm": 1.1613234355481612,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 13554
    },
    {
      "epoch": 0.13555,
      "grad_norm": 1.0467286749826417,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 13555
    },
    {
      "epoch": 0.13556,
      "grad_norm": 1.3129682484329963,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 13556
    },
    {
      "epoch": 0.13557,
      "grad_norm": 1.308745439163254,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13557
    },
    {
      "epoch": 0.13558,
      "grad_norm": 1.2234312370273055,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 13558
    },
    {
      "epoch": 0.13559,
      "grad_norm": 0.9438356557960328,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 13559
    },
    {
      "epoch": 0.1356,
      "grad_norm": 1.1359668216145018,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 13560
    },
    {
      "epoch": 0.13561,
      "grad_norm": 1.2025332339088028,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 13561
    },
    {
      "epoch": 0.13562,
      "grad_norm": 1.0278362689972806,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 13562
    },
    {
      "epoch": 0.13563,
      "grad_norm": 1.2927399972064535,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 13563
    },
    {
      "epoch": 0.13564,
      "grad_norm": 1.1454504278844562,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 13564
    },
    {
      "epoch": 0.13565,
      "grad_norm": 1.2713561682590202,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 13565
    },
    {
      "epoch": 0.13566,
      "grad_norm": 1.0101189277867662,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 13566
    },
    {
      "epoch": 0.13567,
      "grad_norm": 1.3452338901105434,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13567
    },
    {
      "epoch": 0.13568,
      "grad_norm": 0.9651810314708721,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 13568
    },
    {
      "epoch": 0.13569,
      "grad_norm": 1.200150574860146,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 13569
    },
    {
      "epoch": 0.1357,
      "grad_norm": 1.3013228175830704,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 13570
    },
    {
      "epoch": 0.13571,
      "grad_norm": 1.1413524786109808,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 13571
    },
    {
      "epoch": 0.13572,
      "grad_norm": 1.3872233678122348,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 13572
    },
    {
      "epoch": 0.13573,
      "grad_norm": 1.19536948436539,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 13573
    },
    {
      "epoch": 0.13574,
      "grad_norm": 1.2507510937540127,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 13574
    },
    {
      "epoch": 0.13575,
      "grad_norm": 1.0919209378269294,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 13575
    },
    {
      "epoch": 0.13576,
      "grad_norm": 1.4568843241344767,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 13576
    },
    {
      "epoch": 0.13577,
      "grad_norm": 1.0313914316793116,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 13577
    },
    {
      "epoch": 0.13578,
      "grad_norm": 1.2021084920918845,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 13578
    },
    {
      "epoch": 0.13579,
      "grad_norm": 1.180819236115947,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 13579
    },
    {
      "epoch": 0.1358,
      "grad_norm": 1.112970308942345,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 13580
    },
    {
      "epoch": 0.13581,
      "grad_norm": 1.6581105119328479,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 13581
    },
    {
      "epoch": 0.13582,
      "grad_norm": 1.2212266178712439,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 13582
    },
    {
      "epoch": 0.13583,
      "grad_norm": 1.2713278799534757,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 13583
    },
    {
      "epoch": 0.13584,
      "grad_norm": 0.9492981552071117,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 13584
    },
    {
      "epoch": 0.13585,
      "grad_norm": 1.376007204854274,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 13585
    },
    {
      "epoch": 0.13586,
      "grad_norm": 1.0569112183452425,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 13586
    },
    {
      "epoch": 0.13587,
      "grad_norm": 1.3311495423571125,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 13587
    },
    {
      "epoch": 0.13588,
      "grad_norm": 1.0424856372298525,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13588
    },
    {
      "epoch": 0.13589,
      "grad_norm": 1.3682385130528956,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 13589
    },
    {
      "epoch": 0.1359,
      "grad_norm": 1.201664787748843,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 13590
    },
    {
      "epoch": 0.13591,
      "grad_norm": 1.3490298062373562,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 13591
    },
    {
      "epoch": 0.13592,
      "grad_norm": 0.9562478284952731,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 13592
    },
    {
      "epoch": 0.13593,
      "grad_norm": 1.2375419040612659,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 13593
    },
    {
      "epoch": 0.13594,
      "grad_norm": 1.0476579348639876,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 13594
    },
    {
      "epoch": 0.13595,
      "grad_norm": 1.0717762364233978,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 13595
    },
    {
      "epoch": 0.13596,
      "grad_norm": 1.0671422537293738,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 13596
    },
    {
      "epoch": 0.13597,
      "grad_norm": 1.3256644481635962,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 13597
    },
    {
      "epoch": 0.13598,
      "grad_norm": 1.144294080622358,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 13598
    },
    {
      "epoch": 0.13599,
      "grad_norm": 1.0859542569821918,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 13599
    },
    {
      "epoch": 0.136,
      "grad_norm": 1.1406897699892184,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 13600
    },
    {
      "epoch": 0.13601,
      "grad_norm": 1.247529096109531,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 13601
    },
    {
      "epoch": 0.13602,
      "grad_norm": 1.1840828561107422,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 13602
    },
    {
      "epoch": 0.13603,
      "grad_norm": 1.2623904567135522,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13603
    },
    {
      "epoch": 0.13604,
      "grad_norm": 0.9505781321776695,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 13604
    },
    {
      "epoch": 0.13605,
      "grad_norm": 1.2276234061362843,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 13605
    },
    {
      "epoch": 0.13606,
      "grad_norm": 0.997265695680624,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 13606
    },
    {
      "epoch": 0.13607,
      "grad_norm": 1.2553216295764094,
      "learning_rate": 0.003,
      "loss": 4.102,
      "step": 13607
    },
    {
      "epoch": 0.13608,
      "grad_norm": 1.200664004006863,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 13608
    },
    {
      "epoch": 0.13609,
      "grad_norm": 1.0895193259639602,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 13609
    },
    {
      "epoch": 0.1361,
      "grad_norm": 1.4530026766767474,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 13610
    },
    {
      "epoch": 0.13611,
      "grad_norm": 1.2262235822245915,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 13611
    },
    {
      "epoch": 0.13612,
      "grad_norm": 1.36036589820018,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 13612
    },
    {
      "epoch": 0.13613,
      "grad_norm": 1.1091710118023062,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 13613
    },
    {
      "epoch": 0.13614,
      "grad_norm": 1.1614141717257438,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 13614
    },
    {
      "epoch": 0.13615,
      "grad_norm": 1.3336021860149063,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 13615
    },
    {
      "epoch": 0.13616,
      "grad_norm": 1.153599402627099,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 13616
    },
    {
      "epoch": 0.13617,
      "grad_norm": 1.1603709609738238,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 13617
    },
    {
      "epoch": 0.13618,
      "grad_norm": 1.1346068059085979,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 13618
    },
    {
      "epoch": 0.13619,
      "grad_norm": 1.0892165185334561,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13619
    },
    {
      "epoch": 0.1362,
      "grad_norm": 1.1292986742158317,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 13620
    },
    {
      "epoch": 0.13621,
      "grad_norm": 1.560107881154133,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 13621
    },
    {
      "epoch": 0.13622,
      "grad_norm": 1.1694445330416534,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 13622
    },
    {
      "epoch": 0.13623,
      "grad_norm": 1.614640768254796,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 13623
    },
    {
      "epoch": 0.13624,
      "grad_norm": 0.9008587605126983,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 13624
    },
    {
      "epoch": 0.13625,
      "grad_norm": 1.0479880204115588,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 13625
    },
    {
      "epoch": 0.13626,
      "grad_norm": 1.2980084288540925,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 13626
    },
    {
      "epoch": 0.13627,
      "grad_norm": 1.2328245026092544,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 13627
    },
    {
      "epoch": 0.13628,
      "grad_norm": 1.2485744616304153,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 13628
    },
    {
      "epoch": 0.13629,
      "grad_norm": 1.1805256275888583,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 13629
    },
    {
      "epoch": 0.1363,
      "grad_norm": 1.1925719533196926,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 13630
    },
    {
      "epoch": 0.13631,
      "grad_norm": 1.0630848397550998,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 13631
    },
    {
      "epoch": 0.13632,
      "grad_norm": 1.350964628485614,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 13632
    },
    {
      "epoch": 0.13633,
      "grad_norm": 0.9968793398396996,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 13633
    },
    {
      "epoch": 0.13634,
      "grad_norm": 1.412486851774138,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 13634
    },
    {
      "epoch": 0.13635,
      "grad_norm": 1.03583980831029,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 13635
    },
    {
      "epoch": 0.13636,
      "grad_norm": 1.3373321063845935,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 13636
    },
    {
      "epoch": 0.13637,
      "grad_norm": 1.241854730615486,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 13637
    },
    {
      "epoch": 0.13638,
      "grad_norm": 1.255189863858571,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 13638
    },
    {
      "epoch": 0.13639,
      "grad_norm": 1.294169323504986,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 13639
    },
    {
      "epoch": 0.1364,
      "grad_norm": 1.3626068385772372,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 13640
    },
    {
      "epoch": 0.13641,
      "grad_norm": 1.1096206915740712,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 13641
    },
    {
      "epoch": 0.13642,
      "grad_norm": 1.1413504000739507,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13642
    },
    {
      "epoch": 0.13643,
      "grad_norm": 1.356345520757034,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 13643
    },
    {
      "epoch": 0.13644,
      "grad_norm": 1.0209199059256513,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 13644
    },
    {
      "epoch": 0.13645,
      "grad_norm": 1.173159769080088,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 13645
    },
    {
      "epoch": 0.13646,
      "grad_norm": 0.9623922125269074,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 13646
    },
    {
      "epoch": 0.13647,
      "grad_norm": 1.3193897614870387,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 13647
    },
    {
      "epoch": 0.13648,
      "grad_norm": 1.0635369222409417,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 13648
    },
    {
      "epoch": 0.13649,
      "grad_norm": 1.1228393873951896,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 13649
    },
    {
      "epoch": 0.1365,
      "grad_norm": 1.3158969083041163,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13650
    },
    {
      "epoch": 0.13651,
      "grad_norm": 1.0029865082671081,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13651
    },
    {
      "epoch": 0.13652,
      "grad_norm": 1.3868079902769934,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 13652
    },
    {
      "epoch": 0.13653,
      "grad_norm": 1.1745509739886861,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 13653
    },
    {
      "epoch": 0.13654,
      "grad_norm": 1.4310037147962442,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 13654
    },
    {
      "epoch": 0.13655,
      "grad_norm": 1.315973174976202,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 13655
    },
    {
      "epoch": 0.13656,
      "grad_norm": 1.1774246017219323,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 13656
    },
    {
      "epoch": 0.13657,
      "grad_norm": 1.2043319701834125,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 13657
    },
    {
      "epoch": 0.13658,
      "grad_norm": 1.0467647848338972,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 13658
    },
    {
      "epoch": 0.13659,
      "grad_norm": 1.1918431884790026,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 13659
    },
    {
      "epoch": 0.1366,
      "grad_norm": 1.0430180175805952,
      "learning_rate": 0.003,
      "loss": 4.1008,
      "step": 13660
    },
    {
      "epoch": 0.13661,
      "grad_norm": 1.1816583087991992,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 13661
    },
    {
      "epoch": 0.13662,
      "grad_norm": 1.1048636429833953,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 13662
    },
    {
      "epoch": 0.13663,
      "grad_norm": 1.4735721937903374,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 13663
    },
    {
      "epoch": 0.13664,
      "grad_norm": 1.2188453846802905,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 13664
    },
    {
      "epoch": 0.13665,
      "grad_norm": 1.3376154423362254,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 13665
    },
    {
      "epoch": 0.13666,
      "grad_norm": 1.1007384584021513,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 13666
    },
    {
      "epoch": 0.13667,
      "grad_norm": 1.3815169014254765,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13667
    },
    {
      "epoch": 0.13668,
      "grad_norm": 1.064453029465472,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 13668
    },
    {
      "epoch": 0.13669,
      "grad_norm": 1.2765847403316573,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13669
    },
    {
      "epoch": 0.1367,
      "grad_norm": 0.9455494847940942,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 13670
    },
    {
      "epoch": 0.13671,
      "grad_norm": 1.2949353596820141,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 13671
    },
    {
      "epoch": 0.13672,
      "grad_norm": 1.1328451712184167,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 13672
    },
    {
      "epoch": 0.13673,
      "grad_norm": 1.141295914638846,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 13673
    },
    {
      "epoch": 0.13674,
      "grad_norm": 1.2513018708628711,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 13674
    },
    {
      "epoch": 0.13675,
      "grad_norm": 1.0576007704115173,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 13675
    },
    {
      "epoch": 0.13676,
      "grad_norm": 1.536526245377778,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 13676
    },
    {
      "epoch": 0.13677,
      "grad_norm": 1.0623188298061705,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 13677
    },
    {
      "epoch": 0.13678,
      "grad_norm": 1.0695343095334628,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 13678
    },
    {
      "epoch": 0.13679,
      "grad_norm": 1.255050117367722,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 13679
    },
    {
      "epoch": 0.1368,
      "grad_norm": 1.143266529110637,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 13680
    },
    {
      "epoch": 0.13681,
      "grad_norm": 1.1790345274322582,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 13681
    },
    {
      "epoch": 0.13682,
      "grad_norm": 0.8769317544083803,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 13682
    },
    {
      "epoch": 0.13683,
      "grad_norm": 0.9956358013379742,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 13683
    },
    {
      "epoch": 0.13684,
      "grad_norm": 1.2610983337776362,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 13684
    },
    {
      "epoch": 0.13685,
      "grad_norm": 1.224229969634879,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 13685
    },
    {
      "epoch": 0.13686,
      "grad_norm": 1.2852057042781868,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 13686
    },
    {
      "epoch": 0.13687,
      "grad_norm": 1.1287402541109783,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 13687
    },
    {
      "epoch": 0.13688,
      "grad_norm": 1.3066891799267129,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 13688
    },
    {
      "epoch": 0.13689,
      "grad_norm": 1.0248433613412835,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 13689
    },
    {
      "epoch": 0.1369,
      "grad_norm": 1.367176090369949,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 13690
    },
    {
      "epoch": 0.13691,
      "grad_norm": 0.9683325719123493,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 13691
    },
    {
      "epoch": 0.13692,
      "grad_norm": 1.441647625810317,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 13692
    },
    {
      "epoch": 0.13693,
      "grad_norm": 1.065749153683736,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13693
    },
    {
      "epoch": 0.13694,
      "grad_norm": 1.3819720275263285,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13694
    },
    {
      "epoch": 0.13695,
      "grad_norm": 1.0748305770889302,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 13695
    },
    {
      "epoch": 0.13696,
      "grad_norm": 1.3677968293134606,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 13696
    },
    {
      "epoch": 0.13697,
      "grad_norm": 1.147089897423671,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 13697
    },
    {
      "epoch": 0.13698,
      "grad_norm": 1.235718274909368,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 13698
    },
    {
      "epoch": 0.13699,
      "grad_norm": 1.0225627832242097,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 13699
    },
    {
      "epoch": 0.137,
      "grad_norm": 1.1623363764699286,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 13700
    },
    {
      "epoch": 0.13701,
      "grad_norm": 1.0890333717262264,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 13701
    },
    {
      "epoch": 0.13702,
      "grad_norm": 1.1302786432260996,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 13702
    },
    {
      "epoch": 0.13703,
      "grad_norm": 1.0776517926981157,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 13703
    },
    {
      "epoch": 0.13704,
      "grad_norm": 1.3785458069212284,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 13704
    },
    {
      "epoch": 0.13705,
      "grad_norm": 1.2512617040619354,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 13705
    },
    {
      "epoch": 0.13706,
      "grad_norm": 1.3928377499783793,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 13706
    },
    {
      "epoch": 0.13707,
      "grad_norm": 1.118559237908763,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 13707
    },
    {
      "epoch": 0.13708,
      "grad_norm": 1.2029639746066683,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 13708
    },
    {
      "epoch": 0.13709,
      "grad_norm": 1.1127653339477992,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 13709
    },
    {
      "epoch": 0.1371,
      "grad_norm": 1.2691378804015563,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 13710
    },
    {
      "epoch": 0.13711,
      "grad_norm": 1.0512236630876264,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 13711
    },
    {
      "epoch": 0.13712,
      "grad_norm": 1.4292441372063016,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13712
    },
    {
      "epoch": 0.13713,
      "grad_norm": 0.9978745751173643,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 13713
    },
    {
      "epoch": 0.13714,
      "grad_norm": 1.2432364841852932,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 13714
    },
    {
      "epoch": 0.13715,
      "grad_norm": 1.297845034586291,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 13715
    },
    {
      "epoch": 0.13716,
      "grad_norm": 1.2900468090400596,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 13716
    },
    {
      "epoch": 0.13717,
      "grad_norm": 1.181269532330925,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13717
    },
    {
      "epoch": 0.13718,
      "grad_norm": 1.329669457108706,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 13718
    },
    {
      "epoch": 0.13719,
      "grad_norm": 1.3658069391182663,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13719
    },
    {
      "epoch": 0.1372,
      "grad_norm": 1.1011609524331176,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 13720
    },
    {
      "epoch": 0.13721,
      "grad_norm": 1.069565039739558,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 13721
    },
    {
      "epoch": 0.13722,
      "grad_norm": 1.0644531780194006,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 13722
    },
    {
      "epoch": 0.13723,
      "grad_norm": 1.4266250579306097,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 13723
    },
    {
      "epoch": 0.13724,
      "grad_norm": 1.3837474539387862,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 13724
    },
    {
      "epoch": 0.13725,
      "grad_norm": 1.2266512686805247,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 13725
    },
    {
      "epoch": 0.13726,
      "grad_norm": 1.0802335853435896,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 13726
    },
    {
      "epoch": 0.13727,
      "grad_norm": 1.2520063776990553,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13727
    },
    {
      "epoch": 0.13728,
      "grad_norm": 1.1506232520668933,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 13728
    },
    {
      "epoch": 0.13729,
      "grad_norm": 1.191036533468555,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 13729
    },
    {
      "epoch": 0.1373,
      "grad_norm": 1.0785639678596188,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 13730
    },
    {
      "epoch": 0.13731,
      "grad_norm": 1.3274351259060906,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 13731
    },
    {
      "epoch": 0.13732,
      "grad_norm": 1.1462342271727504,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 13732
    },
    {
      "epoch": 0.13733,
      "grad_norm": 1.1965092703547526,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 13733
    },
    {
      "epoch": 0.13734,
      "grad_norm": 1.0113223792794093,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 13734
    },
    {
      "epoch": 0.13735,
      "grad_norm": 1.4309755113711486,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 13735
    },
    {
      "epoch": 0.13736,
      "grad_norm": 1.2237061269968448,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 13736
    },
    {
      "epoch": 0.13737,
      "grad_norm": 1.2061844744891848,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 13737
    },
    {
      "epoch": 0.13738,
      "grad_norm": 1.1160780969915798,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 13738
    },
    {
      "epoch": 0.13739,
      "grad_norm": 1.1317129219921174,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 13739
    },
    {
      "epoch": 0.1374,
      "grad_norm": 1.2730856773538404,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 13740
    },
    {
      "epoch": 0.13741,
      "grad_norm": 1.2716542917352498,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 13741
    },
    {
      "epoch": 0.13742,
      "grad_norm": 1.1083900184558646,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 13742
    },
    {
      "epoch": 0.13743,
      "grad_norm": 1.3454270303632545,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 13743
    },
    {
      "epoch": 0.13744,
      "grad_norm": 1.034111525761471,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 13744
    },
    {
      "epoch": 0.13745,
      "grad_norm": 1.2830839448043023,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 13745
    },
    {
      "epoch": 0.13746,
      "grad_norm": 1.0135350503384342,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 13746
    },
    {
      "epoch": 0.13747,
      "grad_norm": 1.2599057190334695,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 13747
    },
    {
      "epoch": 0.13748,
      "grad_norm": 1.140178222134933,
      "learning_rate": 0.003,
      "loss": 4.1005,
      "step": 13748
    },
    {
      "epoch": 0.13749,
      "grad_norm": 1.2357363339772103,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 13749
    },
    {
      "epoch": 0.1375,
      "grad_norm": 1.072744201221382,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 13750
    },
    {
      "epoch": 0.13751,
      "grad_norm": 1.1219993471230616,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 13751
    },
    {
      "epoch": 0.13752,
      "grad_norm": 1.0795350179180665,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 13752
    },
    {
      "epoch": 0.13753,
      "grad_norm": 1.2898162353050031,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 13753
    },
    {
      "epoch": 0.13754,
      "grad_norm": 1.2443028895509332,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 13754
    },
    {
      "epoch": 0.13755,
      "grad_norm": 1.196620422488927,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 13755
    },
    {
      "epoch": 0.13756,
      "grad_norm": 1.0545696215897438,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 13756
    },
    {
      "epoch": 0.13757,
      "grad_norm": 1.5430740459539571,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 13757
    },
    {
      "epoch": 0.13758,
      "grad_norm": 1.055078720882959,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 13758
    },
    {
      "epoch": 0.13759,
      "grad_norm": 1.4273948140349615,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 13759
    },
    {
      "epoch": 0.1376,
      "grad_norm": 1.1561993353954598,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 13760
    },
    {
      "epoch": 0.13761,
      "grad_norm": 1.0385151262168988,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 13761
    },
    {
      "epoch": 0.13762,
      "grad_norm": 1.0751004141967082,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 13762
    },
    {
      "epoch": 0.13763,
      "grad_norm": 1.4191592640008177,
      "learning_rate": 0.003,
      "loss": 4.1066,
      "step": 13763
    },
    {
      "epoch": 0.13764,
      "grad_norm": 1.284386984507721,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 13764
    },
    {
      "epoch": 0.13765,
      "grad_norm": 1.092072644588252,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 13765
    },
    {
      "epoch": 0.13766,
      "grad_norm": 1.3050467595815576,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 13766
    },
    {
      "epoch": 0.13767,
      "grad_norm": 1.0362345927564356,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 13767
    },
    {
      "epoch": 0.13768,
      "grad_norm": 1.1589269088035632,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 13768
    },
    {
      "epoch": 0.13769,
      "grad_norm": 0.940109702074667,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 13769
    },
    {
      "epoch": 0.1377,
      "grad_norm": 1.2589329742785462,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 13770
    },
    {
      "epoch": 0.13771,
      "grad_norm": 1.1183296378587286,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 13771
    },
    {
      "epoch": 0.13772,
      "grad_norm": 1.2373485396105985,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13772
    },
    {
      "epoch": 0.13773,
      "grad_norm": 1.1013092832854794,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 13773
    },
    {
      "epoch": 0.13774,
      "grad_norm": 1.1323220409061479,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13774
    },
    {
      "epoch": 0.13775,
      "grad_norm": 1.2697176050393486,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 13775
    },
    {
      "epoch": 0.13776,
      "grad_norm": 1.2289730726276897,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 13776
    },
    {
      "epoch": 0.13777,
      "grad_norm": 1.0859061023105696,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 13777
    },
    {
      "epoch": 0.13778,
      "grad_norm": 1.431775187944301,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 13778
    },
    {
      "epoch": 0.13779,
      "grad_norm": 1.023077064814121,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 13779
    },
    {
      "epoch": 0.1378,
      "grad_norm": 1.4857316253141342,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 13780
    },
    {
      "epoch": 0.13781,
      "grad_norm": 0.9571007767783862,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 13781
    },
    {
      "epoch": 0.13782,
      "grad_norm": 1.242294225250045,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 13782
    },
    {
      "epoch": 0.13783,
      "grad_norm": 1.2180590672415628,
      "learning_rate": 0.003,
      "loss": 4.1137,
      "step": 13783
    },
    {
      "epoch": 0.13784,
      "grad_norm": 1.321609816960609,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 13784
    },
    {
      "epoch": 0.13785,
      "grad_norm": 1.0398449333073807,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 13785
    },
    {
      "epoch": 0.13786,
      "grad_norm": 1.2166767032863475,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 13786
    },
    {
      "epoch": 0.13787,
      "grad_norm": 1.4130608082527525,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 13787
    },
    {
      "epoch": 0.13788,
      "grad_norm": 1.102039258721679,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 13788
    },
    {
      "epoch": 0.13789,
      "grad_norm": 1.2791281383107909,
      "learning_rate": 0.003,
      "loss": 4.0949,
      "step": 13789
    },
    {
      "epoch": 0.1379,
      "grad_norm": 1.0757574152524256,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 13790
    },
    {
      "epoch": 0.13791,
      "grad_norm": 1.2327339175446506,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 13791
    },
    {
      "epoch": 0.13792,
      "grad_norm": 1.4027215777211544,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 13792
    },
    {
      "epoch": 0.13793,
      "grad_norm": 1.3955735712252815,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 13793
    },
    {
      "epoch": 0.13794,
      "grad_norm": 1.279540475431449,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 13794
    },
    {
      "epoch": 0.13795,
      "grad_norm": 1.2178098654806002,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 13795
    },
    {
      "epoch": 0.13796,
      "grad_norm": 1.0539717585069517,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 13796
    },
    {
      "epoch": 0.13797,
      "grad_norm": 1.1084300591943956,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 13797
    },
    {
      "epoch": 0.13798,
      "grad_norm": 1.1013695496932099,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 13798
    },
    {
      "epoch": 0.13799,
      "grad_norm": 1.2816063137585847,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 13799
    },
    {
      "epoch": 0.138,
      "grad_norm": 1.2913325263741893,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 13800
    },
    {
      "epoch": 0.13801,
      "grad_norm": 1.0391772873458964,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 13801
    },
    {
      "epoch": 0.13802,
      "grad_norm": 0.976617087548208,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 13802
    },
    {
      "epoch": 0.13803,
      "grad_norm": 1.1665271871136662,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 13803
    },
    {
      "epoch": 0.13804,
      "grad_norm": 1.0006403922231308,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 13804
    },
    {
      "epoch": 0.13805,
      "grad_norm": 1.3645898219329173,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 13805
    },
    {
      "epoch": 0.13806,
      "grad_norm": 1.3277857270183633,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 13806
    },
    {
      "epoch": 0.13807,
      "grad_norm": 1.0644790355228766,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 13807
    },
    {
      "epoch": 0.13808,
      "grad_norm": 1.6318741175584281,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 13808
    },
    {
      "epoch": 0.13809,
      "grad_norm": 0.7879688329732388,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 13809
    },
    {
      "epoch": 0.1381,
      "grad_norm": 1.0661121011213612,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 13810
    },
    {
      "epoch": 0.13811,
      "grad_norm": 1.4052756870562733,
      "learning_rate": 0.003,
      "loss": 4.0975,
      "step": 13811
    },
    {
      "epoch": 0.13812,
      "grad_norm": 1.0490862665519338,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 13812
    },
    {
      "epoch": 0.13813,
      "grad_norm": 1.3622389610202286,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 13813
    },
    {
      "epoch": 0.13814,
      "grad_norm": 1.0425545423063145,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 13814
    },
    {
      "epoch": 0.13815,
      "grad_norm": 1.2575941945973617,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 13815
    },
    {
      "epoch": 0.13816,
      "grad_norm": 1.2265374473742845,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 13816
    },
    {
      "epoch": 0.13817,
      "grad_norm": 1.038745981724648,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 13817
    },
    {
      "epoch": 0.13818,
      "grad_norm": 1.4731932890584123,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 13818
    },
    {
      "epoch": 0.13819,
      "grad_norm": 0.870903805105368,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 13819
    },
    {
      "epoch": 0.1382,
      "grad_norm": 1.2175904467110854,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 13820
    },
    {
      "epoch": 0.13821,
      "grad_norm": 1.1400424745695283,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 13821
    },
    {
      "epoch": 0.13822,
      "grad_norm": 1.1995296242121614,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 13822
    },
    {
      "epoch": 0.13823,
      "grad_norm": 1.5178825762562025,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 13823
    },
    {
      "epoch": 0.13824,
      "grad_norm": 1.2214546895126566,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 13824
    },
    {
      "epoch": 0.13825,
      "grad_norm": 1.2664357163506683,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 13825
    },
    {
      "epoch": 0.13826,
      "grad_norm": 1.0589329402026078,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 13826
    },
    {
      "epoch": 0.13827,
      "grad_norm": 1.1789523161858053,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 13827
    },
    {
      "epoch": 0.13828,
      "grad_norm": 1.5475286557737047,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 13828
    },
    {
      "epoch": 0.13829,
      "grad_norm": 0.8351369152104988,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 13829
    },
    {
      "epoch": 0.1383,
      "grad_norm": 0.9956043388064251,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 13830
    },
    {
      "epoch": 0.13831,
      "grad_norm": 1.3769018795018921,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 13831
    },
    {
      "epoch": 0.13832,
      "grad_norm": 1.1845831230408836,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 13832
    },
    {
      "epoch": 0.13833,
      "grad_norm": 1.2332693899003906,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 13833
    },
    {
      "epoch": 0.13834,
      "grad_norm": 1.1534843571421134,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 13834
    },
    {
      "epoch": 0.13835,
      "grad_norm": 1.14869899572321,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 13835
    },
    {
      "epoch": 0.13836,
      "grad_norm": 1.0403880611532428,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 13836
    },
    {
      "epoch": 0.13837,
      "grad_norm": 1.130107909605646,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 13837
    },
    {
      "epoch": 0.13838,
      "grad_norm": 1.1789301956378286,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 13838
    },
    {
      "epoch": 0.13839,
      "grad_norm": 1.0178110819703394,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 13839
    },
    {
      "epoch": 0.1384,
      "grad_norm": 1.3602920796271976,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 13840
    },
    {
      "epoch": 0.13841,
      "grad_norm": 1.086448902654459,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 13841
    },
    {
      "epoch": 0.13842,
      "grad_norm": 1.1826934581231108,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13842
    },
    {
      "epoch": 0.13843,
      "grad_norm": 1.2588360759412796,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 13843
    },
    {
      "epoch": 0.13844,
      "grad_norm": 1.0960611948512418,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 13844
    },
    {
      "epoch": 0.13845,
      "grad_norm": 1.066118581321973,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 13845
    },
    {
      "epoch": 0.13846,
      "grad_norm": 1.238514866519139,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 13846
    },
    {
      "epoch": 0.13847,
      "grad_norm": 1.3378615113114039,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 13847
    },
    {
      "epoch": 0.13848,
      "grad_norm": 1.107126510372777,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 13848
    },
    {
      "epoch": 0.13849,
      "grad_norm": 1.1923907811098338,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 13849
    },
    {
      "epoch": 0.1385,
      "grad_norm": 1.2036075929545467,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 13850
    },
    {
      "epoch": 0.13851,
      "grad_norm": 1.3265920242843754,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 13851
    },
    {
      "epoch": 0.13852,
      "grad_norm": 1.0783781712805554,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 13852
    },
    {
      "epoch": 0.13853,
      "grad_norm": 1.300679877981243,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 13853
    },
    {
      "epoch": 0.13854,
      "grad_norm": 0.979441386732072,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 13854
    },
    {
      "epoch": 0.13855,
      "grad_norm": 1.2626027757975171,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 13855
    },
    {
      "epoch": 0.13856,
      "grad_norm": 1.0324943518145326,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 13856
    },
    {
      "epoch": 0.13857,
      "grad_norm": 1.2208611478898446,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 13857
    },
    {
      "epoch": 0.13858,
      "grad_norm": 1.4150408537958241,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 13858
    },
    {
      "epoch": 0.13859,
      "grad_norm": 1.1934719812277916,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 13859
    },
    {
      "epoch": 0.1386,
      "grad_norm": 1.1097521653095541,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 13860
    },
    {
      "epoch": 0.13861,
      "grad_norm": 1.0591341409277695,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 13861
    },
    {
      "epoch": 0.13862,
      "grad_norm": 1.311078165099448,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 13862
    },
    {
      "epoch": 0.13863,
      "grad_norm": 1.075891909671555,
      "learning_rate": 0.003,
      "loss": 4.1015,
      "step": 13863
    },
    {
      "epoch": 0.13864,
      "grad_norm": 1.266066950103621,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 13864
    },
    {
      "epoch": 0.13865,
      "grad_norm": 1.3438837530062315,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 13865
    },
    {
      "epoch": 0.13866,
      "grad_norm": 0.934191573347225,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 13866
    },
    {
      "epoch": 0.13867,
      "grad_norm": 1.2095735572592086,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 13867
    },
    {
      "epoch": 0.13868,
      "grad_norm": 1.3806980271358968,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 13868
    },
    {
      "epoch": 0.13869,
      "grad_norm": 0.8708374532186117,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 13869
    },
    {
      "epoch": 0.1387,
      "grad_norm": 1.1395576474767661,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 13870
    },
    {
      "epoch": 0.13871,
      "grad_norm": 1.1544323853403469,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 13871
    },
    {
      "epoch": 0.13872,
      "grad_norm": 1.1905739230752463,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 13872
    },
    {
      "epoch": 0.13873,
      "grad_norm": 1.23088045055871,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 13873
    },
    {
      "epoch": 0.13874,
      "grad_norm": 1.0749892857645922,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 13874
    },
    {
      "epoch": 0.13875,
      "grad_norm": 1.6296366511374096,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 13875
    },
    {
      "epoch": 0.13876,
      "grad_norm": 1.0406320129171167,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 13876
    },
    {
      "epoch": 0.13877,
      "grad_norm": 1.5276874377748664,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 13877
    },
    {
      "epoch": 0.13878,
      "grad_norm": 0.9085018293664214,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 13878
    },
    {
      "epoch": 0.13879,
      "grad_norm": 1.198722324986746,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 13879
    },
    {
      "epoch": 0.1388,
      "grad_norm": 1.4900679753925414,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 13880
    },
    {
      "epoch": 0.13881,
      "grad_norm": 0.9588686404421287,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 13881
    },
    {
      "epoch": 0.13882,
      "grad_norm": 1.016556436833725,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 13882
    },
    {
      "epoch": 0.13883,
      "grad_norm": 1.3591825267573006,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 13883
    },
    {
      "epoch": 0.13884,
      "grad_norm": 0.9969580757172046,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 13884
    },
    {
      "epoch": 0.13885,
      "grad_norm": 1.376113519006804,
      "learning_rate": 0.003,
      "loss": 4.113,
      "step": 13885
    },
    {
      "epoch": 0.13886,
      "grad_norm": 1.130956073216537,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 13886
    },
    {
      "epoch": 0.13887,
      "grad_norm": 1.3813816134763195,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 13887
    },
    {
      "epoch": 0.13888,
      "grad_norm": 1.0133877966264333,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 13888
    },
    {
      "epoch": 0.13889,
      "grad_norm": 1.3471834838681127,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 13889
    },
    {
      "epoch": 0.1389,
      "grad_norm": 1.0201858599279923,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 13890
    },
    {
      "epoch": 0.13891,
      "grad_norm": 1.2828078609119384,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 13891
    },
    {
      "epoch": 0.13892,
      "grad_norm": 1.11863619216386,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 13892
    },
    {
      "epoch": 0.13893,
      "grad_norm": 1.603158276512601,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 13893
    },
    {
      "epoch": 0.13894,
      "grad_norm": 1.1969476992562909,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 13894
    },
    {
      "epoch": 0.13895,
      "grad_norm": 1.5423365160858793,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 13895
    },
    {
      "epoch": 0.13896,
      "grad_norm": 1.077928852196919,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 13896
    },
    {
      "epoch": 0.13897,
      "grad_norm": 1.026728940213821,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 13897
    },
    {
      "epoch": 0.13898,
      "grad_norm": 1.2677260856492953,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 13898
    },
    {
      "epoch": 0.13899,
      "grad_norm": 1.2272126840482172,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 13899
    },
    {
      "epoch": 0.139,
      "grad_norm": 1.287321008248166,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 13900
    },
    {
      "epoch": 0.13901,
      "grad_norm": 1.0518284235161448,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 13901
    },
    {
      "epoch": 0.13902,
      "grad_norm": 1.1496082951367534,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 13902
    },
    {
      "epoch": 0.13903,
      "grad_norm": 1.0881877055239924,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 13903
    },
    {
      "epoch": 0.13904,
      "grad_norm": 1.0367206105761448,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 13904
    },
    {
      "epoch": 0.13905,
      "grad_norm": 1.3221464375084346,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 13905
    },
    {
      "epoch": 0.13906,
      "grad_norm": 1.0677619426077882,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 13906
    },
    {
      "epoch": 0.13907,
      "grad_norm": 1.4128252294951043,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 13907
    },
    {
      "epoch": 0.13908,
      "grad_norm": 0.880496247210434,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 13908
    },
    {
      "epoch": 0.13909,
      "grad_norm": 1.0017699963385105,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 13909
    },
    {
      "epoch": 0.1391,
      "grad_norm": 1.2636365868887092,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 13910
    },
    {
      "epoch": 0.13911,
      "grad_norm": 1.225275355478596,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 13911
    },
    {
      "epoch": 0.13912,
      "grad_norm": 1.3060925427906085,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 13912
    },
    {
      "epoch": 0.13913,
      "grad_norm": 1.2074339416830029,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 13913
    },
    {
      "epoch": 0.13914,
      "grad_norm": 1.2732718468049447,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 13914
    },
    {
      "epoch": 0.13915,
      "grad_norm": 1.059581908738672,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 13915
    },
    {
      "epoch": 0.13916,
      "grad_norm": 1.3835418931483388,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 13916
    },
    {
      "epoch": 0.13917,
      "grad_norm": 1.286371224707917,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 13917
    },
    {
      "epoch": 0.13918,
      "grad_norm": 1.1491760972411582,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 13918
    },
    {
      "epoch": 0.13919,
      "grad_norm": 1.0381404485028898,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 13919
    },
    {
      "epoch": 0.1392,
      "grad_norm": 1.2936934515418033,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 13920
    },
    {
      "epoch": 0.13921,
      "grad_norm": 1.1336491850307007,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 13921
    },
    {
      "epoch": 0.13922,
      "grad_norm": 1.032283544406887,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 13922
    },
    {
      "epoch": 0.13923,
      "grad_norm": 1.1350421647630262,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 13923
    },
    {
      "epoch": 0.13924,
      "grad_norm": 1.2181477345896563,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 13924
    },
    {
      "epoch": 0.13925,
      "grad_norm": 1.2935392461959319,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 13925
    },
    {
      "epoch": 0.13926,
      "grad_norm": 1.580693431878095,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 13926
    },
    {
      "epoch": 0.13927,
      "grad_norm": 1.0568452383884184,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 13927
    },
    {
      "epoch": 0.13928,
      "grad_norm": 1.1826999735195407,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 13928
    },
    {
      "epoch": 0.13929,
      "grad_norm": 1.4389017526386207,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 13929
    },
    {
      "epoch": 0.1393,
      "grad_norm": 1.0264331694045916,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 13930
    },
    {
      "epoch": 0.13931,
      "grad_norm": 1.3517532712896185,
      "learning_rate": 0.003,
      "loss": 4.0048,
      "step": 13931
    },
    {
      "epoch": 0.13932,
      "grad_norm": 1.06929122881059,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 13932
    },
    {
      "epoch": 0.13933,
      "grad_norm": 1.362157964664218,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 13933
    },
    {
      "epoch": 0.13934,
      "grad_norm": 1.0026056408211013,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 13934
    },
    {
      "epoch": 0.13935,
      "grad_norm": 1.4217178830899353,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 13935
    },
    {
      "epoch": 0.13936,
      "grad_norm": 1.038671947066456,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 13936
    },
    {
      "epoch": 0.13937,
      "grad_norm": 1.2887008768297814,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 13937
    },
    {
      "epoch": 0.13938,
      "grad_norm": 1.2099968579918394,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 13938
    },
    {
      "epoch": 0.13939,
      "grad_norm": 1.3522984630469852,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 13939
    },
    {
      "epoch": 0.1394,
      "grad_norm": 1.1961466683874793,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 13940
    },
    {
      "epoch": 0.13941,
      "grad_norm": 1.306101262984324,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 13941
    },
    {
      "epoch": 0.13942,
      "grad_norm": 0.9711576861323818,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 13942
    },
    {
      "epoch": 0.13943,
      "grad_norm": 1.1791976699448328,
      "learning_rate": 0.003,
      "loss": 4.1138,
      "step": 13943
    },
    {
      "epoch": 0.13944,
      "grad_norm": 1.0763256910193317,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 13944
    },
    {
      "epoch": 0.13945,
      "grad_norm": 1.190284236924628,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 13945
    },
    {
      "epoch": 0.13946,
      "grad_norm": 1.1505593702223464,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13946
    },
    {
      "epoch": 0.13947,
      "grad_norm": 1.3241486709444608,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 13947
    },
    {
      "epoch": 0.13948,
      "grad_norm": 1.2436621418152587,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 13948
    },
    {
      "epoch": 0.13949,
      "grad_norm": 1.0489075729453334,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 13949
    },
    {
      "epoch": 0.1395,
      "grad_norm": 1.1671792480190273,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 13950
    },
    {
      "epoch": 0.13951,
      "grad_norm": 1.2678027178461682,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 13951
    },
    {
      "epoch": 0.13952,
      "grad_norm": 1.1699449496959624,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 13952
    },
    {
      "epoch": 0.13953,
      "grad_norm": 1.3098685402383983,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 13953
    },
    {
      "epoch": 0.13954,
      "grad_norm": 1.224454648402645,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 13954
    },
    {
      "epoch": 0.13955,
      "grad_norm": 1.3048386632435085,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 13955
    },
    {
      "epoch": 0.13956,
      "grad_norm": 1.0227877046412943,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 13956
    },
    {
      "epoch": 0.13957,
      "grad_norm": 1.1985956342806694,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 13957
    },
    {
      "epoch": 0.13958,
      "grad_norm": 1.183718979418515,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 13958
    },
    {
      "epoch": 0.13959,
      "grad_norm": 1.2330220548490956,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 13959
    },
    {
      "epoch": 0.1396,
      "grad_norm": 1.125662126079691,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 13960
    },
    {
      "epoch": 0.13961,
      "grad_norm": 1.285516911003546,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 13961
    },
    {
      "epoch": 0.13962,
      "grad_norm": 1.1264208265619262,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 13962
    },
    {
      "epoch": 0.13963,
      "grad_norm": 1.031640488901549,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 13963
    },
    {
      "epoch": 0.13964,
      "grad_norm": 1.2802365534499318,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 13964
    },
    {
      "epoch": 0.13965,
      "grad_norm": 1.1884413630126052,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 13965
    },
    {
      "epoch": 0.13966,
      "grad_norm": 1.1690090473186747,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 13966
    },
    {
      "epoch": 0.13967,
      "grad_norm": 1.1366104707513918,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 13967
    },
    {
      "epoch": 0.13968,
      "grad_norm": 1.2198371288381218,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 13968
    },
    {
      "epoch": 0.13969,
      "grad_norm": 1.120964014292074,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 13969
    },
    {
      "epoch": 0.1397,
      "grad_norm": 1.6853975112623714,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 13970
    },
    {
      "epoch": 0.13971,
      "grad_norm": 0.8516396043457239,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 13971
    },
    {
      "epoch": 0.13972,
      "grad_norm": 1.3989485640785961,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 13972
    },
    {
      "epoch": 0.13973,
      "grad_norm": 1.2911245355553416,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 13973
    },
    {
      "epoch": 0.13974,
      "grad_norm": 1.3562710136515597,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 13974
    },
    {
      "epoch": 0.13975,
      "grad_norm": 1.0626370665776999,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 13975
    },
    {
      "epoch": 0.13976,
      "grad_norm": 1.3882714398556264,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 13976
    },
    {
      "epoch": 0.13977,
      "grad_norm": 1.07602319752501,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 13977
    },
    {
      "epoch": 0.13978,
      "grad_norm": 1.204983796527482,
      "learning_rate": 0.003,
      "loss": 4.1228,
      "step": 13978
    },
    {
      "epoch": 0.13979,
      "grad_norm": 1.0239793207772239,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 13979
    },
    {
      "epoch": 0.1398,
      "grad_norm": 1.4068167574457042,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 13980
    },
    {
      "epoch": 0.13981,
      "grad_norm": 0.9272392246853138,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 13981
    },
    {
      "epoch": 0.13982,
      "grad_norm": 1.2235259694039406,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 13982
    },
    {
      "epoch": 0.13983,
      "grad_norm": 1.0285946068778307,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 13983
    },
    {
      "epoch": 0.13984,
      "grad_norm": 1.3088445090796035,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 13984
    },
    {
      "epoch": 0.13985,
      "grad_norm": 1.0166766015149529,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 13985
    },
    {
      "epoch": 0.13986,
      "grad_norm": 1.2031489036038912,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 13986
    },
    {
      "epoch": 0.13987,
      "grad_norm": 1.0570294895755987,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 13987
    },
    {
      "epoch": 0.13988,
      "grad_norm": 1.4843615987129113,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 13988
    },
    {
      "epoch": 0.13989,
      "grad_norm": 1.172269746382644,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 13989
    },
    {
      "epoch": 0.1399,
      "grad_norm": 1.1866403118247013,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 13990
    },
    {
      "epoch": 0.13991,
      "grad_norm": 1.1538755961087634,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 13991
    },
    {
      "epoch": 0.13992,
      "grad_norm": 1.3588394984928112,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 13992
    },
    {
      "epoch": 0.13993,
      "grad_norm": 1.1448255332881148,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 13993
    },
    {
      "epoch": 0.13994,
      "grad_norm": 1.1296028881145412,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 13994
    },
    {
      "epoch": 0.13995,
      "grad_norm": 1.3123610288025107,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 13995
    },
    {
      "epoch": 0.13996,
      "grad_norm": 1.025069752380935,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 13996
    },
    {
      "epoch": 0.13997,
      "grad_norm": 1.5053800598899543,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 13997
    },
    {
      "epoch": 0.13998,
      "grad_norm": 0.953506370355428,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 13998
    },
    {
      "epoch": 0.13999,
      "grad_norm": 1.0708954418212988,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 13999
    },
    {
      "epoch": 0.14,
      "grad_norm": 1.0853424429098648,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 14000
    },
    {
      "epoch": 0.14001,
      "grad_norm": 1.1751322615896895,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 14001
    },
    {
      "epoch": 0.14002,
      "grad_norm": 1.2627706928641533,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 14002
    },
    {
      "epoch": 0.14003,
      "grad_norm": 1.256909647986896,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 14003
    },
    {
      "epoch": 0.14004,
      "grad_norm": 1.2356056386980325,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 14004
    },
    {
      "epoch": 0.14005,
      "grad_norm": 1.1453273029563364,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 14005
    },
    {
      "epoch": 0.14006,
      "grad_norm": 1.2198936454607405,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 14006
    },
    {
      "epoch": 0.14007,
      "grad_norm": 1.1391939945267104,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 14007
    },
    {
      "epoch": 0.14008,
      "grad_norm": 1.3187676811830558,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 14008
    },
    {
      "epoch": 0.14009,
      "grad_norm": 0.9599819243120673,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 14009
    },
    {
      "epoch": 0.1401,
      "grad_norm": 1.2606158811736639,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 14010
    },
    {
      "epoch": 0.14011,
      "grad_norm": 1.3055698944912726,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 14011
    },
    {
      "epoch": 0.14012,
      "grad_norm": 1.2797345567603078,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 14012
    },
    {
      "epoch": 0.14013,
      "grad_norm": 1.4950526637194654,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 14013
    },
    {
      "epoch": 0.14014,
      "grad_norm": 0.9905350914594043,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 14014
    },
    {
      "epoch": 0.14015,
      "grad_norm": 1.2965093091361792,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 14015
    },
    {
      "epoch": 0.14016,
      "grad_norm": 1.2418385758571846,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 14016
    },
    {
      "epoch": 0.14017,
      "grad_norm": 1.2348271237915762,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 14017
    },
    {
      "epoch": 0.14018,
      "grad_norm": 1.2745667416649695,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 14018
    },
    {
      "epoch": 0.14019,
      "grad_norm": 1.082411174356095,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 14019
    },
    {
      "epoch": 0.1402,
      "grad_norm": 1.2153197263791202,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 14020
    },
    {
      "epoch": 0.14021,
      "grad_norm": 1.0910587248310606,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 14021
    },
    {
      "epoch": 0.14022,
      "grad_norm": 1.2500389515470083,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 14022
    },
    {
      "epoch": 0.14023,
      "grad_norm": 1.0380589880341928,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 14023
    },
    {
      "epoch": 0.14024,
      "grad_norm": 1.4434775858039697,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 14024
    },
    {
      "epoch": 0.14025,
      "grad_norm": 0.9907731511394811,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 14025
    },
    {
      "epoch": 0.14026,
      "grad_norm": 1.4208867005861814,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 14026
    },
    {
      "epoch": 0.14027,
      "grad_norm": 1.115862229281504,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 14027
    },
    {
      "epoch": 0.14028,
      "grad_norm": 1.1944116583170195,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 14028
    },
    {
      "epoch": 0.14029,
      "grad_norm": 1.1740283527298874,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 14029
    },
    {
      "epoch": 0.1403,
      "grad_norm": 1.2636790423414466,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 14030
    },
    {
      "epoch": 0.14031,
      "grad_norm": 1.0205070534553105,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 14031
    },
    {
      "epoch": 0.14032,
      "grad_norm": 1.148178017331799,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 14032
    },
    {
      "epoch": 0.14033,
      "grad_norm": 1.101090766483742,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 14033
    },
    {
      "epoch": 0.14034,
      "grad_norm": 1.180918011469458,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 14034
    },
    {
      "epoch": 0.14035,
      "grad_norm": 1.3016727533830463,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 14035
    },
    {
      "epoch": 0.14036,
      "grad_norm": 1.1174539382869852,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 14036
    },
    {
      "epoch": 0.14037,
      "grad_norm": 1.1302127058999685,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 14037
    },
    {
      "epoch": 0.14038,
      "grad_norm": 1.1432418603164374,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 14038
    },
    {
      "epoch": 0.14039,
      "grad_norm": 1.0127791234279393,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 14039
    },
    {
      "epoch": 0.1404,
      "grad_norm": 1.2078468546327734,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 14040
    },
    {
      "epoch": 0.14041,
      "grad_norm": 1.0044245340634046,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 14041
    },
    {
      "epoch": 0.14042,
      "grad_norm": 1.3387862921051845,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 14042
    },
    {
      "epoch": 0.14043,
      "grad_norm": 1.1765964167320597,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 14043
    },
    {
      "epoch": 0.14044,
      "grad_norm": 1.1365245865954814,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 14044
    },
    {
      "epoch": 0.14045,
      "grad_norm": 1.2670225146315648,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 14045
    },
    {
      "epoch": 0.14046,
      "grad_norm": 1.1551713629150766,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 14046
    },
    {
      "epoch": 0.14047,
      "grad_norm": 1.2263739906716984,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 14047
    },
    {
      "epoch": 0.14048,
      "grad_norm": 1.3292121038850169,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 14048
    },
    {
      "epoch": 0.14049,
      "grad_norm": 1.1707901228667954,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 14049
    },
    {
      "epoch": 0.1405,
      "grad_norm": 1.1820577056492885,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 14050
    },
    {
      "epoch": 0.14051,
      "grad_norm": 1.4970989890341049,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 14051
    },
    {
      "epoch": 0.14052,
      "grad_norm": 1.1880382310046214,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 14052
    },
    {
      "epoch": 0.14053,
      "grad_norm": 1.0418475121645348,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 14053
    },
    {
      "epoch": 0.14054,
      "grad_norm": 1.3826547250959087,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 14054
    },
    {
      "epoch": 0.14055,
      "grad_norm": 1.1989247615972918,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 14055
    },
    {
      "epoch": 0.14056,
      "grad_norm": 1.4922771838130722,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 14056
    },
    {
      "epoch": 0.14057,
      "grad_norm": 0.9828593072226139,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 14057
    },
    {
      "epoch": 0.14058,
      "grad_norm": 1.242153543410688,
      "learning_rate": 0.003,
      "loss": 4.0895,
      "step": 14058
    },
    {
      "epoch": 0.14059,
      "grad_norm": 1.2644191548856771,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 14059
    },
    {
      "epoch": 0.1406,
      "grad_norm": 1.0402160183989233,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 14060
    },
    {
      "epoch": 0.14061,
      "grad_norm": 1.4643926891587589,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 14061
    },
    {
      "epoch": 0.14062,
      "grad_norm": 1.2854550032648755,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 14062
    },
    {
      "epoch": 0.14063,
      "grad_norm": 1.1399715066086162,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 14063
    },
    {
      "epoch": 0.14064,
      "grad_norm": 1.2613905152553255,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 14064
    },
    {
      "epoch": 0.14065,
      "grad_norm": 1.109738252666644,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 14065
    },
    {
      "epoch": 0.14066,
      "grad_norm": 1.1536706142441462,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 14066
    },
    {
      "epoch": 0.14067,
      "grad_norm": 1.1686760221894372,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 14067
    },
    {
      "epoch": 0.14068,
      "grad_norm": 1.4431129370117735,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 14068
    },
    {
      "epoch": 0.14069,
      "grad_norm": 1.1529287834339563,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 14069
    },
    {
      "epoch": 0.1407,
      "grad_norm": 1.2272293396189673,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 14070
    },
    {
      "epoch": 0.14071,
      "grad_norm": 1.2113547139493819,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 14071
    },
    {
      "epoch": 0.14072,
      "grad_norm": 1.0976937568041476,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 14072
    },
    {
      "epoch": 0.14073,
      "grad_norm": 1.247524526764744,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 14073
    },
    {
      "epoch": 0.14074,
      "grad_norm": 1.089618174756747,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 14074
    },
    {
      "epoch": 0.14075,
      "grad_norm": 1.700931146159072,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 14075
    },
    {
      "epoch": 0.14076,
      "grad_norm": 1.1481484438903082,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 14076
    },
    {
      "epoch": 0.14077,
      "grad_norm": 1.7389127274356944,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 14077
    },
    {
      "epoch": 0.14078,
      "grad_norm": 0.91208467488463,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 14078
    },
    {
      "epoch": 0.14079,
      "grad_norm": 1.058450877881687,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 14079
    },
    {
      "epoch": 0.1408,
      "grad_norm": 1.5232097577975936,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 14080
    },
    {
      "epoch": 0.14081,
      "grad_norm": 1.1325448640909364,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 14081
    },
    {
      "epoch": 0.14082,
      "grad_norm": 1.3692201824713788,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 14082
    },
    {
      "epoch": 0.14083,
      "grad_norm": 1.2405415707110088,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 14083
    },
    {
      "epoch": 0.14084,
      "grad_norm": 1.2612958054984984,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 14084
    },
    {
      "epoch": 0.14085,
      "grad_norm": 1.0115092048586234,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 14085
    },
    {
      "epoch": 0.14086,
      "grad_norm": 1.1961231758905728,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 14086
    },
    {
      "epoch": 0.14087,
      "grad_norm": 1.1376853253553598,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 14087
    },
    {
      "epoch": 0.14088,
      "grad_norm": 1.2719190721417335,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 14088
    },
    {
      "epoch": 0.14089,
      "grad_norm": 0.9257838863610739,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 14089
    },
    {
      "epoch": 0.1409,
      "grad_norm": 1.0136199776793982,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 14090
    },
    {
      "epoch": 0.14091,
      "grad_norm": 1.1143236991560046,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 14091
    },
    {
      "epoch": 0.14092,
      "grad_norm": 1.4618438256914872,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 14092
    },
    {
      "epoch": 0.14093,
      "grad_norm": 1.2310701034540763,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 14093
    },
    {
      "epoch": 0.14094,
      "grad_norm": 1.3875878617357045,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 14094
    },
    {
      "epoch": 0.14095,
      "grad_norm": 1.2401622519093114,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 14095
    },
    {
      "epoch": 0.14096,
      "grad_norm": 1.100162709342287,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 14096
    },
    {
      "epoch": 0.14097,
      "grad_norm": 1.3648806080597096,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 14097
    },
    {
      "epoch": 0.14098,
      "grad_norm": 0.9119748179992018,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 14098
    },
    {
      "epoch": 0.14099,
      "grad_norm": 1.101659224646554,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 14099
    },
    {
      "epoch": 0.141,
      "grad_norm": 1.2489497461395942,
      "learning_rate": 0.003,
      "loss": 4.0979,
      "step": 14100
    },
    {
      "epoch": 0.14101,
      "grad_norm": 1.087271878838567,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 14101
    },
    {
      "epoch": 0.14102,
      "grad_norm": 1.538799319935321,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 14102
    },
    {
      "epoch": 0.14103,
      "grad_norm": 1.0619706577492025,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 14103
    },
    {
      "epoch": 0.14104,
      "grad_norm": 1.3883605928555496,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 14104
    },
    {
      "epoch": 0.14105,
      "grad_norm": 1.091052895958119,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 14105
    },
    {
      "epoch": 0.14106,
      "grad_norm": 1.26460143028129,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 14106
    },
    {
      "epoch": 0.14107,
      "grad_norm": 1.0241565414578302,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 14107
    },
    {
      "epoch": 0.14108,
      "grad_norm": 1.4581418517518614,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 14108
    },
    {
      "epoch": 0.14109,
      "grad_norm": 1.020482183692946,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 14109
    },
    {
      "epoch": 0.1411,
      "grad_norm": 1.2695426847696474,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 14110
    },
    {
      "epoch": 0.14111,
      "grad_norm": 1.042396582666057,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 14111
    },
    {
      "epoch": 0.14112,
      "grad_norm": 1.3471325666922835,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 14112
    },
    {
      "epoch": 0.14113,
      "grad_norm": 1.1013474928848515,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 14113
    },
    {
      "epoch": 0.14114,
      "grad_norm": 1.3049329414297008,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 14114
    },
    {
      "epoch": 0.14115,
      "grad_norm": 1.165601398901598,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 14115
    },
    {
      "epoch": 0.14116,
      "grad_norm": 1.2845637290855145,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 14116
    },
    {
      "epoch": 0.14117,
      "grad_norm": 1.215084813674362,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 14117
    },
    {
      "epoch": 0.14118,
      "grad_norm": 1.267146370467284,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 14118
    },
    {
      "epoch": 0.14119,
      "grad_norm": 1.2088876244998399,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 14119
    },
    {
      "epoch": 0.1412,
      "grad_norm": 1.0370689493716259,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 14120
    },
    {
      "epoch": 0.14121,
      "grad_norm": 1.3567784912838432,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 14121
    },
    {
      "epoch": 0.14122,
      "grad_norm": 1.1358911454763512,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 14122
    },
    {
      "epoch": 0.14123,
      "grad_norm": 1.444138133727849,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 14123
    },
    {
      "epoch": 0.14124,
      "grad_norm": 1.019323927670304,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 14124
    },
    {
      "epoch": 0.14125,
      "grad_norm": 1.0620644030242081,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 14125
    },
    {
      "epoch": 0.14126,
      "grad_norm": 1.3640724702723905,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 14126
    },
    {
      "epoch": 0.14127,
      "grad_norm": 1.062696357746494,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 14127
    },
    {
      "epoch": 0.14128,
      "grad_norm": 1.2543281286826446,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 14128
    },
    {
      "epoch": 0.14129,
      "grad_norm": 1.2307060795050326,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 14129
    },
    {
      "epoch": 0.1413,
      "grad_norm": 1.0676327695640522,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 14130
    },
    {
      "epoch": 0.14131,
      "grad_norm": 1.1381901901492002,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 14131
    },
    {
      "epoch": 0.14132,
      "grad_norm": 1.1621490624517714,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 14132
    },
    {
      "epoch": 0.14133,
      "grad_norm": 0.9949905353372355,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 14133
    },
    {
      "epoch": 0.14134,
      "grad_norm": 1.3635458799768592,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 14134
    },
    {
      "epoch": 0.14135,
      "grad_norm": 0.9973050565209519,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 14135
    },
    {
      "epoch": 0.14136,
      "grad_norm": 1.5237844040165016,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 14136
    },
    {
      "epoch": 0.14137,
      "grad_norm": 1.2408921889432498,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 14137
    },
    {
      "epoch": 0.14138,
      "grad_norm": 1.1249081956948144,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 14138
    },
    {
      "epoch": 0.14139,
      "grad_norm": 1.089470920215599,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 14139
    },
    {
      "epoch": 0.1414,
      "grad_norm": 1.766406695149215,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 14140
    },
    {
      "epoch": 0.14141,
      "grad_norm": 0.9474434010605183,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 14141
    },
    {
      "epoch": 0.14142,
      "grad_norm": 1.1543755248262677,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 14142
    },
    {
      "epoch": 0.14143,
      "grad_norm": 1.15551118814518,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 14143
    },
    {
      "epoch": 0.14144,
      "grad_norm": 1.0949168441885477,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 14144
    },
    {
      "epoch": 0.14145,
      "grad_norm": 1.2158154688963252,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 14145
    },
    {
      "epoch": 0.14146,
      "grad_norm": 1.1930896585816888,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 14146
    },
    {
      "epoch": 0.14147,
      "grad_norm": 1.3306246715158585,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 14147
    },
    {
      "epoch": 0.14148,
      "grad_norm": 0.9676846760335293,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 14148
    },
    {
      "epoch": 0.14149,
      "grad_norm": 1.212415931754474,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 14149
    },
    {
      "epoch": 0.1415,
      "grad_norm": 1.452342777360195,
      "learning_rate": 0.003,
      "loss": 4.1047,
      "step": 14150
    },
    {
      "epoch": 0.14151,
      "grad_norm": 1.2710544608686976,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 14151
    },
    {
      "epoch": 0.14152,
      "grad_norm": 1.0482634458726003,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 14152
    },
    {
      "epoch": 0.14153,
      "grad_norm": 1.4379195293470144,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 14153
    },
    {
      "epoch": 0.14154,
      "grad_norm": 1.191609048243655,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 14154
    },
    {
      "epoch": 0.14155,
      "grad_norm": 1.3787592896305192,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 14155
    },
    {
      "epoch": 0.14156,
      "grad_norm": 0.9893806391740821,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 14156
    },
    {
      "epoch": 0.14157,
      "grad_norm": 1.272420136765873,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 14157
    },
    {
      "epoch": 0.14158,
      "grad_norm": 1.2617920027622527,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 14158
    },
    {
      "epoch": 0.14159,
      "grad_norm": 1.1700761289369743,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 14159
    },
    {
      "epoch": 0.1416,
      "grad_norm": 1.122186994806511,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 14160
    },
    {
      "epoch": 0.14161,
      "grad_norm": 1.1404942329267413,
      "learning_rate": 0.003,
      "loss": 4.1088,
      "step": 14161
    },
    {
      "epoch": 0.14162,
      "grad_norm": 1.085778004031023,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 14162
    },
    {
      "epoch": 0.14163,
      "grad_norm": 1.0694076701689075,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 14163
    },
    {
      "epoch": 0.14164,
      "grad_norm": 1.1191905529863895,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 14164
    },
    {
      "epoch": 0.14165,
      "grad_norm": 1.5149121636829188,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 14165
    },
    {
      "epoch": 0.14166,
      "grad_norm": 1.1636360103645447,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 14166
    },
    {
      "epoch": 0.14167,
      "grad_norm": 1.2826324722743676,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 14167
    },
    {
      "epoch": 0.14168,
      "grad_norm": 1.1435295912881631,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 14168
    },
    {
      "epoch": 0.14169,
      "grad_norm": 1.0783640644185406,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 14169
    },
    {
      "epoch": 0.1417,
      "grad_norm": 1.2931780213121669,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 14170
    },
    {
      "epoch": 0.14171,
      "grad_norm": 1.1902891825456416,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 14171
    },
    {
      "epoch": 0.14172,
      "grad_norm": 1.284651646093003,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 14172
    },
    {
      "epoch": 0.14173,
      "grad_norm": 1.1197725075637848,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 14173
    },
    {
      "epoch": 0.14174,
      "grad_norm": 1.32959635212142,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 14174
    },
    {
      "epoch": 0.14175,
      "grad_norm": 1.0929271176927622,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 14175
    },
    {
      "epoch": 0.14176,
      "grad_norm": 1.107336305247538,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 14176
    },
    {
      "epoch": 0.14177,
      "grad_norm": 1.0345033569018824,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 14177
    },
    {
      "epoch": 0.14178,
      "grad_norm": 1.119196119261943,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 14178
    },
    {
      "epoch": 0.14179,
      "grad_norm": 1.4634545139397872,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 14179
    },
    {
      "epoch": 0.1418,
      "grad_norm": 1.027755596902636,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 14180
    },
    {
      "epoch": 0.14181,
      "grad_norm": 1.6493733952615102,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 14181
    },
    {
      "epoch": 0.14182,
      "grad_norm": 1.199597131516992,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 14182
    },
    {
      "epoch": 0.14183,
      "grad_norm": 1.2057489298199282,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 14183
    },
    {
      "epoch": 0.14184,
      "grad_norm": 1.2224922049388296,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 14184
    },
    {
      "epoch": 0.14185,
      "grad_norm": 1.1297684255979763,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 14185
    },
    {
      "epoch": 0.14186,
      "grad_norm": 1.2586919988023766,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 14186
    },
    {
      "epoch": 0.14187,
      "grad_norm": 1.0496742596269901,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 14187
    },
    {
      "epoch": 0.14188,
      "grad_norm": 1.2778464566434349,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 14188
    },
    {
      "epoch": 0.14189,
      "grad_norm": 1.091293365195764,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 14189
    },
    {
      "epoch": 0.1419,
      "grad_norm": 1.181619627239842,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 14190
    },
    {
      "epoch": 0.14191,
      "grad_norm": 1.1808431279254916,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 14191
    },
    {
      "epoch": 0.14192,
      "grad_norm": 1.1471303523369158,
      "learning_rate": 0.003,
      "loss": 4.1122,
      "step": 14192
    },
    {
      "epoch": 0.14193,
      "grad_norm": 1.1733350139049805,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 14193
    },
    {
      "epoch": 0.14194,
      "grad_norm": 1.2063705463703653,
      "learning_rate": 0.003,
      "loss": 4.1014,
      "step": 14194
    },
    {
      "epoch": 0.14195,
      "grad_norm": 1.4150917924345123,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 14195
    },
    {
      "epoch": 0.14196,
      "grad_norm": 1.1644947084397448,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 14196
    },
    {
      "epoch": 0.14197,
      "grad_norm": 1.3360296380209244,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 14197
    },
    {
      "epoch": 0.14198,
      "grad_norm": 1.3234276563046434,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 14198
    },
    {
      "epoch": 0.14199,
      "grad_norm": 1.655669932559523,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 14199
    },
    {
      "epoch": 0.142,
      "grad_norm": 0.8645260924147421,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 14200
    },
    {
      "epoch": 0.14201,
      "grad_norm": 1.231467452337542,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 14201
    },
    {
      "epoch": 0.14202,
      "grad_norm": 1.3296047105728226,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 14202
    },
    {
      "epoch": 0.14203,
      "grad_norm": 1.0361973528335804,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 14203
    },
    {
      "epoch": 0.14204,
      "grad_norm": 1.2130661454494625,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 14204
    },
    {
      "epoch": 0.14205,
      "grad_norm": 1.0396218101989512,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 14205
    },
    {
      "epoch": 0.14206,
      "grad_norm": 1.1370721026997253,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 14206
    },
    {
      "epoch": 0.14207,
      "grad_norm": 1.0778254858539933,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 14207
    },
    {
      "epoch": 0.14208,
      "grad_norm": 1.2817697240467347,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 14208
    },
    {
      "epoch": 0.14209,
      "grad_norm": 1.571986516035277,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 14209
    },
    {
      "epoch": 0.1421,
      "grad_norm": 1.2369047244446083,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 14210
    },
    {
      "epoch": 0.14211,
      "grad_norm": 1.122695466629707,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 14211
    },
    {
      "epoch": 0.14212,
      "grad_norm": 1.2753495336204912,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 14212
    },
    {
      "epoch": 0.14213,
      "grad_norm": 1.2800777890439599,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14213
    },
    {
      "epoch": 0.14214,
      "grad_norm": 1.1483107106387056,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 14214
    },
    {
      "epoch": 0.14215,
      "grad_norm": 1.1398680420847587,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 14215
    },
    {
      "epoch": 0.14216,
      "grad_norm": 1.1207686970460085,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 14216
    },
    {
      "epoch": 0.14217,
      "grad_norm": 1.0481411536484901,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 14217
    },
    {
      "epoch": 0.14218,
      "grad_norm": 1.2157894765444972,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 14218
    },
    {
      "epoch": 0.14219,
      "grad_norm": 1.175092816881649,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 14219
    },
    {
      "epoch": 0.1422,
      "grad_norm": 1.0675976028832213,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 14220
    },
    {
      "epoch": 0.14221,
      "grad_norm": 1.4064689804811845,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 14221
    },
    {
      "epoch": 0.14222,
      "grad_norm": 1.31466024641552,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 14222
    },
    {
      "epoch": 0.14223,
      "grad_norm": 1.071967340068421,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 14223
    },
    {
      "epoch": 0.14224,
      "grad_norm": 1.3461964597737213,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 14224
    },
    {
      "epoch": 0.14225,
      "grad_norm": 0.8939320576619639,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 14225
    },
    {
      "epoch": 0.14226,
      "grad_norm": 1.1882540605637555,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 14226
    },
    {
      "epoch": 0.14227,
      "grad_norm": 1.1193579100454676,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 14227
    },
    {
      "epoch": 0.14228,
      "grad_norm": 1.5711398723592591,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 14228
    },
    {
      "epoch": 0.14229,
      "grad_norm": 1.071306204940862,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 14229
    },
    {
      "epoch": 0.1423,
      "grad_norm": 1.199926718935403,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 14230
    },
    {
      "epoch": 0.14231,
      "grad_norm": 1.2968772399813384,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 14231
    },
    {
      "epoch": 0.14232,
      "grad_norm": 1.2110709858588478,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 14232
    },
    {
      "epoch": 0.14233,
      "grad_norm": 1.076850702138636,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 14233
    },
    {
      "epoch": 0.14234,
      "grad_norm": 1.2137894987587472,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 14234
    },
    {
      "epoch": 0.14235,
      "grad_norm": 0.9700130811788708,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 14235
    },
    {
      "epoch": 0.14236,
      "grad_norm": 1.2284258014361296,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 14236
    },
    {
      "epoch": 0.14237,
      "grad_norm": 1.0625057397406377,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 14237
    },
    {
      "epoch": 0.14238,
      "grad_norm": 1.3454226153455804,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 14238
    },
    {
      "epoch": 0.14239,
      "grad_norm": 1.0430985593988913,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 14239
    },
    {
      "epoch": 0.1424,
      "grad_norm": 1.2549151547175872,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 14240
    },
    {
      "epoch": 0.14241,
      "grad_norm": 1.105889056985785,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 14241
    },
    {
      "epoch": 0.14242,
      "grad_norm": 1.2534217071630724,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 14242
    },
    {
      "epoch": 0.14243,
      "grad_norm": 1.490414740246,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 14243
    },
    {
      "epoch": 0.14244,
      "grad_norm": 1.1990047833883828,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 14244
    },
    {
      "epoch": 0.14245,
      "grad_norm": 1.2362137951101504,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 14245
    },
    {
      "epoch": 0.14246,
      "grad_norm": 1.2196722993987894,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 14246
    },
    {
      "epoch": 0.14247,
      "grad_norm": 1.2809217326424946,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 14247
    },
    {
      "epoch": 0.14248,
      "grad_norm": 1.0180462757614577,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 14248
    },
    {
      "epoch": 0.14249,
      "grad_norm": 1.293475424515683,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 14249
    },
    {
      "epoch": 0.1425,
      "grad_norm": 1.0327402727410269,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 14250
    },
    {
      "epoch": 0.14251,
      "grad_norm": 1.464622467671724,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 14251
    },
    {
      "epoch": 0.14252,
      "grad_norm": 1.1079023727495274,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 14252
    },
    {
      "epoch": 0.14253,
      "grad_norm": 1.1738022294070674,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 14253
    },
    {
      "epoch": 0.14254,
      "grad_norm": 1.1749821721531346,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 14254
    },
    {
      "epoch": 0.14255,
      "grad_norm": 1.155641367291191,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 14255
    },
    {
      "epoch": 0.14256,
      "grad_norm": 1.4718813972164393,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 14256
    },
    {
      "epoch": 0.14257,
      "grad_norm": 1.1861704431299545,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14257
    },
    {
      "epoch": 0.14258,
      "grad_norm": 1.3177585554808537,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 14258
    },
    {
      "epoch": 0.14259,
      "grad_norm": 1.2620995819080834,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 14259
    },
    {
      "epoch": 0.1426,
      "grad_norm": 1.0326300047426755,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 14260
    },
    {
      "epoch": 0.14261,
      "grad_norm": 1.445018691737256,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 14261
    },
    {
      "epoch": 0.14262,
      "grad_norm": 0.9199967785969957,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 14262
    },
    {
      "epoch": 0.14263,
      "grad_norm": 1.2215931395439386,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 14263
    },
    {
      "epoch": 0.14264,
      "grad_norm": 1.1709372196894714,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 14264
    },
    {
      "epoch": 0.14265,
      "grad_norm": 1.232346357358036,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 14265
    },
    {
      "epoch": 0.14266,
      "grad_norm": 1.3335474977640058,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 14266
    },
    {
      "epoch": 0.14267,
      "grad_norm": 1.0131622771709141,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 14267
    },
    {
      "epoch": 0.14268,
      "grad_norm": 1.5364409505944565,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 14268
    },
    {
      "epoch": 0.14269,
      "grad_norm": 0.8789420939411684,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 14269
    },
    {
      "epoch": 0.1427,
      "grad_norm": 1.2583510459232634,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 14270
    },
    {
      "epoch": 0.14271,
      "grad_norm": 1.0326989710149879,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 14271
    },
    {
      "epoch": 0.14272,
      "grad_norm": 1.2486351214299949,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 14272
    },
    {
      "epoch": 0.14273,
      "grad_norm": 1.0690904813796103,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 14273
    },
    {
      "epoch": 0.14274,
      "grad_norm": 1.2556787600710217,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 14274
    },
    {
      "epoch": 0.14275,
      "grad_norm": 1.3684099959816238,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 14275
    },
    {
      "epoch": 0.14276,
      "grad_norm": 1.062957034604356,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 14276
    },
    {
      "epoch": 0.14277,
      "grad_norm": 1.258125035002094,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 14277
    },
    {
      "epoch": 0.14278,
      "grad_norm": 1.07996135461999,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 14278
    },
    {
      "epoch": 0.14279,
      "grad_norm": 1.3070816281444275,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 14279
    },
    {
      "epoch": 0.1428,
      "grad_norm": 1.0147384420988028,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 14280
    },
    {
      "epoch": 0.14281,
      "grad_norm": 1.233363304571996,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 14281
    },
    {
      "epoch": 0.14282,
      "grad_norm": 1.1563333766935289,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 14282
    },
    {
      "epoch": 0.14283,
      "grad_norm": 1.1194835920626935,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 14283
    },
    {
      "epoch": 0.14284,
      "grad_norm": 1.2053877604243153,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 14284
    },
    {
      "epoch": 0.14285,
      "grad_norm": 1.1034257992664742,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 14285
    },
    {
      "epoch": 0.14286,
      "grad_norm": 1.3143939008549936,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 14286
    },
    {
      "epoch": 0.14287,
      "grad_norm": 1.3366976752252706,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 14287
    },
    {
      "epoch": 0.14288,
      "grad_norm": 1.1310654369469988,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 14288
    },
    {
      "epoch": 0.14289,
      "grad_norm": 1.2265929427028606,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 14289
    },
    {
      "epoch": 0.1429,
      "grad_norm": 1.2046181480804943,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 14290
    },
    {
      "epoch": 0.14291,
      "grad_norm": 1.1558269131326242,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 14291
    },
    {
      "epoch": 0.14292,
      "grad_norm": 1.0649877913921748,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 14292
    },
    {
      "epoch": 0.14293,
      "grad_norm": 1.4234779215332185,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 14293
    },
    {
      "epoch": 0.14294,
      "grad_norm": 1.082763152735558,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 14294
    },
    {
      "epoch": 0.14295,
      "grad_norm": 1.173731979995803,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 14295
    },
    {
      "epoch": 0.14296,
      "grad_norm": 1.1618342398990031,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 14296
    },
    {
      "epoch": 0.14297,
      "grad_norm": 1.398643937597955,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 14297
    },
    {
      "epoch": 0.14298,
      "grad_norm": 1.1297895969250424,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 14298
    },
    {
      "epoch": 0.14299,
      "grad_norm": 1.0644719244957406,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 14299
    },
    {
      "epoch": 0.143,
      "grad_norm": 1.1861565471135649,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 14300
    },
    {
      "epoch": 0.14301,
      "grad_norm": 1.0575205910749435,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 14301
    },
    {
      "epoch": 0.14302,
      "grad_norm": 1.3843489024489457,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 14302
    },
    {
      "epoch": 0.14303,
      "grad_norm": 0.9955134745867775,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 14303
    },
    {
      "epoch": 0.14304,
      "grad_norm": 1.363422538376115,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 14304
    },
    {
      "epoch": 0.14305,
      "grad_norm": 1.1986656569895822,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 14305
    },
    {
      "epoch": 0.14306,
      "grad_norm": 1.2880639273858205,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 14306
    },
    {
      "epoch": 0.14307,
      "grad_norm": 1.1620878358597082,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 14307
    },
    {
      "epoch": 0.14308,
      "grad_norm": 1.1767622378675848,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 14308
    },
    {
      "epoch": 0.14309,
      "grad_norm": 1.19414029814871,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 14309
    },
    {
      "epoch": 0.1431,
      "grad_norm": 1.1167610645284485,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 14310
    },
    {
      "epoch": 0.14311,
      "grad_norm": 1.1773561919335593,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 14311
    },
    {
      "epoch": 0.14312,
      "grad_norm": 1.1455104222251367,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 14312
    },
    {
      "epoch": 0.14313,
      "grad_norm": 1.1359866568220862,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 14313
    },
    {
      "epoch": 0.14314,
      "grad_norm": 1.420754054680384,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 14314
    },
    {
      "epoch": 0.14315,
      "grad_norm": 1.0646963383903258,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 14315
    },
    {
      "epoch": 0.14316,
      "grad_norm": 1.2763403852729251,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 14316
    },
    {
      "epoch": 0.14317,
      "grad_norm": 1.094937836624008,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 14317
    },
    {
      "epoch": 0.14318,
      "grad_norm": 1.2742582445618376,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 14318
    },
    {
      "epoch": 0.14319,
      "grad_norm": 1.2092886240243337,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 14319
    },
    {
      "epoch": 0.1432,
      "grad_norm": 1.1382478374397993,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 14320
    },
    {
      "epoch": 0.14321,
      "grad_norm": 1.2418323837035394,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 14321
    },
    {
      "epoch": 0.14322,
      "grad_norm": 1.1518922494326984,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 14322
    },
    {
      "epoch": 0.14323,
      "grad_norm": 1.1400534779865883,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 14323
    },
    {
      "epoch": 0.14324,
      "grad_norm": 1.0692597026270678,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 14324
    },
    {
      "epoch": 0.14325,
      "grad_norm": 1.2840580875617695,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 14325
    },
    {
      "epoch": 0.14326,
      "grad_norm": 1.2816960237064363,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 14326
    },
    {
      "epoch": 0.14327,
      "grad_norm": 1.1003045057564285,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 14327
    },
    {
      "epoch": 0.14328,
      "grad_norm": 1.3623564374963049,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 14328
    },
    {
      "epoch": 0.14329,
      "grad_norm": 1.1472976404966218,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 14329
    },
    {
      "epoch": 0.1433,
      "grad_norm": 1.3164838448329237,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14330
    },
    {
      "epoch": 0.14331,
      "grad_norm": 1.1756095255143983,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 14331
    },
    {
      "epoch": 0.14332,
      "grad_norm": 1.3444697447616594,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 14332
    },
    {
      "epoch": 0.14333,
      "grad_norm": 1.0284512757583038,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 14333
    },
    {
      "epoch": 0.14334,
      "grad_norm": 1.1859319506845603,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 14334
    },
    {
      "epoch": 0.14335,
      "grad_norm": 1.3257963016987404,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 14335
    },
    {
      "epoch": 0.14336,
      "grad_norm": 1.1837739932179037,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 14336
    },
    {
      "epoch": 0.14337,
      "grad_norm": 1.5042978304051147,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 14337
    },
    {
      "epoch": 0.14338,
      "grad_norm": 1.0186872922432264,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 14338
    },
    {
      "epoch": 0.14339,
      "grad_norm": 1.1228133682719914,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 14339
    },
    {
      "epoch": 0.1434,
      "grad_norm": 1.2933482445652345,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 14340
    },
    {
      "epoch": 0.14341,
      "grad_norm": 1.2985215677679838,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 14341
    },
    {
      "epoch": 0.14342,
      "grad_norm": 1.2023861963450682,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 14342
    },
    {
      "epoch": 0.14343,
      "grad_norm": 1.1193205301312157,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 14343
    },
    {
      "epoch": 0.14344,
      "grad_norm": 1.2517525510158198,
      "learning_rate": 0.003,
      "loss": 4.1045,
      "step": 14344
    },
    {
      "epoch": 0.14345,
      "grad_norm": 0.987524666521823,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 14345
    },
    {
      "epoch": 0.14346,
      "grad_norm": 1.4945311055814365,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 14346
    },
    {
      "epoch": 0.14347,
      "grad_norm": 0.925123815103442,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 14347
    },
    {
      "epoch": 0.14348,
      "grad_norm": 1.2561977804757412,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 14348
    },
    {
      "epoch": 0.14349,
      "grad_norm": 1.3819098921980475,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 14349
    },
    {
      "epoch": 0.1435,
      "grad_norm": 1.2217818750913538,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 14350
    },
    {
      "epoch": 0.14351,
      "grad_norm": 1.4013634989561357,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 14351
    },
    {
      "epoch": 0.14352,
      "grad_norm": 1.0295581584090119,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 14352
    },
    {
      "epoch": 0.14353,
      "grad_norm": 1.4056870647209194,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 14353
    },
    {
      "epoch": 0.14354,
      "grad_norm": 1.0034190206026332,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 14354
    },
    {
      "epoch": 0.14355,
      "grad_norm": 1.316326546089397,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 14355
    },
    {
      "epoch": 0.14356,
      "grad_norm": 0.9754900843938116,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 14356
    },
    {
      "epoch": 0.14357,
      "grad_norm": 1.297848544306027,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 14357
    },
    {
      "epoch": 0.14358,
      "grad_norm": 1.279855276088557,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 14358
    },
    {
      "epoch": 0.14359,
      "grad_norm": 1.2715881490998309,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 14359
    },
    {
      "epoch": 0.1436,
      "grad_norm": 1.0772515093805437,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 14360
    },
    {
      "epoch": 0.14361,
      "grad_norm": 1.326509901035539,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 14361
    },
    {
      "epoch": 0.14362,
      "grad_norm": 1.0928487982487218,
      "learning_rate": 0.003,
      "loss": 4.1031,
      "step": 14362
    },
    {
      "epoch": 0.14363,
      "grad_norm": 1.1140734060002717,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 14363
    },
    {
      "epoch": 0.14364,
      "grad_norm": 1.2634878968911296,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 14364
    },
    {
      "epoch": 0.14365,
      "grad_norm": 0.9166447285257685,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 14365
    },
    {
      "epoch": 0.14366,
      "grad_norm": 1.1893186520089365,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 14366
    },
    {
      "epoch": 0.14367,
      "grad_norm": 1.4496297340179196,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 14367
    },
    {
      "epoch": 0.14368,
      "grad_norm": 0.9584473542956278,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 14368
    },
    {
      "epoch": 0.14369,
      "grad_norm": 1.1633714002749225,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 14369
    },
    {
      "epoch": 0.1437,
      "grad_norm": 1.1387893863445588,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 14370
    },
    {
      "epoch": 0.14371,
      "grad_norm": 1.1966518735212899,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 14371
    },
    {
      "epoch": 0.14372,
      "grad_norm": 1.1451166210976724,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 14372
    },
    {
      "epoch": 0.14373,
      "grad_norm": 1.2668958911872625,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 14373
    },
    {
      "epoch": 0.14374,
      "grad_norm": 1.1588675101541697,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 14374
    },
    {
      "epoch": 0.14375,
      "grad_norm": 1.006739453380164,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 14375
    },
    {
      "epoch": 0.14376,
      "grad_norm": 1.3155615412750452,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 14376
    },
    {
      "epoch": 0.14377,
      "grad_norm": 1.3150776956736385,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 14377
    },
    {
      "epoch": 0.14378,
      "grad_norm": 1.3536875500683214,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 14378
    },
    {
      "epoch": 0.14379,
      "grad_norm": 1.1112812989222873,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 14379
    },
    {
      "epoch": 0.1438,
      "grad_norm": 1.1198013178022161,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 14380
    },
    {
      "epoch": 0.14381,
      "grad_norm": 1.270568937258633,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 14381
    },
    {
      "epoch": 0.14382,
      "grad_norm": 0.9293268891593124,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 14382
    },
    {
      "epoch": 0.14383,
      "grad_norm": 1.3597104760585619,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 14383
    },
    {
      "epoch": 0.14384,
      "grad_norm": 1.0496305274028601,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 14384
    },
    {
      "epoch": 0.14385,
      "grad_norm": 1.2067881598096581,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 14385
    },
    {
      "epoch": 0.14386,
      "grad_norm": 1.1605057054446346,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 14386
    },
    {
      "epoch": 0.14387,
      "grad_norm": 1.4127073565580401,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 14387
    },
    {
      "epoch": 0.14388,
      "grad_norm": 1.0999021637295774,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 14388
    },
    {
      "epoch": 0.14389,
      "grad_norm": 1.235426272669938,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 14389
    },
    {
      "epoch": 0.1439,
      "grad_norm": 1.2021659561157405,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 14390
    },
    {
      "epoch": 0.14391,
      "grad_norm": 1.1205366066590694,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 14391
    },
    {
      "epoch": 0.14392,
      "grad_norm": 1.2062595819672428,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 14392
    },
    {
      "epoch": 0.14393,
      "grad_norm": 1.2771123386515066,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 14393
    },
    {
      "epoch": 0.14394,
      "grad_norm": 1.1521172255235568,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 14394
    },
    {
      "epoch": 0.14395,
      "grad_norm": 1.171395943337496,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 14395
    },
    {
      "epoch": 0.14396,
      "grad_norm": 1.2084041986998417,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 14396
    },
    {
      "epoch": 0.14397,
      "grad_norm": 0.9788561303985829,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 14397
    },
    {
      "epoch": 0.14398,
      "grad_norm": 1.0686881244883093,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 14398
    },
    {
      "epoch": 0.14399,
      "grad_norm": 1.3496435545184504,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 14399
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.9615424273870988,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 14400
    },
    {
      "epoch": 0.14401,
      "grad_norm": 1.3318751453700992,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 14401
    },
    {
      "epoch": 0.14402,
      "grad_norm": 1.096003181810339,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 14402
    },
    {
      "epoch": 0.14403,
      "grad_norm": 1.11545228527377,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 14403
    },
    {
      "epoch": 0.14404,
      "grad_norm": 1.2041662877713406,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 14404
    },
    {
      "epoch": 0.14405,
      "grad_norm": 1.0513210905801005,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14405
    },
    {
      "epoch": 0.14406,
      "grad_norm": 1.1606525548776172,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 14406
    },
    {
      "epoch": 0.14407,
      "grad_norm": 1.2641343564802707,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 14407
    },
    {
      "epoch": 0.14408,
      "grad_norm": 1.247940316562981,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 14408
    },
    {
      "epoch": 0.14409,
      "grad_norm": 1.2041270097613936,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 14409
    },
    {
      "epoch": 0.1441,
      "grad_norm": 1.0415938224350967,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 14410
    },
    {
      "epoch": 0.14411,
      "grad_norm": 1.2666962054012227,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 14411
    },
    {
      "epoch": 0.14412,
      "grad_norm": 1.1439247392330354,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 14412
    },
    {
      "epoch": 0.14413,
      "grad_norm": 1.2602037342993857,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 14413
    },
    {
      "epoch": 0.14414,
      "grad_norm": 1.1622242094178536,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 14414
    },
    {
      "epoch": 0.14415,
      "grad_norm": 1.2105148648661788,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 14415
    },
    {
      "epoch": 0.14416,
      "grad_norm": 1.1207045739791284,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 14416
    },
    {
      "epoch": 0.14417,
      "grad_norm": 1.4836229313973075,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 14417
    },
    {
      "epoch": 0.14418,
      "grad_norm": 0.9855947054257272,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 14418
    },
    {
      "epoch": 0.14419,
      "grad_norm": 1.3469614476056524,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 14419
    },
    {
      "epoch": 0.1442,
      "grad_norm": 1.1425115038010174,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 14420
    },
    {
      "epoch": 0.14421,
      "grad_norm": 1.2457255262964664,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 14421
    },
    {
      "epoch": 0.14422,
      "grad_norm": 1.1336401634870403,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 14422
    },
    {
      "epoch": 0.14423,
      "grad_norm": 1.0490914614301334,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 14423
    },
    {
      "epoch": 0.14424,
      "grad_norm": 1.2515768162712582,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 14424
    },
    {
      "epoch": 0.14425,
      "grad_norm": 1.177888633839058,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 14425
    },
    {
      "epoch": 0.14426,
      "grad_norm": 1.4308659781289155,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 14426
    },
    {
      "epoch": 0.14427,
      "grad_norm": 1.2623096956474995,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 14427
    },
    {
      "epoch": 0.14428,
      "grad_norm": 1.2325810704553675,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 14428
    },
    {
      "epoch": 0.14429,
      "grad_norm": 1.2871405107187142,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 14429
    },
    {
      "epoch": 0.1443,
      "grad_norm": 1.2934040487110332,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 14430
    },
    {
      "epoch": 0.14431,
      "grad_norm": 1.2189751427410236,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 14431
    },
    {
      "epoch": 0.14432,
      "grad_norm": 1.412281996903719,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14432
    },
    {
      "epoch": 0.14433,
      "grad_norm": 1.0195358371839056,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 14433
    },
    {
      "epoch": 0.14434,
      "grad_norm": 1.2086298050739679,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 14434
    },
    {
      "epoch": 0.14435,
      "grad_norm": 1.0830766577799509,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 14435
    },
    {
      "epoch": 0.14436,
      "grad_norm": 1.2975847416280557,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 14436
    },
    {
      "epoch": 0.14437,
      "grad_norm": 0.9731705441371323,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 14437
    },
    {
      "epoch": 0.14438,
      "grad_norm": 1.2630870673775951,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 14438
    },
    {
      "epoch": 0.14439,
      "grad_norm": 1.1358145259025993,
      "learning_rate": 0.003,
      "loss": 4.0976,
      "step": 14439
    },
    {
      "epoch": 0.1444,
      "grad_norm": 1.3018920164838934,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 14440
    },
    {
      "epoch": 0.14441,
      "grad_norm": 0.9520262031343586,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 14441
    },
    {
      "epoch": 0.14442,
      "grad_norm": 1.37672864168369,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 14442
    },
    {
      "epoch": 0.14443,
      "grad_norm": 1.0880208080473694,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 14443
    },
    {
      "epoch": 0.14444,
      "grad_norm": 1.3299459072739503,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 14444
    },
    {
      "epoch": 0.14445,
      "grad_norm": 1.0251162319901956,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 14445
    },
    {
      "epoch": 0.14446,
      "grad_norm": 1.4596973738649757,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 14446
    },
    {
      "epoch": 0.14447,
      "grad_norm": 1.0297129556679012,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 14447
    },
    {
      "epoch": 0.14448,
      "grad_norm": 1.4479733966210386,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 14448
    },
    {
      "epoch": 0.14449,
      "grad_norm": 1.2439657330802247,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 14449
    },
    {
      "epoch": 0.1445,
      "grad_norm": 1.0180874319876076,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 14450
    },
    {
      "epoch": 0.14451,
      "grad_norm": 1.328472310811886,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 14451
    },
    {
      "epoch": 0.14452,
      "grad_norm": 1.1103269484626483,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 14452
    },
    {
      "epoch": 0.14453,
      "grad_norm": 1.2902268585395242,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 14453
    },
    {
      "epoch": 0.14454,
      "grad_norm": 1.2104492374632532,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 14454
    },
    {
      "epoch": 0.14455,
      "grad_norm": 1.2072891677667257,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 14455
    },
    {
      "epoch": 0.14456,
      "grad_norm": 1.196612239151037,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 14456
    },
    {
      "epoch": 0.14457,
      "grad_norm": 1.2599557462766016,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 14457
    },
    {
      "epoch": 0.14458,
      "grad_norm": 1.3945832178739188,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 14458
    },
    {
      "epoch": 0.14459,
      "grad_norm": 1.1456726401275719,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 14459
    },
    {
      "epoch": 0.1446,
      "grad_norm": 1.3704195688415575,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 14460
    },
    {
      "epoch": 0.14461,
      "grad_norm": 0.883410802755949,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 14461
    },
    {
      "epoch": 0.14462,
      "grad_norm": 0.9383054724629,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 14462
    },
    {
      "epoch": 0.14463,
      "grad_norm": 1.18692137658522,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 14463
    },
    {
      "epoch": 0.14464,
      "grad_norm": 1.13057186010462,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 14464
    },
    {
      "epoch": 0.14465,
      "grad_norm": 1.4486273262774236,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 14465
    },
    {
      "epoch": 0.14466,
      "grad_norm": 1.0363963440455068,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 14466
    },
    {
      "epoch": 0.14467,
      "grad_norm": 1.2446597742072119,
      "learning_rate": 0.003,
      "loss": 4.1064,
      "step": 14467
    },
    {
      "epoch": 0.14468,
      "grad_norm": 1.2922347688614726,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 14468
    },
    {
      "epoch": 0.14469,
      "grad_norm": 1.2097053980473647,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 14469
    },
    {
      "epoch": 0.1447,
      "grad_norm": 1.351741293527175,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 14470
    },
    {
      "epoch": 0.14471,
      "grad_norm": 1.0702467467520114,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 14471
    },
    {
      "epoch": 0.14472,
      "grad_norm": 1.28495992989329,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 14472
    },
    {
      "epoch": 0.14473,
      "grad_norm": 1.0662656580764747,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 14473
    },
    {
      "epoch": 0.14474,
      "grad_norm": 1.197696239350392,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 14474
    },
    {
      "epoch": 0.14475,
      "grad_norm": 1.129624119151968,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 14475
    },
    {
      "epoch": 0.14476,
      "grad_norm": 1.2455647657848186,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 14476
    },
    {
      "epoch": 0.14477,
      "grad_norm": 1.1819455647373525,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 14477
    },
    {
      "epoch": 0.14478,
      "grad_norm": 1.3905545875085565,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 14478
    },
    {
      "epoch": 0.14479,
      "grad_norm": 1.170762766452118,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 14479
    },
    {
      "epoch": 0.1448,
      "grad_norm": 1.2513212509048317,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 14480
    },
    {
      "epoch": 0.14481,
      "grad_norm": 0.9761595945600573,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 14481
    },
    {
      "epoch": 0.14482,
      "grad_norm": 1.0174914668918622,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 14482
    },
    {
      "epoch": 0.14483,
      "grad_norm": 1.359980196411554,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 14483
    },
    {
      "epoch": 0.14484,
      "grad_norm": 1.0902831742596368,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 14484
    },
    {
      "epoch": 0.14485,
      "grad_norm": 1.2593625056395699,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 14485
    },
    {
      "epoch": 0.14486,
      "grad_norm": 0.9821002792210466,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 14486
    },
    {
      "epoch": 0.14487,
      "grad_norm": 1.2255247495177521,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 14487
    },
    {
      "epoch": 0.14488,
      "grad_norm": 1.2069811086964375,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 14488
    },
    {
      "epoch": 0.14489,
      "grad_norm": 1.0643181195045752,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 14489
    },
    {
      "epoch": 0.1449,
      "grad_norm": 1.326577652449159,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 14490
    },
    {
      "epoch": 0.14491,
      "grad_norm": 1.0539774466861362,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 14491
    },
    {
      "epoch": 0.14492,
      "grad_norm": 1.2060034543920808,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 14492
    },
    {
      "epoch": 0.14493,
      "grad_norm": 1.255149818281733,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 14493
    },
    {
      "epoch": 0.14494,
      "grad_norm": 1.5354233835193087,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 14494
    },
    {
      "epoch": 0.14495,
      "grad_norm": 0.9562276465593462,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 14495
    },
    {
      "epoch": 0.14496,
      "grad_norm": 1.2394086556118171,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 14496
    },
    {
      "epoch": 0.14497,
      "grad_norm": 1.3351260622140366,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 14497
    },
    {
      "epoch": 0.14498,
      "grad_norm": 1.0365048636710072,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 14498
    },
    {
      "epoch": 0.14499,
      "grad_norm": 1.3895783940738689,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 14499
    },
    {
      "epoch": 0.145,
      "grad_norm": 1.092441014869526,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 14500
    },
    {
      "epoch": 0.14501,
      "grad_norm": 1.278832843735235,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 14501
    },
    {
      "epoch": 0.14502,
      "grad_norm": 1.2593853894413225,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 14502
    },
    {
      "epoch": 0.14503,
      "grad_norm": 1.0732786142231292,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 14503
    },
    {
      "epoch": 0.14504,
      "grad_norm": 1.3895213148451642,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 14504
    },
    {
      "epoch": 0.14505,
      "grad_norm": 1.0759802486567076,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 14505
    },
    {
      "epoch": 0.14506,
      "grad_norm": 1.5076770877355903,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 14506
    },
    {
      "epoch": 0.14507,
      "grad_norm": 1.100514514015529,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 14507
    },
    {
      "epoch": 0.14508,
      "grad_norm": 1.3372686597544416,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 14508
    },
    {
      "epoch": 0.14509,
      "grad_norm": 1.0311444282019597,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 14509
    },
    {
      "epoch": 0.1451,
      "grad_norm": 1.1733465513027923,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 14510
    },
    {
      "epoch": 0.14511,
      "grad_norm": 1.174627063648199,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 14511
    },
    {
      "epoch": 0.14512,
      "grad_norm": 1.0887227832658273,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 14512
    },
    {
      "epoch": 0.14513,
      "grad_norm": 1.3383210590853012,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 14513
    },
    {
      "epoch": 0.14514,
      "grad_norm": 1.199637874835041,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 14514
    },
    {
      "epoch": 0.14515,
      "grad_norm": 1.2820711854534685,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 14515
    },
    {
      "epoch": 0.14516,
      "grad_norm": 1.2169255591432746,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 14516
    },
    {
      "epoch": 0.14517,
      "grad_norm": 1.4259356736559685,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 14517
    },
    {
      "epoch": 0.14518,
      "grad_norm": 1.085508315724117,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 14518
    },
    {
      "epoch": 0.14519,
      "grad_norm": 1.2337796428518382,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 14519
    },
    {
      "epoch": 0.1452,
      "grad_norm": 1.2381036650493278,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 14520
    },
    {
      "epoch": 0.14521,
      "grad_norm": 1.222946112707692,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 14521
    },
    {
      "epoch": 0.14522,
      "grad_norm": 0.9838979388637205,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 14522
    },
    {
      "epoch": 0.14523,
      "grad_norm": 1.2459678211650227,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 14523
    },
    {
      "epoch": 0.14524,
      "grad_norm": 0.9726485250787739,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 14524
    },
    {
      "epoch": 0.14525,
      "grad_norm": 1.30750556386179,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 14525
    },
    {
      "epoch": 0.14526,
      "grad_norm": 1.3415239021375474,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 14526
    },
    {
      "epoch": 0.14527,
      "grad_norm": 1.2152644096308234,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 14527
    },
    {
      "epoch": 0.14528,
      "grad_norm": 1.0561859514540088,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 14528
    },
    {
      "epoch": 0.14529,
      "grad_norm": 1.1541866884114087,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 14529
    },
    {
      "epoch": 0.1453,
      "grad_norm": 1.2411854612012745,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 14530
    },
    {
      "epoch": 0.14531,
      "grad_norm": 1.2655536383647337,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 14531
    },
    {
      "epoch": 0.14532,
      "grad_norm": 1.6007632252405208,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 14532
    },
    {
      "epoch": 0.14533,
      "grad_norm": 1.013734516566645,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 14533
    },
    {
      "epoch": 0.14534,
      "grad_norm": 1.2815320279732259,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 14534
    },
    {
      "epoch": 0.14535,
      "grad_norm": 1.1098958620616504,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 14535
    },
    {
      "epoch": 0.14536,
      "grad_norm": 1.262519752412867,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 14536
    },
    {
      "epoch": 0.14537,
      "grad_norm": 1.229410593450138,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 14537
    },
    {
      "epoch": 0.14538,
      "grad_norm": 1.0249854737352893,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 14538
    },
    {
      "epoch": 0.14539,
      "grad_norm": 1.2106568712291543,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 14539
    },
    {
      "epoch": 0.1454,
      "grad_norm": 1.3375194318072543,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 14540
    },
    {
      "epoch": 0.14541,
      "grad_norm": 1.2808277806276076,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 14541
    },
    {
      "epoch": 0.14542,
      "grad_norm": 1.3645526732280286,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 14542
    },
    {
      "epoch": 0.14543,
      "grad_norm": 0.927561164935603,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 14543
    },
    {
      "epoch": 0.14544,
      "grad_norm": 1.2374565780761364,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 14544
    },
    {
      "epoch": 0.14545,
      "grad_norm": 1.172172317630963,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 14545
    },
    {
      "epoch": 0.14546,
      "grad_norm": 1.0587197270489002,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 14546
    },
    {
      "epoch": 0.14547,
      "grad_norm": 1.0765614881728571,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 14547
    },
    {
      "epoch": 0.14548,
      "grad_norm": 1.301588512798978,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 14548
    },
    {
      "epoch": 0.14549,
      "grad_norm": 1.2205407961821984,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 14549
    },
    {
      "epoch": 0.1455,
      "grad_norm": 1.1145718192779168,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14550
    },
    {
      "epoch": 0.14551,
      "grad_norm": 1.179621630415264,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 14551
    },
    {
      "epoch": 0.14552,
      "grad_norm": 1.1880988740837897,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 14552
    },
    {
      "epoch": 0.14553,
      "grad_norm": 1.1808497016223631,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 14553
    },
    {
      "epoch": 0.14554,
      "grad_norm": 1.2605311608691536,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 14554
    },
    {
      "epoch": 0.14555,
      "grad_norm": 1.0056741277557708,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 14555
    },
    {
      "epoch": 0.14556,
      "grad_norm": 1.22080608763233,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 14556
    },
    {
      "epoch": 0.14557,
      "grad_norm": 1.2786913420968125,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 14557
    },
    {
      "epoch": 0.14558,
      "grad_norm": 1.0155114758361838,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 14558
    },
    {
      "epoch": 0.14559,
      "grad_norm": 1.346244687983044,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 14559
    },
    {
      "epoch": 0.1456,
      "grad_norm": 0.9875529737539662,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 14560
    },
    {
      "epoch": 0.14561,
      "grad_norm": 1.3121646791886872,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 14561
    },
    {
      "epoch": 0.14562,
      "grad_norm": 1.2255825121213335,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 14562
    },
    {
      "epoch": 0.14563,
      "grad_norm": 1.267275515399711,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 14563
    },
    {
      "epoch": 0.14564,
      "grad_norm": 1.4494582533880669,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 14564
    },
    {
      "epoch": 0.14565,
      "grad_norm": 0.9840388509427782,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 14565
    },
    {
      "epoch": 0.14566,
      "grad_norm": 1.1418983249704502,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 14566
    },
    {
      "epoch": 0.14567,
      "grad_norm": 1.1638000329298885,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 14567
    },
    {
      "epoch": 0.14568,
      "grad_norm": 1.2215265284809167,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 14568
    },
    {
      "epoch": 0.14569,
      "grad_norm": 1.278431476453285,
      "learning_rate": 0.003,
      "loss": 4.0982,
      "step": 14569
    },
    {
      "epoch": 0.1457,
      "grad_norm": 1.0550827420241753,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 14570
    },
    {
      "epoch": 0.14571,
      "grad_norm": 1.3879840743112057,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 14571
    },
    {
      "epoch": 0.14572,
      "grad_norm": 1.1270340714390352,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 14572
    },
    {
      "epoch": 0.14573,
      "grad_norm": 1.0760229161533796,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 14573
    },
    {
      "epoch": 0.14574,
      "grad_norm": 1.3566984962366937,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 14574
    },
    {
      "epoch": 0.14575,
      "grad_norm": 1.0191836638824994,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 14575
    },
    {
      "epoch": 0.14576,
      "grad_norm": 1.2673730075312062,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 14576
    },
    {
      "epoch": 0.14577,
      "grad_norm": 1.176399466905642,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 14577
    },
    {
      "epoch": 0.14578,
      "grad_norm": 1.170054090010369,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 14578
    },
    {
      "epoch": 0.14579,
      "grad_norm": 1.060788554025084,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 14579
    },
    {
      "epoch": 0.1458,
      "grad_norm": 1.2276134150389053,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 14580
    },
    {
      "epoch": 0.14581,
      "grad_norm": 1.3744823581511467,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 14581
    },
    {
      "epoch": 0.14582,
      "grad_norm": 1.2962302966317039,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 14582
    },
    {
      "epoch": 0.14583,
      "grad_norm": 1.1529484222967947,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 14583
    },
    {
      "epoch": 0.14584,
      "grad_norm": 1.2619267189116472,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 14584
    },
    {
      "epoch": 0.14585,
      "grad_norm": 1.299212021768132,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 14585
    },
    {
      "epoch": 0.14586,
      "grad_norm": 1.2045962939061319,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 14586
    },
    {
      "epoch": 0.14587,
      "grad_norm": 1.082534399396641,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 14587
    },
    {
      "epoch": 0.14588,
      "grad_norm": 1.3499879459859705,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 14588
    },
    {
      "epoch": 0.14589,
      "grad_norm": 1.0895206117845913,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 14589
    },
    {
      "epoch": 0.1459,
      "grad_norm": 1.3209310273651989,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14590
    },
    {
      "epoch": 0.14591,
      "grad_norm": 1.0109199459082603,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 14591
    },
    {
      "epoch": 0.14592,
      "grad_norm": 1.3237592900981143,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 14592
    },
    {
      "epoch": 0.14593,
      "grad_norm": 1.2350511124004422,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 14593
    },
    {
      "epoch": 0.14594,
      "grad_norm": 1.3199162857821065,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 14594
    },
    {
      "epoch": 0.14595,
      "grad_norm": 1.1754752002000035,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 14595
    },
    {
      "epoch": 0.14596,
      "grad_norm": 1.1615829473896362,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 14596
    },
    {
      "epoch": 0.14597,
      "grad_norm": 1.2390998641870503,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 14597
    },
    {
      "epoch": 0.14598,
      "grad_norm": 1.1328430900161557,
      "learning_rate": 0.003,
      "loss": 4.1069,
      "step": 14598
    },
    {
      "epoch": 0.14599,
      "grad_norm": 1.2876515222337628,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 14599
    },
    {
      "epoch": 0.146,
      "grad_norm": 1.0675039173430005,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 14600
    },
    {
      "epoch": 0.14601,
      "grad_norm": 1.1882138300330307,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 14601
    },
    {
      "epoch": 0.14602,
      "grad_norm": 0.9090556479930595,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 14602
    },
    {
      "epoch": 0.14603,
      "grad_norm": 1.244940507479042,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 14603
    },
    {
      "epoch": 0.14604,
      "grad_norm": 1.382787613320107,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 14604
    },
    {
      "epoch": 0.14605,
      "grad_norm": 1.2648621268954807,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 14605
    },
    {
      "epoch": 0.14606,
      "grad_norm": 1.189385499215917,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 14606
    },
    {
      "epoch": 0.14607,
      "grad_norm": 1.2313921134389831,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 14607
    },
    {
      "epoch": 0.14608,
      "grad_norm": 1.2198898892078907,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 14608
    },
    {
      "epoch": 0.14609,
      "grad_norm": 1.2774933099669845,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 14609
    },
    {
      "epoch": 0.1461,
      "grad_norm": 1.0161959784494827,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 14610
    },
    {
      "epoch": 0.14611,
      "grad_norm": 1.1696317224606492,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 14611
    },
    {
      "epoch": 0.14612,
      "grad_norm": 1.2497934127953523,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 14612
    },
    {
      "epoch": 0.14613,
      "grad_norm": 1.1170236643383034,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 14613
    },
    {
      "epoch": 0.14614,
      "grad_norm": 1.2429169519224235,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 14614
    },
    {
      "epoch": 0.14615,
      "grad_norm": 0.9616778368294037,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 14615
    },
    {
      "epoch": 0.14616,
      "grad_norm": 1.372503444062351,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 14616
    },
    {
      "epoch": 0.14617,
      "grad_norm": 1.2010856800097343,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 14617
    },
    {
      "epoch": 0.14618,
      "grad_norm": 1.4562051310403952,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 14618
    },
    {
      "epoch": 0.14619,
      "grad_norm": 1.0531531805489553,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 14619
    },
    {
      "epoch": 0.1462,
      "grad_norm": 1.3799675814826888,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 14620
    },
    {
      "epoch": 0.14621,
      "grad_norm": 0.9903401853565199,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 14621
    },
    {
      "epoch": 0.14622,
      "grad_norm": 1.4470191974780509,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 14622
    },
    {
      "epoch": 0.14623,
      "grad_norm": 1.086507214320285,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 14623
    },
    {
      "epoch": 0.14624,
      "grad_norm": 1.3369608097583916,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 14624
    },
    {
      "epoch": 0.14625,
      "grad_norm": 1.22527580058732,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 14625
    },
    {
      "epoch": 0.14626,
      "grad_norm": 1.1552527438449494,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 14626
    },
    {
      "epoch": 0.14627,
      "grad_norm": 1.1882929095233759,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 14627
    },
    {
      "epoch": 0.14628,
      "grad_norm": 1.0708626022018357,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 14628
    },
    {
      "epoch": 0.14629,
      "grad_norm": 1.183950295621809,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 14629
    },
    {
      "epoch": 0.1463,
      "grad_norm": 1.3449094679059561,
      "learning_rate": 0.003,
      "loss": 4.1035,
      "step": 14630
    },
    {
      "epoch": 0.14631,
      "grad_norm": 1.0995889188378865,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 14631
    },
    {
      "epoch": 0.14632,
      "grad_norm": 1.3625103167580896,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 14632
    },
    {
      "epoch": 0.14633,
      "grad_norm": 1.187469929229442,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 14633
    },
    {
      "epoch": 0.14634,
      "grad_norm": 1.3842712959457022,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 14634
    },
    {
      "epoch": 0.14635,
      "grad_norm": 1.1657763904446767,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14635
    },
    {
      "epoch": 0.14636,
      "grad_norm": 1.0820491943073718,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 14636
    },
    {
      "epoch": 0.14637,
      "grad_norm": 1.2288146011263648,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 14637
    },
    {
      "epoch": 0.14638,
      "grad_norm": 1.0388897379918565,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 14638
    },
    {
      "epoch": 0.14639,
      "grad_norm": 1.4093240022149505,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 14639
    },
    {
      "epoch": 0.1464,
      "grad_norm": 1.0009021916306446,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 14640
    },
    {
      "epoch": 0.14641,
      "grad_norm": 1.6059976781374665,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 14641
    },
    {
      "epoch": 0.14642,
      "grad_norm": 1.005407837664379,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 14642
    },
    {
      "epoch": 0.14643,
      "grad_norm": 1.359081219147045,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 14643
    },
    {
      "epoch": 0.14644,
      "grad_norm": 1.225943668673731,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 14644
    },
    {
      "epoch": 0.14645,
      "grad_norm": 1.1407681364901596,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 14645
    },
    {
      "epoch": 0.14646,
      "grad_norm": 1.3033431686765575,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 14646
    },
    {
      "epoch": 0.14647,
      "grad_norm": 1.140009102359671,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 14647
    },
    {
      "epoch": 0.14648,
      "grad_norm": 1.1058876269366287,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 14648
    },
    {
      "epoch": 0.14649,
      "grad_norm": 1.5635184765600387,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 14649
    },
    {
      "epoch": 0.1465,
      "grad_norm": 1.1411867898542467,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 14650
    },
    {
      "epoch": 0.14651,
      "grad_norm": 1.3139899588159343,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 14651
    },
    {
      "epoch": 0.14652,
      "grad_norm": 1.0367864731998273,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 14652
    },
    {
      "epoch": 0.14653,
      "grad_norm": 1.3422765458253958,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 14653
    },
    {
      "epoch": 0.14654,
      "grad_norm": 1.126354692445179,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 14654
    },
    {
      "epoch": 0.14655,
      "grad_norm": 1.2419398434083484,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 14655
    },
    {
      "epoch": 0.14656,
      "grad_norm": 1.0078000026030147,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 14656
    },
    {
      "epoch": 0.14657,
      "grad_norm": 1.2022185194869899,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 14657
    },
    {
      "epoch": 0.14658,
      "grad_norm": 1.1633967049791696,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 14658
    },
    {
      "epoch": 0.14659,
      "grad_norm": 1.1010487324351983,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 14659
    },
    {
      "epoch": 0.1466,
      "grad_norm": 1.0010589971993746,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 14660
    },
    {
      "epoch": 0.14661,
      "grad_norm": 1.3786396459595662,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 14661
    },
    {
      "epoch": 0.14662,
      "grad_norm": 1.2255796984428124,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 14662
    },
    {
      "epoch": 0.14663,
      "grad_norm": 1.1980058196702907,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 14663
    },
    {
      "epoch": 0.14664,
      "grad_norm": 1.186504821107635,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 14664
    },
    {
      "epoch": 0.14665,
      "grad_norm": 1.0413325935959892,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 14665
    },
    {
      "epoch": 0.14666,
      "grad_norm": 1.2490642421528309,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 14666
    },
    {
      "epoch": 0.14667,
      "grad_norm": 0.965613004754449,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 14667
    },
    {
      "epoch": 0.14668,
      "grad_norm": 1.1489845934784366,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 14668
    },
    {
      "epoch": 0.14669,
      "grad_norm": 1.4148464166474155,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 14669
    },
    {
      "epoch": 0.1467,
      "grad_norm": 1.1174324667613753,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 14670
    },
    {
      "epoch": 0.14671,
      "grad_norm": 1.4705757103421846,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 14671
    },
    {
      "epoch": 0.14672,
      "grad_norm": 1.1503001407497926,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 14672
    },
    {
      "epoch": 0.14673,
      "grad_norm": 1.2246606338903052,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 14673
    },
    {
      "epoch": 0.14674,
      "grad_norm": 1.2790334951540812,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 14674
    },
    {
      "epoch": 0.14675,
      "grad_norm": 1.1307066190918795,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 14675
    },
    {
      "epoch": 0.14676,
      "grad_norm": 1.2569590416028746,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 14676
    },
    {
      "epoch": 0.14677,
      "grad_norm": 1.0829300408271783,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 14677
    },
    {
      "epoch": 0.14678,
      "grad_norm": 1.269313016423024,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 14678
    },
    {
      "epoch": 0.14679,
      "grad_norm": 0.9549088118669827,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 14679
    },
    {
      "epoch": 0.1468,
      "grad_norm": 1.4753734474615985,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 14680
    },
    {
      "epoch": 0.14681,
      "grad_norm": 0.9408217562756533,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 14681
    },
    {
      "epoch": 0.14682,
      "grad_norm": 1.4576604514636082,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 14682
    },
    {
      "epoch": 0.14683,
      "grad_norm": 1.1046458235008672,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 14683
    },
    {
      "epoch": 0.14684,
      "grad_norm": 1.2793133353188606,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 14684
    },
    {
      "epoch": 0.14685,
      "grad_norm": 1.1469602156718897,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 14685
    },
    {
      "epoch": 0.14686,
      "grad_norm": 1.346386790762256,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 14686
    },
    {
      "epoch": 0.14687,
      "grad_norm": 1.5623657287016393,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 14687
    },
    {
      "epoch": 0.14688,
      "grad_norm": 1.2249617734399507,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 14688
    },
    {
      "epoch": 0.14689,
      "grad_norm": 1.1500344130353606,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 14689
    },
    {
      "epoch": 0.1469,
      "grad_norm": 1.105124624695385,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 14690
    },
    {
      "epoch": 0.14691,
      "grad_norm": 1.2645902920592447,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 14691
    },
    {
      "epoch": 0.14692,
      "grad_norm": 1.0712841892770806,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 14692
    },
    {
      "epoch": 0.14693,
      "grad_norm": 1.134902703042126,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 14693
    },
    {
      "epoch": 0.14694,
      "grad_norm": 1.1108776270674083,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 14694
    },
    {
      "epoch": 0.14695,
      "grad_norm": 1.059649463246095,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 14695
    },
    {
      "epoch": 0.14696,
      "grad_norm": 1.3014136466686084,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 14696
    },
    {
      "epoch": 0.14697,
      "grad_norm": 1.0449429689531058,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 14697
    },
    {
      "epoch": 0.14698,
      "grad_norm": 1.4253373081448815,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 14698
    },
    {
      "epoch": 0.14699,
      "grad_norm": 1.297590735587724,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 14699
    },
    {
      "epoch": 0.147,
      "grad_norm": 1.539234247196825,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 14700
    },
    {
      "epoch": 0.14701,
      "grad_norm": 1.0035249133499857,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 14701
    },
    {
      "epoch": 0.14702,
      "grad_norm": 1.0865586183742486,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 14702
    },
    {
      "epoch": 0.14703,
      "grad_norm": 1.1942781752177534,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 14703
    },
    {
      "epoch": 0.14704,
      "grad_norm": 1.3027030455420896,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 14704
    },
    {
      "epoch": 0.14705,
      "grad_norm": 1.0922265779538505,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 14705
    },
    {
      "epoch": 0.14706,
      "grad_norm": 1.2461119997772603,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 14706
    },
    {
      "epoch": 0.14707,
      "grad_norm": 1.0979541624663782,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 14707
    },
    {
      "epoch": 0.14708,
      "grad_norm": 1.0504304539572533,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 14708
    },
    {
      "epoch": 0.14709,
      "grad_norm": 1.2043107179364494,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 14709
    },
    {
      "epoch": 0.1471,
      "grad_norm": 1.1676241791623339,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 14710
    },
    {
      "epoch": 0.14711,
      "grad_norm": 1.210562928850982,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14711
    },
    {
      "epoch": 0.14712,
      "grad_norm": 1.2477750222221868,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 14712
    },
    {
      "epoch": 0.14713,
      "grad_norm": 1.0360888983840815,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 14713
    },
    {
      "epoch": 0.14714,
      "grad_norm": 1.2496428384055074,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 14714
    },
    {
      "epoch": 0.14715,
      "grad_norm": 1.1964986772086843,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 14715
    },
    {
      "epoch": 0.14716,
      "grad_norm": 1.3580341432365273,
      "learning_rate": 0.003,
      "loss": 4.0946,
      "step": 14716
    },
    {
      "epoch": 0.14717,
      "grad_norm": 1.0931902152639112,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 14717
    },
    {
      "epoch": 0.14718,
      "grad_norm": 1.3500717654172187,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 14718
    },
    {
      "epoch": 0.14719,
      "grad_norm": 1.1276161718533264,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 14719
    },
    {
      "epoch": 0.1472,
      "grad_norm": 1.132809481240904,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 14720
    },
    {
      "epoch": 0.14721,
      "grad_norm": 1.3768027297001184,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 14721
    },
    {
      "epoch": 0.14722,
      "grad_norm": 0.9530890757395711,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 14722
    },
    {
      "epoch": 0.14723,
      "grad_norm": 1.0827926656367546,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 14723
    },
    {
      "epoch": 0.14724,
      "grad_norm": 1.1187348846776448,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 14724
    },
    {
      "epoch": 0.14725,
      "grad_norm": 1.5315708362193274,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 14725
    },
    {
      "epoch": 0.14726,
      "grad_norm": 0.9385900526540597,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 14726
    },
    {
      "epoch": 0.14727,
      "grad_norm": 1.2573944213072323,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 14727
    },
    {
      "epoch": 0.14728,
      "grad_norm": 1.2554160816324138,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 14728
    },
    {
      "epoch": 0.14729,
      "grad_norm": 1.1407009187347024,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 14729
    },
    {
      "epoch": 0.1473,
      "grad_norm": 1.1233097547412818,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 14730
    },
    {
      "epoch": 0.14731,
      "grad_norm": 1.1880767072159575,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 14731
    },
    {
      "epoch": 0.14732,
      "grad_norm": 1.1293569513781274,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 14732
    },
    {
      "epoch": 0.14733,
      "grad_norm": 1.0806254931971826,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 14733
    },
    {
      "epoch": 0.14734,
      "grad_norm": 1.2303839421783829,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 14734
    },
    {
      "epoch": 0.14735,
      "grad_norm": 1.1038210616468882,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 14735
    },
    {
      "epoch": 0.14736,
      "grad_norm": 1.4684827863386878,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 14736
    },
    {
      "epoch": 0.14737,
      "grad_norm": 0.9586319555713341,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 14737
    },
    {
      "epoch": 0.14738,
      "grad_norm": 1.3252229291648583,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 14738
    },
    {
      "epoch": 0.14739,
      "grad_norm": 1.0321800907435257,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 14739
    },
    {
      "epoch": 0.1474,
      "grad_norm": 1.2514911163218652,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 14740
    },
    {
      "epoch": 0.14741,
      "grad_norm": 1.098563671851662,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 14741
    },
    {
      "epoch": 0.14742,
      "grad_norm": 1.2164774786994783,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 14742
    },
    {
      "epoch": 0.14743,
      "grad_norm": 1.3127094532072934,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 14743
    },
    {
      "epoch": 0.14744,
      "grad_norm": 1.6661720086832064,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 14744
    },
    {
      "epoch": 0.14745,
      "grad_norm": 1.1512949942038893,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 14745
    },
    {
      "epoch": 0.14746,
      "grad_norm": 1.1407807110626125,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 14746
    },
    {
      "epoch": 0.14747,
      "grad_norm": 1.252229562812047,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 14747
    },
    {
      "epoch": 0.14748,
      "grad_norm": 1.2099183759820158,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 14748
    },
    {
      "epoch": 0.14749,
      "grad_norm": 1.1472340119243152,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 14749
    },
    {
      "epoch": 0.1475,
      "grad_norm": 1.2165033715530738,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14750
    },
    {
      "epoch": 0.14751,
      "grad_norm": 1.2680735466183857,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 14751
    },
    {
      "epoch": 0.14752,
      "grad_norm": 1.1487999400150248,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 14752
    },
    {
      "epoch": 0.14753,
      "grad_norm": 1.0689480660851851,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 14753
    },
    {
      "epoch": 0.14754,
      "grad_norm": 1.1452910366618543,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 14754
    },
    {
      "epoch": 0.14755,
      "grad_norm": 1.07772546469753,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 14755
    },
    {
      "epoch": 0.14756,
      "grad_norm": 1.665425389885917,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 14756
    },
    {
      "epoch": 0.14757,
      "grad_norm": 0.9772616298363169,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 14757
    },
    {
      "epoch": 0.14758,
      "grad_norm": 1.2775909224056938,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 14758
    },
    {
      "epoch": 0.14759,
      "grad_norm": 1.0999525314929848,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 14759
    },
    {
      "epoch": 0.1476,
      "grad_norm": 1.1310319747116835,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 14760
    },
    {
      "epoch": 0.14761,
      "grad_norm": 1.4201830819922163,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 14761
    },
    {
      "epoch": 0.14762,
      "grad_norm": 1.0700611618861156,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 14762
    },
    {
      "epoch": 0.14763,
      "grad_norm": 1.2204969586231962,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 14763
    },
    {
      "epoch": 0.14764,
      "grad_norm": 1.1176178847569394,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 14764
    },
    {
      "epoch": 0.14765,
      "grad_norm": 1.3089771832935406,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 14765
    },
    {
      "epoch": 0.14766,
      "grad_norm": 1.1554055840105277,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 14766
    },
    {
      "epoch": 0.14767,
      "grad_norm": 1.6609072176542494,
      "learning_rate": 0.003,
      "loss": 4.1078,
      "step": 14767
    },
    {
      "epoch": 0.14768,
      "grad_norm": 1.246840048758139,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 14768
    },
    {
      "epoch": 0.14769,
      "grad_norm": 1.0241407891675958,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 14769
    },
    {
      "epoch": 0.1477,
      "grad_norm": 1.4294137772300697,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 14770
    },
    {
      "epoch": 0.14771,
      "grad_norm": 1.1823718803966925,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 14771
    },
    {
      "epoch": 0.14772,
      "grad_norm": 1.0938050717152028,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 14772
    },
    {
      "epoch": 0.14773,
      "grad_norm": 1.2027402156460119,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 14773
    },
    {
      "epoch": 0.14774,
      "grad_norm": 1.0051254526180466,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 14774
    },
    {
      "epoch": 0.14775,
      "grad_norm": 1.3381363065056453,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 14775
    },
    {
      "epoch": 0.14776,
      "grad_norm": 0.984399063906672,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 14776
    },
    {
      "epoch": 0.14777,
      "grad_norm": 1.4129631638050066,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 14777
    },
    {
      "epoch": 0.14778,
      "grad_norm": 1.4308281610427214,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 14778
    },
    {
      "epoch": 0.14779,
      "grad_norm": 0.955116698206692,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 14779
    },
    {
      "epoch": 0.1478,
      "grad_norm": 1.0683568116905549,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 14780
    },
    {
      "epoch": 0.14781,
      "grad_norm": 1.5234885998554772,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 14781
    },
    {
      "epoch": 0.14782,
      "grad_norm": 1.4803704733366099,
      "learning_rate": 0.003,
      "loss": 4.0896,
      "step": 14782
    },
    {
      "epoch": 0.14783,
      "grad_norm": 1.1172227373523123,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 14783
    },
    {
      "epoch": 0.14784,
      "grad_norm": 1.1037040152748328,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 14784
    },
    {
      "epoch": 0.14785,
      "grad_norm": 1.2164798999169917,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 14785
    },
    {
      "epoch": 0.14786,
      "grad_norm": 1.1894087594233522,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 14786
    },
    {
      "epoch": 0.14787,
      "grad_norm": 1.1490904386061846,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 14787
    },
    {
      "epoch": 0.14788,
      "grad_norm": 1.3607172116554545,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 14788
    },
    {
      "epoch": 0.14789,
      "grad_norm": 1.1301412998785627,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 14789
    },
    {
      "epoch": 0.1479,
      "grad_norm": 1.3003399748050446,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 14790
    },
    {
      "epoch": 0.14791,
      "grad_norm": 1.1210026984885646,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 14791
    },
    {
      "epoch": 0.14792,
      "grad_norm": 1.0584248662119486,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 14792
    },
    {
      "epoch": 0.14793,
      "grad_norm": 0.9511820155749877,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 14793
    },
    {
      "epoch": 0.14794,
      "grad_norm": 1.3841734053580776,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 14794
    },
    {
      "epoch": 0.14795,
      "grad_norm": 1.2774076165864907,
      "learning_rate": 0.003,
      "loss": 4.112,
      "step": 14795
    },
    {
      "epoch": 0.14796,
      "grad_norm": 1.170578313981786,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 14796
    },
    {
      "epoch": 0.14797,
      "grad_norm": 1.0440748467826015,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 14797
    },
    {
      "epoch": 0.14798,
      "grad_norm": 1.4750922628950431,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 14798
    },
    {
      "epoch": 0.14799,
      "grad_norm": 1.1759901251394247,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 14799
    },
    {
      "epoch": 0.148,
      "grad_norm": 1.2119667526250828,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 14800
    },
    {
      "epoch": 0.14801,
      "grad_norm": 1.2058549245284778,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 14801
    },
    {
      "epoch": 0.14802,
      "grad_norm": 1.2094443172169136,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 14802
    },
    {
      "epoch": 0.14803,
      "grad_norm": 1.4077305093241759,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 14803
    },
    {
      "epoch": 0.14804,
      "grad_norm": 0.8884195461234354,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 14804
    },
    {
      "epoch": 0.14805,
      "grad_norm": 1.2496416639608543,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 14805
    },
    {
      "epoch": 0.14806,
      "grad_norm": 1.1645065589649986,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 14806
    },
    {
      "epoch": 0.14807,
      "grad_norm": 1.069211692346441,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 14807
    },
    {
      "epoch": 0.14808,
      "grad_norm": 1.3547410373763051,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 14808
    },
    {
      "epoch": 0.14809,
      "grad_norm": 0.9696174075795378,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 14809
    },
    {
      "epoch": 0.1481,
      "grad_norm": 1.0054300191239651,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 14810
    },
    {
      "epoch": 0.14811,
      "grad_norm": 1.3234824372039642,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 14811
    },
    {
      "epoch": 0.14812,
      "grad_norm": 1.1056347507868667,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 14812
    },
    {
      "epoch": 0.14813,
      "grad_norm": 1.2361695219915105,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 14813
    },
    {
      "epoch": 0.14814,
      "grad_norm": 1.2422214003764849,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 14814
    },
    {
      "epoch": 0.14815,
      "grad_norm": 1.4860662128172797,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 14815
    },
    {
      "epoch": 0.14816,
      "grad_norm": 1.095204934951978,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 14816
    },
    {
      "epoch": 0.14817,
      "grad_norm": 1.0549027373607387,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 14817
    },
    {
      "epoch": 0.14818,
      "grad_norm": 1.4257982540616287,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 14818
    },
    {
      "epoch": 0.14819,
      "grad_norm": 1.0022689449589357,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 14819
    },
    {
      "epoch": 0.1482,
      "grad_norm": 1.430846028214134,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 14820
    },
    {
      "epoch": 0.14821,
      "grad_norm": 1.222262674104428,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 14821
    },
    {
      "epoch": 0.14822,
      "grad_norm": 1.0746599463987394,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 14822
    },
    {
      "epoch": 0.14823,
      "grad_norm": 1.2059917161328615,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 14823
    },
    {
      "epoch": 0.14824,
      "grad_norm": 1.258274546832546,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 14824
    },
    {
      "epoch": 0.14825,
      "grad_norm": 1.1351255207782565,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 14825
    },
    {
      "epoch": 0.14826,
      "grad_norm": 1.1827242960166198,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 14826
    },
    {
      "epoch": 0.14827,
      "grad_norm": 1.075962939782082,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 14827
    },
    {
      "epoch": 0.14828,
      "grad_norm": 1.2771551815105726,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 14828
    },
    {
      "epoch": 0.14829,
      "grad_norm": 1.1121069099361611,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 14829
    },
    {
      "epoch": 0.1483,
      "grad_norm": 1.0363683408331907,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 14830
    },
    {
      "epoch": 0.14831,
      "grad_norm": 1.3818657844749311,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 14831
    },
    {
      "epoch": 0.14832,
      "grad_norm": 1.0801904208892987,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 14832
    },
    {
      "epoch": 0.14833,
      "grad_norm": 1.4360579789876684,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 14833
    },
    {
      "epoch": 0.14834,
      "grad_norm": 1.1531827140412088,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 14834
    },
    {
      "epoch": 0.14835,
      "grad_norm": 1.201525696367292,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 14835
    },
    {
      "epoch": 0.14836,
      "grad_norm": 1.0297719350270018,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 14836
    },
    {
      "epoch": 0.14837,
      "grad_norm": 1.3917502124564454,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 14837
    },
    {
      "epoch": 0.14838,
      "grad_norm": 0.9675947716587852,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 14838
    },
    {
      "epoch": 0.14839,
      "grad_norm": 1.0119005973054083,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 14839
    },
    {
      "epoch": 0.1484,
      "grad_norm": 1.325765639484017,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 14840
    },
    {
      "epoch": 0.14841,
      "grad_norm": 1.1782336088419427,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 14841
    },
    {
      "epoch": 0.14842,
      "grad_norm": 1.183012556136071,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 14842
    },
    {
      "epoch": 0.14843,
      "grad_norm": 1.3779721557010383,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 14843
    },
    {
      "epoch": 0.14844,
      "grad_norm": 1.1266000946049708,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 14844
    },
    {
      "epoch": 0.14845,
      "grad_norm": 1.1059549469170604,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 14845
    },
    {
      "epoch": 0.14846,
      "grad_norm": 1.1584890768547578,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 14846
    },
    {
      "epoch": 0.14847,
      "grad_norm": 1.3109204457867119,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 14847
    },
    {
      "epoch": 0.14848,
      "grad_norm": 1.033589018387824,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 14848
    },
    {
      "epoch": 0.14849,
      "grad_norm": 1.4595391695249693,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 14849
    },
    {
      "epoch": 0.1485,
      "grad_norm": 1.102563723807411,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 14850
    },
    {
      "epoch": 0.14851,
      "grad_norm": 1.2581620355315035,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 14851
    },
    {
      "epoch": 0.14852,
      "grad_norm": 1.1974611892483713,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 14852
    },
    {
      "epoch": 0.14853,
      "grad_norm": 1.1440570289725562,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 14853
    },
    {
      "epoch": 0.14854,
      "grad_norm": 1.4276809502727477,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 14854
    },
    {
      "epoch": 0.14855,
      "grad_norm": 1.0365841652167092,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 14855
    },
    {
      "epoch": 0.14856,
      "grad_norm": 1.3196888860347857,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 14856
    },
    {
      "epoch": 0.14857,
      "grad_norm": 1.2096838857761567,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 14857
    },
    {
      "epoch": 0.14858,
      "grad_norm": 1.1539413366906002,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 14858
    },
    {
      "epoch": 0.14859,
      "grad_norm": 1.1643571950626377,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 14859
    },
    {
      "epoch": 0.1486,
      "grad_norm": 1.1594988548082985,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 14860
    },
    {
      "epoch": 0.14861,
      "grad_norm": 1.3592161138599952,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 14861
    },
    {
      "epoch": 0.14862,
      "grad_norm": 1.0451287814201298,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 14862
    },
    {
      "epoch": 0.14863,
      "grad_norm": 1.5454792569609612,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 14863
    },
    {
      "epoch": 0.14864,
      "grad_norm": 0.8998383802889542,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 14864
    },
    {
      "epoch": 0.14865,
      "grad_norm": 1.1008145659534658,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 14865
    },
    {
      "epoch": 0.14866,
      "grad_norm": 1.4084066436583653,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 14866
    },
    {
      "epoch": 0.14867,
      "grad_norm": 1.1237411421162848,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 14867
    },
    {
      "epoch": 0.14868,
      "grad_norm": 1.2005867055587227,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 14868
    },
    {
      "epoch": 0.14869,
      "grad_norm": 1.1066652314635532,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 14869
    },
    {
      "epoch": 0.1487,
      "grad_norm": 0.9622231423033722,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 14870
    },
    {
      "epoch": 0.14871,
      "grad_norm": 1.1404170600749484,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 14871
    },
    {
      "epoch": 0.14872,
      "grad_norm": 1.3609976338398784,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 14872
    },
    {
      "epoch": 0.14873,
      "grad_norm": 1.2893910141485294,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 14873
    },
    {
      "epoch": 0.14874,
      "grad_norm": 1.2931408987911757,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 14874
    },
    {
      "epoch": 0.14875,
      "grad_norm": 1.471807512574448,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 14875
    },
    {
      "epoch": 0.14876,
      "grad_norm": 1.2090411680675062,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 14876
    },
    {
      "epoch": 0.14877,
      "grad_norm": 1.0829059112698427,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 14877
    },
    {
      "epoch": 0.14878,
      "grad_norm": 1.4129900132682789,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 14878
    },
    {
      "epoch": 0.14879,
      "grad_norm": 0.8776243325674575,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 14879
    },
    {
      "epoch": 0.1488,
      "grad_norm": 1.0485285841302954,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 14880
    },
    {
      "epoch": 0.14881,
      "grad_norm": 1.35981606046848,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 14881
    },
    {
      "epoch": 0.14882,
      "grad_norm": 1.0436195240498212,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 14882
    },
    {
      "epoch": 0.14883,
      "grad_norm": 1.3226492069477507,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 14883
    },
    {
      "epoch": 0.14884,
      "grad_norm": 0.9986253500027842,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 14884
    },
    {
      "epoch": 0.14885,
      "grad_norm": 1.5201063299213164,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 14885
    },
    {
      "epoch": 0.14886,
      "grad_norm": 0.9757732233985466,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 14886
    },
    {
      "epoch": 0.14887,
      "grad_norm": 1.4476204357522973,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 14887
    },
    {
      "epoch": 0.14888,
      "grad_norm": 1.27074467688112,
      "learning_rate": 0.003,
      "loss": 4.1077,
      "step": 14888
    },
    {
      "epoch": 0.14889,
      "grad_norm": 1.1092463388578182,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 14889
    },
    {
      "epoch": 0.1489,
      "grad_norm": 1.214671239122057,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 14890
    },
    {
      "epoch": 0.14891,
      "grad_norm": 1.0775666477090526,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 14891
    },
    {
      "epoch": 0.14892,
      "grad_norm": 1.1798038251945897,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 14892
    },
    {
      "epoch": 0.14893,
      "grad_norm": 1.046311938985408,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 14893
    },
    {
      "epoch": 0.14894,
      "grad_norm": 1.2238737392714125,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 14894
    },
    {
      "epoch": 0.14895,
      "grad_norm": 1.1095924412059364,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 14895
    },
    {
      "epoch": 0.14896,
      "grad_norm": 1.2527084956854004,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 14896
    },
    {
      "epoch": 0.14897,
      "grad_norm": 1.1403618171116803,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 14897
    },
    {
      "epoch": 0.14898,
      "grad_norm": 1.12014813866488,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 14898
    },
    {
      "epoch": 0.14899,
      "grad_norm": 1.1011878648338038,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 14899
    },
    {
      "epoch": 0.149,
      "grad_norm": 1.322568954041676,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 14900
    },
    {
      "epoch": 0.14901,
      "grad_norm": 1.1483799144543763,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 14901
    },
    {
      "epoch": 0.14902,
      "grad_norm": 1.710842623398834,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 14902
    },
    {
      "epoch": 0.14903,
      "grad_norm": 1.244171088897236,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 14903
    },
    {
      "epoch": 0.14904,
      "grad_norm": 1.2131362467533762,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 14904
    },
    {
      "epoch": 0.14905,
      "grad_norm": 1.2296471336790669,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 14905
    },
    {
      "epoch": 0.14906,
      "grad_norm": 1.2984408456967405,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 14906
    },
    {
      "epoch": 0.14907,
      "grad_norm": 1.0883845872709204,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 14907
    },
    {
      "epoch": 0.14908,
      "grad_norm": 1.0840404616703376,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 14908
    },
    {
      "epoch": 0.14909,
      "grad_norm": 1.181748263320111,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 14909
    },
    {
      "epoch": 0.1491,
      "grad_norm": 1.2663945605998859,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 14910
    },
    {
      "epoch": 0.14911,
      "grad_norm": 1.187648049989691,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 14911
    },
    {
      "epoch": 0.14912,
      "grad_norm": 1.3811751228991052,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 14912
    },
    {
      "epoch": 0.14913,
      "grad_norm": 1.0219001247634045,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 14913
    },
    {
      "epoch": 0.14914,
      "grad_norm": 1.1593021387672653,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 14914
    },
    {
      "epoch": 0.14915,
      "grad_norm": 1.1568549264162649,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 14915
    },
    {
      "epoch": 0.14916,
      "grad_norm": 1.3202195197549011,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 14916
    },
    {
      "epoch": 0.14917,
      "grad_norm": 0.9020011592700239,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 14917
    },
    {
      "epoch": 0.14918,
      "grad_norm": 1.215424432279608,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 14918
    },
    {
      "epoch": 0.14919,
      "grad_norm": 1.1570968488310853,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 14919
    },
    {
      "epoch": 0.1492,
      "grad_norm": 1.1472721476899068,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 14920
    },
    {
      "epoch": 0.14921,
      "grad_norm": 1.0858959326819342,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 14921
    },
    {
      "epoch": 0.14922,
      "grad_norm": 1.2509352187724492,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 14922
    },
    {
      "epoch": 0.14923,
      "grad_norm": 1.2945174545370837,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 14923
    },
    {
      "epoch": 0.14924,
      "grad_norm": 1.1346005544615452,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 14924
    },
    {
      "epoch": 0.14925,
      "grad_norm": 1.1845853865243938,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 14925
    },
    {
      "epoch": 0.14926,
      "grad_norm": 1.1090740148524028,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 14926
    },
    {
      "epoch": 0.14927,
      "grad_norm": 1.358250117436443,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 14927
    },
    {
      "epoch": 0.14928,
      "grad_norm": 1.3453422985843648,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 14928
    },
    {
      "epoch": 0.14929,
      "grad_norm": 1.1702428717059403,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 14929
    },
    {
      "epoch": 0.1493,
      "grad_norm": 1.1656251202564534,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 14930
    },
    {
      "epoch": 0.14931,
      "grad_norm": 1.098970939588012,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 14931
    },
    {
      "epoch": 0.14932,
      "grad_norm": 1.279436473773213,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 14932
    },
    {
      "epoch": 0.14933,
      "grad_norm": 1.0961977273192847,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 14933
    },
    {
      "epoch": 0.14934,
      "grad_norm": 1.5309509003949007,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 14934
    },
    {
      "epoch": 0.14935,
      "grad_norm": 1.1528254041188803,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 14935
    },
    {
      "epoch": 0.14936,
      "grad_norm": 1.2167835400582947,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 14936
    },
    {
      "epoch": 0.14937,
      "grad_norm": 1.3413070347149811,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 14937
    },
    {
      "epoch": 0.14938,
      "grad_norm": 1.0683864300767574,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 14938
    },
    {
      "epoch": 0.14939,
      "grad_norm": 1.2664966641085762,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 14939
    },
    {
      "epoch": 0.1494,
      "grad_norm": 1.0407237225742514,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 14940
    },
    {
      "epoch": 0.14941,
      "grad_norm": 1.3931658434384966,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 14941
    },
    {
      "epoch": 0.14942,
      "grad_norm": 1.1109856813737475,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 14942
    },
    {
      "epoch": 0.14943,
      "grad_norm": 1.2908550200559337,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 14943
    },
    {
      "epoch": 0.14944,
      "grad_norm": 1.186703869958113,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 14944
    },
    {
      "epoch": 0.14945,
      "grad_norm": 1.1522749739993157,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 14945
    },
    {
      "epoch": 0.14946,
      "grad_norm": 1.0896949660175999,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 14946
    },
    {
      "epoch": 0.14947,
      "grad_norm": 1.3521263557717234,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 14947
    },
    {
      "epoch": 0.14948,
      "grad_norm": 1.1608496283133627,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 14948
    },
    {
      "epoch": 0.14949,
      "grad_norm": 1.2930816709791524,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 14949
    },
    {
      "epoch": 0.1495,
      "grad_norm": 0.985064802221724,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 14950
    },
    {
      "epoch": 0.14951,
      "grad_norm": 1.3801184624443306,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 14951
    },
    {
      "epoch": 0.14952,
      "grad_norm": 1.2195738258753523,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 14952
    },
    {
      "epoch": 0.14953,
      "grad_norm": 1.1345932271080388,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 14953
    },
    {
      "epoch": 0.14954,
      "grad_norm": 1.5557880953706804,
      "learning_rate": 0.003,
      "loss": 4.1055,
      "step": 14954
    },
    {
      "epoch": 0.14955,
      "grad_norm": 0.9071133427738178,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 14955
    },
    {
      "epoch": 0.14956,
      "grad_norm": 1.468982543843281,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 14956
    },
    {
      "epoch": 0.14957,
      "grad_norm": 1.0423328924259037,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 14957
    },
    {
      "epoch": 0.14958,
      "grad_norm": 1.300583567325534,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 14958
    },
    {
      "epoch": 0.14959,
      "grad_norm": 1.0981910385097837,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 14959
    },
    {
      "epoch": 0.1496,
      "grad_norm": 1.0490751947643875,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 14960
    },
    {
      "epoch": 0.14961,
      "grad_norm": 1.2816163773619358,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 14961
    },
    {
      "epoch": 0.14962,
      "grad_norm": 1.1466675209377373,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 14962
    },
    {
      "epoch": 0.14963,
      "grad_norm": 1.290152566123964,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 14963
    },
    {
      "epoch": 0.14964,
      "grad_norm": 1.261020616938976,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 14964
    },
    {
      "epoch": 0.14965,
      "grad_norm": 1.4649546679785381,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 14965
    },
    {
      "epoch": 0.14966,
      "grad_norm": 1.1441136056324674,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 14966
    },
    {
      "epoch": 0.14967,
      "grad_norm": 1.2019750797486521,
      "learning_rate": 0.003,
      "loss": 4.0921,
      "step": 14967
    },
    {
      "epoch": 0.14968,
      "grad_norm": 1.2306693484236046,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 14968
    },
    {
      "epoch": 0.14969,
      "grad_norm": 1.2777053540849892,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 14969
    },
    {
      "epoch": 0.1497,
      "grad_norm": 1.1084346925337791,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 14970
    },
    {
      "epoch": 0.14971,
      "grad_norm": 1.3229335658679369,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 14971
    },
    {
      "epoch": 0.14972,
      "grad_norm": 1.095538056703894,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 14972
    },
    {
      "epoch": 0.14973,
      "grad_norm": 1.204405878347447,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 14973
    },
    {
      "epoch": 0.14974,
      "grad_norm": 1.2368040421511077,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 14974
    },
    {
      "epoch": 0.14975,
      "grad_norm": 1.4194541955071356,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 14975
    },
    {
      "epoch": 0.14976,
      "grad_norm": 1.0096839483393467,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 14976
    },
    {
      "epoch": 0.14977,
      "grad_norm": 1.3009380635532586,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 14977
    },
    {
      "epoch": 0.14978,
      "grad_norm": 1.0562909428594593,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 14978
    },
    {
      "epoch": 0.14979,
      "grad_norm": 1.2309819538095335,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 14979
    },
    {
      "epoch": 0.1498,
      "grad_norm": 1.1181451782884846,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 14980
    },
    {
      "epoch": 0.14981,
      "grad_norm": 1.1433863177365757,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 14981
    },
    {
      "epoch": 0.14982,
      "grad_norm": 1.2583042755829557,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 14982
    },
    {
      "epoch": 0.14983,
      "grad_norm": 1.1190272068614602,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 14983
    },
    {
      "epoch": 0.14984,
      "grad_norm": 1.4018613975193144,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 14984
    },
    {
      "epoch": 0.14985,
      "grad_norm": 1.0417216427534308,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 14985
    },
    {
      "epoch": 0.14986,
      "grad_norm": 1.134960157527509,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 14986
    },
    {
      "epoch": 0.14987,
      "grad_norm": 1.1174795370938495,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 14987
    },
    {
      "epoch": 0.14988,
      "grad_norm": 1.296639112026413,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 14988
    },
    {
      "epoch": 0.14989,
      "grad_norm": 1.3261095373498002,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 14989
    },
    {
      "epoch": 0.1499,
      "grad_norm": 1.2075226522964326,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 14990
    },
    {
      "epoch": 0.14991,
      "grad_norm": 1.008897068961723,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 14991
    },
    {
      "epoch": 0.14992,
      "grad_norm": 1.1942021518709378,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 14992
    },
    {
      "epoch": 0.14993,
      "grad_norm": 1.2211219368278374,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 14993
    },
    {
      "epoch": 0.14994,
      "grad_norm": 1.0870179584685973,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 14994
    },
    {
      "epoch": 0.14995,
      "grad_norm": 1.399608332516027,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 14995
    },
    {
      "epoch": 0.14996,
      "grad_norm": 1.0137612423797608,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 14996
    },
    {
      "epoch": 0.14997,
      "grad_norm": 1.3744351672232484,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 14997
    },
    {
      "epoch": 0.14998,
      "grad_norm": 0.9940359625848266,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 14998
    },
    {
      "epoch": 0.14999,
      "grad_norm": 1.4399413719162288,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 14999
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.4974736632895418,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 15000
    },
    {
      "epoch": 0.15001,
      "grad_norm": 1.0668423995174225,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 15001
    },
    {
      "epoch": 0.15002,
      "grad_norm": 1.2110486697491223,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 15002
    },
    {
      "epoch": 0.15003,
      "grad_norm": 1.1795308324685607,
      "learning_rate": 0.003,
      "loss": 4.0913,
      "step": 15003
    },
    {
      "epoch": 0.15004,
      "grad_norm": 1.182856925873376,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 15004
    },
    {
      "epoch": 0.15005,
      "grad_norm": 1.2054796107168688,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 15005
    },
    {
      "epoch": 0.15006,
      "grad_norm": 1.1233127843365838,
      "learning_rate": 0.003,
      "loss": 4.0952,
      "step": 15006
    },
    {
      "epoch": 0.15007,
      "grad_norm": 1.190740139784129,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 15007
    },
    {
      "epoch": 0.15008,
      "grad_norm": 1.1628208665821074,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 15008
    },
    {
      "epoch": 0.15009,
      "grad_norm": 1.184497287054005,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 15009
    },
    {
      "epoch": 0.1501,
      "grad_norm": 1.2170802962909146,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 15010
    },
    {
      "epoch": 0.15011,
      "grad_norm": 1.2448713816234198,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 15011
    },
    {
      "epoch": 0.15012,
      "grad_norm": 1.2795966193025246,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 15012
    },
    {
      "epoch": 0.15013,
      "grad_norm": 1.1253323596450409,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 15013
    },
    {
      "epoch": 0.15014,
      "grad_norm": 1.3692104101867797,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 15014
    },
    {
      "epoch": 0.15015,
      "grad_norm": 1.2394377326991453,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 15015
    },
    {
      "epoch": 0.15016,
      "grad_norm": 1.1450312041451869,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 15016
    },
    {
      "epoch": 0.15017,
      "grad_norm": 1.1937362084722123,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 15017
    },
    {
      "epoch": 0.15018,
      "grad_norm": 1.288688457822522,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 15018
    },
    {
      "epoch": 0.15019,
      "grad_norm": 1.1409664304451768,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 15019
    },
    {
      "epoch": 0.1502,
      "grad_norm": 1.0873027688479973,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 15020
    },
    {
      "epoch": 0.15021,
      "grad_norm": 1.2665845047581592,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 15021
    },
    {
      "epoch": 0.15022,
      "grad_norm": 0.9745123669411565,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 15022
    },
    {
      "epoch": 0.15023,
      "grad_norm": 1.297378059616036,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 15023
    },
    {
      "epoch": 0.15024,
      "grad_norm": 1.0679672509082843,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 15024
    },
    {
      "epoch": 0.15025,
      "grad_norm": 1.3637170088458421,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 15025
    },
    {
      "epoch": 0.15026,
      "grad_norm": 1.204927160864958,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 15026
    },
    {
      "epoch": 0.15027,
      "grad_norm": 1.2156307138708313,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 15027
    },
    {
      "epoch": 0.15028,
      "grad_norm": 1.1531164890056576,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 15028
    },
    {
      "epoch": 0.15029,
      "grad_norm": 1.2553312157873282,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 15029
    },
    {
      "epoch": 0.1503,
      "grad_norm": 0.9522168739397687,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 15030
    },
    {
      "epoch": 0.15031,
      "grad_norm": 1.226122357521874,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 15031
    },
    {
      "epoch": 0.15032,
      "grad_norm": 1.5474125500292253,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 15032
    },
    {
      "epoch": 0.15033,
      "grad_norm": 1.2647460146585578,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 15033
    },
    {
      "epoch": 0.15034,
      "grad_norm": 1.3907590251108537,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 15034
    },
    {
      "epoch": 0.15035,
      "grad_norm": 1.0234787803166052,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 15035
    },
    {
      "epoch": 0.15036,
      "grad_norm": 1.337378293856488,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 15036
    },
    {
      "epoch": 0.15037,
      "grad_norm": 1.1095014826228464,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 15037
    },
    {
      "epoch": 0.15038,
      "grad_norm": 1.2913904076297182,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 15038
    },
    {
      "epoch": 0.15039,
      "grad_norm": 1.0887255711713681,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 15039
    },
    {
      "epoch": 0.1504,
      "grad_norm": 1.2547438431290638,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 15040
    },
    {
      "epoch": 0.15041,
      "grad_norm": 1.4240124528037976,
      "learning_rate": 0.003,
      "loss": 4.1082,
      "step": 15041
    },
    {
      "epoch": 0.15042,
      "grad_norm": 1.1673943418670996,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 15042
    },
    {
      "epoch": 0.15043,
      "grad_norm": 1.0867675070481164,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15043
    },
    {
      "epoch": 0.15044,
      "grad_norm": 1.1782378462470475,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15044
    },
    {
      "epoch": 0.15045,
      "grad_norm": 1.0872039407960756,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 15045
    },
    {
      "epoch": 0.15046,
      "grad_norm": 1.3736289236684958,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 15046
    },
    {
      "epoch": 0.15047,
      "grad_norm": 1.1308513861955674,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 15047
    },
    {
      "epoch": 0.15048,
      "grad_norm": 1.2090821969797114,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 15048
    },
    {
      "epoch": 0.15049,
      "grad_norm": 1.1373820398442709,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 15049
    },
    {
      "epoch": 0.1505,
      "grad_norm": 1.0984446995600496,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 15050
    },
    {
      "epoch": 0.15051,
      "grad_norm": 1.155054800503449,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 15051
    },
    {
      "epoch": 0.15052,
      "grad_norm": 1.3827231447355273,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 15052
    },
    {
      "epoch": 0.15053,
      "grad_norm": 1.1833574261390736,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 15053
    },
    {
      "epoch": 0.15054,
      "grad_norm": 1.195554739703261,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 15054
    },
    {
      "epoch": 0.15055,
      "grad_norm": 1.2453938734519396,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 15055
    },
    {
      "epoch": 0.15056,
      "grad_norm": 1.2110376922895725,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 15056
    },
    {
      "epoch": 0.15057,
      "grad_norm": 1.2151309358317508,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15057
    },
    {
      "epoch": 0.15058,
      "grad_norm": 1.259729714323035,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 15058
    },
    {
      "epoch": 0.15059,
      "grad_norm": 1.1680098473605123,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 15059
    },
    {
      "epoch": 0.1506,
      "grad_norm": 1.0365628276921912,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 15060
    },
    {
      "epoch": 0.15061,
      "grad_norm": 1.1860587795685422,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 15061
    },
    {
      "epoch": 0.15062,
      "grad_norm": 1.1652134132806091,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 15062
    },
    {
      "epoch": 0.15063,
      "grad_norm": 1.1893670897157664,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 15063
    },
    {
      "epoch": 0.15064,
      "grad_norm": 1.2641455031540747,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 15064
    },
    {
      "epoch": 0.15065,
      "grad_norm": 1.179907937763059,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 15065
    },
    {
      "epoch": 0.15066,
      "grad_norm": 1.256981536680123,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 15066
    },
    {
      "epoch": 0.15067,
      "grad_norm": 1.0820909858204335,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 15067
    },
    {
      "epoch": 0.15068,
      "grad_norm": 1.184433524088628,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 15068
    },
    {
      "epoch": 0.15069,
      "grad_norm": 1.2716471033861365,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 15069
    },
    {
      "epoch": 0.1507,
      "grad_norm": 1.1683331028574926,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 15070
    },
    {
      "epoch": 0.15071,
      "grad_norm": 1.1797972267504941,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 15071
    },
    {
      "epoch": 0.15072,
      "grad_norm": 1.1302381466298992,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 15072
    },
    {
      "epoch": 0.15073,
      "grad_norm": 1.355350169867995,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 15073
    },
    {
      "epoch": 0.15074,
      "grad_norm": 1.044651911324449,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 15074
    },
    {
      "epoch": 0.15075,
      "grad_norm": 1.3046654742749575,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 15075
    },
    {
      "epoch": 0.15076,
      "grad_norm": 1.1648922397030492,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 15076
    },
    {
      "epoch": 0.15077,
      "grad_norm": 1.2806126751874845,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 15077
    },
    {
      "epoch": 0.15078,
      "grad_norm": 1.3528938353220739,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 15078
    },
    {
      "epoch": 0.15079,
      "grad_norm": 1.1861142100163455,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 15079
    },
    {
      "epoch": 0.1508,
      "grad_norm": 1.0589951871882235,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 15080
    },
    {
      "epoch": 0.15081,
      "grad_norm": 1.2318643421068618,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 15081
    },
    {
      "epoch": 0.15082,
      "grad_norm": 1.123727186298089,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 15082
    },
    {
      "epoch": 0.15083,
      "grad_norm": 1.3178690838827816,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 15083
    },
    {
      "epoch": 0.15084,
      "grad_norm": 1.0237784168087798,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 15084
    },
    {
      "epoch": 0.15085,
      "grad_norm": 1.492155585068999,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 15085
    },
    {
      "epoch": 0.15086,
      "grad_norm": 1.1925963482272997,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 15086
    },
    {
      "epoch": 0.15087,
      "grad_norm": 1.4319500990788507,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 15087
    },
    {
      "epoch": 0.15088,
      "grad_norm": 1.060355543294847,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 15088
    },
    {
      "epoch": 0.15089,
      "grad_norm": 1.3104618148468035,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 15089
    },
    {
      "epoch": 0.1509,
      "grad_norm": 1.2251162778987241,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 15090
    },
    {
      "epoch": 0.15091,
      "grad_norm": 1.306080204600944,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 15091
    },
    {
      "epoch": 0.15092,
      "grad_norm": 1.0915870298734536,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 15092
    },
    {
      "epoch": 0.15093,
      "grad_norm": 1.2119632574467158,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 15093
    },
    {
      "epoch": 0.15094,
      "grad_norm": 1.1595876781351768,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 15094
    },
    {
      "epoch": 0.15095,
      "grad_norm": 1.119325709139666,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 15095
    },
    {
      "epoch": 0.15096,
      "grad_norm": 1.2044471540428243,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 15096
    },
    {
      "epoch": 0.15097,
      "grad_norm": 1.4245175575214675,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 15097
    },
    {
      "epoch": 0.15098,
      "grad_norm": 1.5517681537973362,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 15098
    },
    {
      "epoch": 0.15099,
      "grad_norm": 1.0067593744034253,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 15099
    },
    {
      "epoch": 0.151,
      "grad_norm": 1.2089884789065866,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 15100
    },
    {
      "epoch": 0.15101,
      "grad_norm": 1.105057199052122,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 15101
    },
    {
      "epoch": 0.15102,
      "grad_norm": 1.3101763159722803,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 15102
    },
    {
      "epoch": 0.15103,
      "grad_norm": 1.0291772237772652,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 15103
    },
    {
      "epoch": 0.15104,
      "grad_norm": 1.3908177507943922,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 15104
    },
    {
      "epoch": 0.15105,
      "grad_norm": 0.993120529030314,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 15105
    },
    {
      "epoch": 0.15106,
      "grad_norm": 1.2450894355942614,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 15106
    },
    {
      "epoch": 0.15107,
      "grad_norm": 1.056113877015572,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 15107
    },
    {
      "epoch": 0.15108,
      "grad_norm": 1.1662206734963574,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 15108
    },
    {
      "epoch": 0.15109,
      "grad_norm": 1.2621098326636795,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 15109
    },
    {
      "epoch": 0.1511,
      "grad_norm": 1.3570631825072197,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 15110
    },
    {
      "epoch": 0.15111,
      "grad_norm": 1.3311454757868875,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15111
    },
    {
      "epoch": 0.15112,
      "grad_norm": 1.2989363196563721,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 15112
    },
    {
      "epoch": 0.15113,
      "grad_norm": 0.9189229359882644,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 15113
    },
    {
      "epoch": 0.15114,
      "grad_norm": 1.240809467869954,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 15114
    },
    {
      "epoch": 0.15115,
      "grad_norm": 1.1928550783856977,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 15115
    },
    {
      "epoch": 0.15116,
      "grad_norm": 1.2624829089617984,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 15116
    },
    {
      "epoch": 0.15117,
      "grad_norm": 1.0273366211589472,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 15117
    },
    {
      "epoch": 0.15118,
      "grad_norm": 1.3046658005508183,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 15118
    },
    {
      "epoch": 0.15119,
      "grad_norm": 1.159606861497468,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 15119
    },
    {
      "epoch": 0.1512,
      "grad_norm": 1.3059467524421076,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 15120
    },
    {
      "epoch": 0.15121,
      "grad_norm": 1.029847433937725,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 15121
    },
    {
      "epoch": 0.15122,
      "grad_norm": 1.4482892460541101,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 15122
    },
    {
      "epoch": 0.15123,
      "grad_norm": 1.0429287950728148,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 15123
    },
    {
      "epoch": 0.15124,
      "grad_norm": 1.2259003329089468,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 15124
    },
    {
      "epoch": 0.15125,
      "grad_norm": 1.278091232104381,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 15125
    },
    {
      "epoch": 0.15126,
      "grad_norm": 1.1938494318875001,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15126
    },
    {
      "epoch": 0.15127,
      "grad_norm": 1.2589849654916394,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 15127
    },
    {
      "epoch": 0.15128,
      "grad_norm": 1.1330547145294245,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 15128
    },
    {
      "epoch": 0.15129,
      "grad_norm": 1.097005960935907,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 15129
    },
    {
      "epoch": 0.1513,
      "grad_norm": 1.1729004214319463,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 15130
    },
    {
      "epoch": 0.15131,
      "grad_norm": 1.1726540565553825,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 15131
    },
    {
      "epoch": 0.15132,
      "grad_norm": 1.2424736420971347,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15132
    },
    {
      "epoch": 0.15133,
      "grad_norm": 1.1448268613315058,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 15133
    },
    {
      "epoch": 0.15134,
      "grad_norm": 1.5708460693967223,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 15134
    },
    {
      "epoch": 0.15135,
      "grad_norm": 1.040378867053243,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 15135
    },
    {
      "epoch": 0.15136,
      "grad_norm": 1.4040593854991856,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 15136
    },
    {
      "epoch": 0.15137,
      "grad_norm": 1.2011938592676896,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 15137
    },
    {
      "epoch": 0.15138,
      "grad_norm": 1.2367047396246862,
      "learning_rate": 0.003,
      "loss": 4.1022,
      "step": 15138
    },
    {
      "epoch": 0.15139,
      "grad_norm": 1.1293395957439654,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 15139
    },
    {
      "epoch": 0.1514,
      "grad_norm": 1.3090085398590288,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 15140
    },
    {
      "epoch": 0.15141,
      "grad_norm": 1.0075436032183622,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 15141
    },
    {
      "epoch": 0.15142,
      "grad_norm": 1.33714212691153,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 15142
    },
    {
      "epoch": 0.15143,
      "grad_norm": 1.1663913366871692,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 15143
    },
    {
      "epoch": 0.15144,
      "grad_norm": 1.2132377498419114,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 15144
    },
    {
      "epoch": 0.15145,
      "grad_norm": 1.2259208725380872,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 15145
    },
    {
      "epoch": 0.15146,
      "grad_norm": 1.297997547043007,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 15146
    },
    {
      "epoch": 0.15147,
      "grad_norm": 1.1330532631224162,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 15147
    },
    {
      "epoch": 0.15148,
      "grad_norm": 1.2123133697932726,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 15148
    },
    {
      "epoch": 0.15149,
      "grad_norm": 1.2721907366832415,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 15149
    },
    {
      "epoch": 0.1515,
      "grad_norm": 1.2258759438061497,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 15150
    },
    {
      "epoch": 0.15151,
      "grad_norm": 1.2837753617747198,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 15151
    },
    {
      "epoch": 0.15152,
      "grad_norm": 1.0306162316863614,
      "learning_rate": 0.003,
      "loss": 4.0983,
      "step": 15152
    },
    {
      "epoch": 0.15153,
      "grad_norm": 1.5644759764698157,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 15153
    },
    {
      "epoch": 0.15154,
      "grad_norm": 0.9980410349143073,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 15154
    },
    {
      "epoch": 0.15155,
      "grad_norm": 1.3128438968987235,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 15155
    },
    {
      "epoch": 0.15156,
      "grad_norm": 1.044644352242352,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 15156
    },
    {
      "epoch": 0.15157,
      "grad_norm": 1.190633698957856,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 15157
    },
    {
      "epoch": 0.15158,
      "grad_norm": 1.1669918387521137,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 15158
    },
    {
      "epoch": 0.15159,
      "grad_norm": 1.222507008110017,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 15159
    },
    {
      "epoch": 0.1516,
      "grad_norm": 1.1100243033486945,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 15160
    },
    {
      "epoch": 0.15161,
      "grad_norm": 1.2258618521306344,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 15161
    },
    {
      "epoch": 0.15162,
      "grad_norm": 1.4331452700729912,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 15162
    },
    {
      "epoch": 0.15163,
      "grad_norm": 1.4096854957474376,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 15163
    },
    {
      "epoch": 0.15164,
      "grad_norm": 1.0366396321569484,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 15164
    },
    {
      "epoch": 0.15165,
      "grad_norm": 1.192406791153559,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 15165
    },
    {
      "epoch": 0.15166,
      "grad_norm": 1.2189832259548985,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 15166
    },
    {
      "epoch": 0.15167,
      "grad_norm": 0.9551976170429402,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 15167
    },
    {
      "epoch": 0.15168,
      "grad_norm": 1.119167860282972,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 15168
    },
    {
      "epoch": 0.15169,
      "grad_norm": 1.1447472033605182,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 15169
    },
    {
      "epoch": 0.1517,
      "grad_norm": 1.2429156332586646,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 15170
    },
    {
      "epoch": 0.15171,
      "grad_norm": 1.298998789476462,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 15171
    },
    {
      "epoch": 0.15172,
      "grad_norm": 1.3241459773471154,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 15172
    },
    {
      "epoch": 0.15173,
      "grad_norm": 1.464002389595656,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 15173
    },
    {
      "epoch": 0.15174,
      "grad_norm": 1.1619733096151428,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 15174
    },
    {
      "epoch": 0.15175,
      "grad_norm": 1.220311346333655,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 15175
    },
    {
      "epoch": 0.15176,
      "grad_norm": 1.1774755736623603,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 15176
    },
    {
      "epoch": 0.15177,
      "grad_norm": 1.270705325277135,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 15177
    },
    {
      "epoch": 0.15178,
      "grad_norm": 1.0688786362101994,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 15178
    },
    {
      "epoch": 0.15179,
      "grad_norm": 1.433993614691496,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15179
    },
    {
      "epoch": 0.1518,
      "grad_norm": 1.045346819367408,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 15180
    },
    {
      "epoch": 0.15181,
      "grad_norm": 1.370623206374627,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 15181
    },
    {
      "epoch": 0.15182,
      "grad_norm": 1.1695371902586094,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 15182
    },
    {
      "epoch": 0.15183,
      "grad_norm": 1.3886462368807206,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 15183
    },
    {
      "epoch": 0.15184,
      "grad_norm": 0.9645218148634334,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 15184
    },
    {
      "epoch": 0.15185,
      "grad_norm": 1.2966774954952045,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15185
    },
    {
      "epoch": 0.15186,
      "grad_norm": 1.2335208988336277,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 15186
    },
    {
      "epoch": 0.15187,
      "grad_norm": 1.0050835608908404,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 15187
    },
    {
      "epoch": 0.15188,
      "grad_norm": 1.3701899653186935,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 15188
    },
    {
      "epoch": 0.15189,
      "grad_norm": 1.135170710607172,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 15189
    },
    {
      "epoch": 0.1519,
      "grad_norm": 1.2432212879200064,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 15190
    },
    {
      "epoch": 0.15191,
      "grad_norm": 1.1103180631951515,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 15191
    },
    {
      "epoch": 0.15192,
      "grad_norm": 1.2425724660435107,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 15192
    },
    {
      "epoch": 0.15193,
      "grad_norm": 1.3533195363117116,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 15193
    },
    {
      "epoch": 0.15194,
      "grad_norm": 1.3515973455544246,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 15194
    },
    {
      "epoch": 0.15195,
      "grad_norm": 1.0261684737287875,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 15195
    },
    {
      "epoch": 0.15196,
      "grad_norm": 1.5674528909788603,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 15196
    },
    {
      "epoch": 0.15197,
      "grad_norm": 1.3803955355175979,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 15197
    },
    {
      "epoch": 0.15198,
      "grad_norm": 1.5062575871981334,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 15198
    },
    {
      "epoch": 0.15199,
      "grad_norm": 1.024630024329231,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 15199
    },
    {
      "epoch": 0.152,
      "grad_norm": 1.213495646655414,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 15200
    },
    {
      "epoch": 0.15201,
      "grad_norm": 1.4510169131842288,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 15201
    },
    {
      "epoch": 0.15202,
      "grad_norm": 1.1302118547381454,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 15202
    },
    {
      "epoch": 0.15203,
      "grad_norm": 1.2725092157742763,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 15203
    },
    {
      "epoch": 0.15204,
      "grad_norm": 1.137982520437394,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 15204
    },
    {
      "epoch": 0.15205,
      "grad_norm": 1.0223284267568893,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 15205
    },
    {
      "epoch": 0.15206,
      "grad_norm": 1.0565281836081886,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 15206
    },
    {
      "epoch": 0.15207,
      "grad_norm": 1.3043821397289166,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 15207
    },
    {
      "epoch": 0.15208,
      "grad_norm": 1.105080361488173,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 15208
    },
    {
      "epoch": 0.15209,
      "grad_norm": 1.1399072306971487,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 15209
    },
    {
      "epoch": 0.1521,
      "grad_norm": 1.1386602307645344,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 15210
    },
    {
      "epoch": 0.15211,
      "grad_norm": 1.0134889061744425,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 15211
    },
    {
      "epoch": 0.15212,
      "grad_norm": 1.2881827316598067,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 15212
    },
    {
      "epoch": 0.15213,
      "grad_norm": 0.9346737937438413,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 15213
    },
    {
      "epoch": 0.15214,
      "grad_norm": 1.2436543960136706,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 15214
    },
    {
      "epoch": 0.15215,
      "grad_norm": 1.1459849786740162,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 15215
    },
    {
      "epoch": 0.15216,
      "grad_norm": 1.2205207180453597,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 15216
    },
    {
      "epoch": 0.15217,
      "grad_norm": 1.5972490115073228,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 15217
    },
    {
      "epoch": 0.15218,
      "grad_norm": 1.0397158701269222,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 15218
    },
    {
      "epoch": 0.15219,
      "grad_norm": 1.454284438026809,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 15219
    },
    {
      "epoch": 0.1522,
      "grad_norm": 1.0013808946685574,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 15220
    },
    {
      "epoch": 0.15221,
      "grad_norm": 1.3739928964687649,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 15221
    },
    {
      "epoch": 0.15222,
      "grad_norm": 1.0891144380179014,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 15222
    },
    {
      "epoch": 0.15223,
      "grad_norm": 1.526998946193764,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 15223
    },
    {
      "epoch": 0.15224,
      "grad_norm": 1.330605515094238,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 15224
    },
    {
      "epoch": 0.15225,
      "grad_norm": 0.9326977690364996,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 15225
    },
    {
      "epoch": 0.15226,
      "grad_norm": 1.299351030717249,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 15226
    },
    {
      "epoch": 0.15227,
      "grad_norm": 1.205549490078649,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 15227
    },
    {
      "epoch": 0.15228,
      "grad_norm": 1.3237257889809773,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 15228
    },
    {
      "epoch": 0.15229,
      "grad_norm": 1.1757579002320533,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 15229
    },
    {
      "epoch": 0.1523,
      "grad_norm": 1.0704092999538808,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 15230
    },
    {
      "epoch": 0.15231,
      "grad_norm": 1.3457605300447015,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 15231
    },
    {
      "epoch": 0.15232,
      "grad_norm": 1.0255019166629449,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 15232
    },
    {
      "epoch": 0.15233,
      "grad_norm": 1.4300673251118192,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 15233
    },
    {
      "epoch": 0.15234,
      "grad_norm": 1.0734449075243604,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 15234
    },
    {
      "epoch": 0.15235,
      "grad_norm": 1.473003403443299,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 15235
    },
    {
      "epoch": 0.15236,
      "grad_norm": 1.0636588121743216,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 15236
    },
    {
      "epoch": 0.15237,
      "grad_norm": 1.216439505946412,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 15237
    },
    {
      "epoch": 0.15238,
      "grad_norm": 1.1762090328789125,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 15238
    },
    {
      "epoch": 0.15239,
      "grad_norm": 1.2784513886299516,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 15239
    },
    {
      "epoch": 0.1524,
      "grad_norm": 1.0374994043846635,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 15240
    },
    {
      "epoch": 0.15241,
      "grad_norm": 1.221527477801365,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 15241
    },
    {
      "epoch": 0.15242,
      "grad_norm": 1.4062968215005007,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 15242
    },
    {
      "epoch": 0.15243,
      "grad_norm": 1.0214884999824123,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 15243
    },
    {
      "epoch": 0.15244,
      "grad_norm": 1.263794425337306,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 15244
    },
    {
      "epoch": 0.15245,
      "grad_norm": 1.0302338675328382,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 15245
    },
    {
      "epoch": 0.15246,
      "grad_norm": 1.136932342643618,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 15246
    },
    {
      "epoch": 0.15247,
      "grad_norm": 1.3867514442765265,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 15247
    },
    {
      "epoch": 0.15248,
      "grad_norm": 1.343751673856623,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 15248
    },
    {
      "epoch": 0.15249,
      "grad_norm": 1.2947833796560833,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 15249
    },
    {
      "epoch": 0.1525,
      "grad_norm": 0.8717763519936194,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 15250
    },
    {
      "epoch": 0.15251,
      "grad_norm": 1.2661112616588408,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 15251
    },
    {
      "epoch": 0.15252,
      "grad_norm": 1.4149494227736545,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 15252
    },
    {
      "epoch": 0.15253,
      "grad_norm": 1.1173745271801705,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 15253
    },
    {
      "epoch": 0.15254,
      "grad_norm": 1.0403524152240142,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 15254
    },
    {
      "epoch": 0.15255,
      "grad_norm": 1.3111776704502551,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 15255
    },
    {
      "epoch": 0.15256,
      "grad_norm": 1.0953509594301363,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 15256
    },
    {
      "epoch": 0.15257,
      "grad_norm": 1.1169720379175168,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 15257
    },
    {
      "epoch": 0.15258,
      "grad_norm": 1.301900403801355,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15258
    },
    {
      "epoch": 0.15259,
      "grad_norm": 1.106392313755815,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 15259
    },
    {
      "epoch": 0.1526,
      "grad_norm": 1.7125824444678244,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 15260
    },
    {
      "epoch": 0.15261,
      "grad_norm": 1.249930993267981,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 15261
    },
    {
      "epoch": 0.15262,
      "grad_norm": 1.149457244300075,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 15262
    },
    {
      "epoch": 0.15263,
      "grad_norm": 1.3977655537172164,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 15263
    },
    {
      "epoch": 0.15264,
      "grad_norm": 1.007233880569925,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 15264
    },
    {
      "epoch": 0.15265,
      "grad_norm": 1.223511465635572,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 15265
    },
    {
      "epoch": 0.15266,
      "grad_norm": 1.1709854606116625,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 15266
    },
    {
      "epoch": 0.15267,
      "grad_norm": 1.060570100692955,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 15267
    },
    {
      "epoch": 0.15268,
      "grad_norm": 1.170916883693262,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15268
    },
    {
      "epoch": 0.15269,
      "grad_norm": 1.13507419180462,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 15269
    },
    {
      "epoch": 0.1527,
      "grad_norm": 1.3497710321548095,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 15270
    },
    {
      "epoch": 0.15271,
      "grad_norm": 1.0625889576185044,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 15271
    },
    {
      "epoch": 0.15272,
      "grad_norm": 1.2984489575320157,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 15272
    },
    {
      "epoch": 0.15273,
      "grad_norm": 0.9872476347269957,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 15273
    },
    {
      "epoch": 0.15274,
      "grad_norm": 1.2339173710754143,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 15274
    },
    {
      "epoch": 0.15275,
      "grad_norm": 1.4088951536106782,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 15275
    },
    {
      "epoch": 0.15276,
      "grad_norm": 1.0339165586886583,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 15276
    },
    {
      "epoch": 0.15277,
      "grad_norm": 1.276415098913713,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 15277
    },
    {
      "epoch": 0.15278,
      "grad_norm": 1.2089381455177826,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 15278
    },
    {
      "epoch": 0.15279,
      "grad_norm": 1.2323476700147227,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 15279
    },
    {
      "epoch": 0.1528,
      "grad_norm": 1.0261540015381687,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 15280
    },
    {
      "epoch": 0.15281,
      "grad_norm": 1.4081233348548858,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 15281
    },
    {
      "epoch": 0.15282,
      "grad_norm": 0.9943608644134634,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 15282
    },
    {
      "epoch": 0.15283,
      "grad_norm": 1.4353065923668198,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 15283
    },
    {
      "epoch": 0.15284,
      "grad_norm": 1.1158782446879179,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 15284
    },
    {
      "epoch": 0.15285,
      "grad_norm": 1.378278155658196,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 15285
    },
    {
      "epoch": 0.15286,
      "grad_norm": 1.0485484157388185,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15286
    },
    {
      "epoch": 0.15287,
      "grad_norm": 1.237331451668235,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 15287
    },
    {
      "epoch": 0.15288,
      "grad_norm": 1.1032372363197314,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 15288
    },
    {
      "epoch": 0.15289,
      "grad_norm": 1.2870811875647183,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 15289
    },
    {
      "epoch": 0.1529,
      "grad_norm": 1.232656198062413,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 15290
    },
    {
      "epoch": 0.15291,
      "grad_norm": 1.0414566423936231,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 15291
    },
    {
      "epoch": 0.15292,
      "grad_norm": 1.6205413571895269,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 15292
    },
    {
      "epoch": 0.15293,
      "grad_norm": 0.971170882527214,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 15293
    },
    {
      "epoch": 0.15294,
      "grad_norm": 1.348736972371927,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 15294
    },
    {
      "epoch": 0.15295,
      "grad_norm": 1.0345246527365257,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 15295
    },
    {
      "epoch": 0.15296,
      "grad_norm": 1.3846095991384118,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 15296
    },
    {
      "epoch": 0.15297,
      "grad_norm": 1.1386240471237765,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 15297
    },
    {
      "epoch": 0.15298,
      "grad_norm": 1.1205088253245086,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 15298
    },
    {
      "epoch": 0.15299,
      "grad_norm": 1.1018736843383212,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 15299
    },
    {
      "epoch": 0.153,
      "grad_norm": 1.2649416067278314,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 15300
    },
    {
      "epoch": 0.15301,
      "grad_norm": 1.3461673681342328,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 15301
    },
    {
      "epoch": 0.15302,
      "grad_norm": 1.5236898373506294,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 15302
    },
    {
      "epoch": 0.15303,
      "grad_norm": 1.421307585707187,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 15303
    },
    {
      "epoch": 0.15304,
      "grad_norm": 1.0770487276285168,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 15304
    },
    {
      "epoch": 0.15305,
      "grad_norm": 1.2803489328118753,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 15305
    },
    {
      "epoch": 0.15306,
      "grad_norm": 1.173623701273869,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 15306
    },
    {
      "epoch": 0.15307,
      "grad_norm": 1.3290158642222427,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 15307
    },
    {
      "epoch": 0.15308,
      "grad_norm": 1.2386634096017313,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 15308
    },
    {
      "epoch": 0.15309,
      "grad_norm": 0.9787141666673741,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 15309
    },
    {
      "epoch": 0.1531,
      "grad_norm": 1.274687433764411,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 15310
    },
    {
      "epoch": 0.15311,
      "grad_norm": 1.2427929343705486,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 15311
    },
    {
      "epoch": 0.15312,
      "grad_norm": 0.9266597260781467,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 15312
    },
    {
      "epoch": 0.15313,
      "grad_norm": 1.2328317591820357,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 15313
    },
    {
      "epoch": 0.15314,
      "grad_norm": 1.0652956670195421,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 15314
    },
    {
      "epoch": 0.15315,
      "grad_norm": 1.2815764068774655,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 15315
    },
    {
      "epoch": 0.15316,
      "grad_norm": 1.1158678154899941,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 15316
    },
    {
      "epoch": 0.15317,
      "grad_norm": 1.0487746743010973,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 15317
    },
    {
      "epoch": 0.15318,
      "grad_norm": 1.2063070441600514,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 15318
    },
    {
      "epoch": 0.15319,
      "grad_norm": 1.1941164133060218,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 15319
    },
    {
      "epoch": 0.1532,
      "grad_norm": 1.362717851724568,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 15320
    },
    {
      "epoch": 0.15321,
      "grad_norm": 1.1536782993394188,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 15321
    },
    {
      "epoch": 0.15322,
      "grad_norm": 1.3971050672436391,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 15322
    },
    {
      "epoch": 0.15323,
      "grad_norm": 1.0713944798806787,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 15323
    },
    {
      "epoch": 0.15324,
      "grad_norm": 1.4856335726244028,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15324
    },
    {
      "epoch": 0.15325,
      "grad_norm": 0.9987382371883158,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 15325
    },
    {
      "epoch": 0.15326,
      "grad_norm": 1.1336635102779324,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 15326
    },
    {
      "epoch": 0.15327,
      "grad_norm": 1.1534136647775999,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 15327
    },
    {
      "epoch": 0.15328,
      "grad_norm": 1.122548485263682,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 15328
    },
    {
      "epoch": 0.15329,
      "grad_norm": 1.1152038630462762,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 15329
    },
    {
      "epoch": 0.1533,
      "grad_norm": 1.411285240833272,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 15330
    },
    {
      "epoch": 0.15331,
      "grad_norm": 1.1149263633444346,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 15331
    },
    {
      "epoch": 0.15332,
      "grad_norm": 1.1647721531569564,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 15332
    },
    {
      "epoch": 0.15333,
      "grad_norm": 1.1676832997788698,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 15333
    },
    {
      "epoch": 0.15334,
      "grad_norm": 1.2533323067325388,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 15334
    },
    {
      "epoch": 0.15335,
      "grad_norm": 1.225618474950692,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 15335
    },
    {
      "epoch": 0.15336,
      "grad_norm": 1.126694619597195,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 15336
    },
    {
      "epoch": 0.15337,
      "grad_norm": 1.3866292038106043,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 15337
    },
    {
      "epoch": 0.15338,
      "grad_norm": 1.0129310147081365,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 15338
    },
    {
      "epoch": 0.15339,
      "grad_norm": 1.3182981482150227,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 15339
    },
    {
      "epoch": 0.1534,
      "grad_norm": 1.0536027013917006,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 15340
    },
    {
      "epoch": 0.15341,
      "grad_norm": 1.3112499607823593,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 15341
    },
    {
      "epoch": 0.15342,
      "grad_norm": 1.1314261343534997,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 15342
    },
    {
      "epoch": 0.15343,
      "grad_norm": 1.4555014481375317,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 15343
    },
    {
      "epoch": 0.15344,
      "grad_norm": 1.0460042053898686,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 15344
    },
    {
      "epoch": 0.15345,
      "grad_norm": 1.418197133113981,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 15345
    },
    {
      "epoch": 0.15346,
      "grad_norm": 0.8737250485311147,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 15346
    },
    {
      "epoch": 0.15347,
      "grad_norm": 1.1180711173208815,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 15347
    },
    {
      "epoch": 0.15348,
      "grad_norm": 1.1934974290749425,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 15348
    },
    {
      "epoch": 0.15349,
      "grad_norm": 1.2919597559085614,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 15349
    },
    {
      "epoch": 0.1535,
      "grad_norm": 1.2062159759058706,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 15350
    },
    {
      "epoch": 0.15351,
      "grad_norm": 1.1297498995697322,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 15351
    },
    {
      "epoch": 0.15352,
      "grad_norm": 1.4763402501322123,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 15352
    },
    {
      "epoch": 0.15353,
      "grad_norm": 1.1012937369280016,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 15353
    },
    {
      "epoch": 0.15354,
      "grad_norm": 1.1566135011916077,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15354
    },
    {
      "epoch": 0.15355,
      "grad_norm": 1.4332475868537249,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 15355
    },
    {
      "epoch": 0.15356,
      "grad_norm": 1.120567900992729,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 15356
    },
    {
      "epoch": 0.15357,
      "grad_norm": 1.2898623769989723,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 15357
    },
    {
      "epoch": 0.15358,
      "grad_norm": 1.1751466448499435,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 15358
    },
    {
      "epoch": 0.15359,
      "grad_norm": 1.1583971676769786,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 15359
    },
    {
      "epoch": 0.1536,
      "grad_norm": 1.0556389547544935,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 15360
    },
    {
      "epoch": 0.15361,
      "grad_norm": 1.5931953804066483,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15361
    },
    {
      "epoch": 0.15362,
      "grad_norm": 1.0044840368714696,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 15362
    },
    {
      "epoch": 0.15363,
      "grad_norm": 1.4726867281373388,
      "learning_rate": 0.003,
      "loss": 4.1001,
      "step": 15363
    },
    {
      "epoch": 0.15364,
      "grad_norm": 1.0650785921619903,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 15364
    },
    {
      "epoch": 0.15365,
      "grad_norm": 1.0864983987638424,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 15365
    },
    {
      "epoch": 0.15366,
      "grad_norm": 1.299374370037176,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 15366
    },
    {
      "epoch": 0.15367,
      "grad_norm": 1.0634044848199793,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 15367
    },
    {
      "epoch": 0.15368,
      "grad_norm": 1.1976379564094535,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 15368
    },
    {
      "epoch": 0.15369,
      "grad_norm": 1.2144313440821537,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 15369
    },
    {
      "epoch": 0.1537,
      "grad_norm": 1.4634358111483572,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 15370
    },
    {
      "epoch": 0.15371,
      "grad_norm": 0.9345829397981521,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 15371
    },
    {
      "epoch": 0.15372,
      "grad_norm": 1.2087216895515303,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 15372
    },
    {
      "epoch": 0.15373,
      "grad_norm": 1.2333298012088245,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 15373
    },
    {
      "epoch": 0.15374,
      "grad_norm": 1.3674821509295068,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 15374
    },
    {
      "epoch": 0.15375,
      "grad_norm": 1.099436386068766,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 15375
    },
    {
      "epoch": 0.15376,
      "grad_norm": 1.3148981477185882,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 15376
    },
    {
      "epoch": 0.15377,
      "grad_norm": 1.0775682871515682,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 15377
    },
    {
      "epoch": 0.15378,
      "grad_norm": 1.2954200985903883,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 15378
    },
    {
      "epoch": 0.15379,
      "grad_norm": 1.304413999793567,
      "learning_rate": 0.003,
      "loss": 4.0908,
      "step": 15379
    },
    {
      "epoch": 0.1538,
      "grad_norm": 1.0702911542230267,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 15380
    },
    {
      "epoch": 0.15381,
      "grad_norm": 1.2258801818381888,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 15381
    },
    {
      "epoch": 0.15382,
      "grad_norm": 1.4176031465193795,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 15382
    },
    {
      "epoch": 0.15383,
      "grad_norm": 1.098414082125524,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 15383
    },
    {
      "epoch": 0.15384,
      "grad_norm": 1.3295703898676385,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 15384
    },
    {
      "epoch": 0.15385,
      "grad_norm": 0.981606428503668,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 15385
    },
    {
      "epoch": 0.15386,
      "grad_norm": 1.4435323663276558,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 15386
    },
    {
      "epoch": 0.15387,
      "grad_norm": 0.9267463710947388,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 15387
    },
    {
      "epoch": 0.15388,
      "grad_norm": 1.3629792496605608,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 15388
    },
    {
      "epoch": 0.15389,
      "grad_norm": 1.1319156951659917,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 15389
    },
    {
      "epoch": 0.1539,
      "grad_norm": 1.234161943185369,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 15390
    },
    {
      "epoch": 0.15391,
      "grad_norm": 1.3987267358306366,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 15391
    },
    {
      "epoch": 0.15392,
      "grad_norm": 1.2372901652941222,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 15392
    },
    {
      "epoch": 0.15393,
      "grad_norm": 1.1868185575964618,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 15393
    },
    {
      "epoch": 0.15394,
      "grad_norm": 1.2551610808976825,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 15394
    },
    {
      "epoch": 0.15395,
      "grad_norm": 1.4110003770316828,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 15395
    },
    {
      "epoch": 0.15396,
      "grad_norm": 1.0311286977956948,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 15396
    },
    {
      "epoch": 0.15397,
      "grad_norm": 1.3176922148726504,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 15397
    },
    {
      "epoch": 0.15398,
      "grad_norm": 1.119210963152209,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 15398
    },
    {
      "epoch": 0.15399,
      "grad_norm": 1.1585313167937918,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 15399
    },
    {
      "epoch": 0.154,
      "grad_norm": 1.1718901018265493,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 15400
    },
    {
      "epoch": 0.15401,
      "grad_norm": 1.2295322444343344,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15401
    },
    {
      "epoch": 0.15402,
      "grad_norm": 1.2411543091235737,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 15402
    },
    {
      "epoch": 0.15403,
      "grad_norm": 1.0399534030778381,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 15403
    },
    {
      "epoch": 0.15404,
      "grad_norm": 1.345481710843217,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 15404
    },
    {
      "epoch": 0.15405,
      "grad_norm": 1.0782567322337016,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 15405
    },
    {
      "epoch": 0.15406,
      "grad_norm": 1.3770651287882087,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 15406
    },
    {
      "epoch": 0.15407,
      "grad_norm": 1.1791156159933935,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 15407
    },
    {
      "epoch": 0.15408,
      "grad_norm": 1.2591927464401316,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 15408
    },
    {
      "epoch": 0.15409,
      "grad_norm": 1.1787754971139592,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 15409
    },
    {
      "epoch": 0.1541,
      "grad_norm": 1.2428022611947518,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 15410
    },
    {
      "epoch": 0.15411,
      "grad_norm": 1.1173057583099402,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 15411
    },
    {
      "epoch": 0.15412,
      "grad_norm": 1.252529165084718,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15412
    },
    {
      "epoch": 0.15413,
      "grad_norm": 1.250156560920163,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 15413
    },
    {
      "epoch": 0.15414,
      "grad_norm": 1.2241752494741582,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 15414
    },
    {
      "epoch": 0.15415,
      "grad_norm": 1.2707570259367225,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 15415
    },
    {
      "epoch": 0.15416,
      "grad_norm": 1.426633007592224,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 15416
    },
    {
      "epoch": 0.15417,
      "grad_norm": 1.5907670541264263,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 15417
    },
    {
      "epoch": 0.15418,
      "grad_norm": 0.9091544803403122,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 15418
    },
    {
      "epoch": 0.15419,
      "grad_norm": 1.047751881825804,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 15419
    },
    {
      "epoch": 0.1542,
      "grad_norm": 1.4170082204482124,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 15420
    },
    {
      "epoch": 0.15421,
      "grad_norm": 1.5733130067127954,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 15421
    },
    {
      "epoch": 0.15422,
      "grad_norm": 1.1820234369725136,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 15422
    },
    {
      "epoch": 0.15423,
      "grad_norm": 1.1659647288488164,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 15423
    },
    {
      "epoch": 0.15424,
      "grad_norm": 1.0479943433146457,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 15424
    },
    {
      "epoch": 0.15425,
      "grad_norm": 1.4604447382383878,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 15425
    },
    {
      "epoch": 0.15426,
      "grad_norm": 1.0268914205354007,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 15426
    },
    {
      "epoch": 0.15427,
      "grad_norm": 1.1655023774090285,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 15427
    },
    {
      "epoch": 0.15428,
      "grad_norm": 1.2744321487748451,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 15428
    },
    {
      "epoch": 0.15429,
      "grad_norm": 1.044821370728945,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 15429
    },
    {
      "epoch": 0.1543,
      "grad_norm": 1.336660814677801,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 15430
    },
    {
      "epoch": 0.15431,
      "grad_norm": 1.0115445320314047,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 15431
    },
    {
      "epoch": 0.15432,
      "grad_norm": 1.3471221521419243,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 15432
    },
    {
      "epoch": 0.15433,
      "grad_norm": 1.218306878259319,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15433
    },
    {
      "epoch": 0.15434,
      "grad_norm": 1.2667733712071507,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 15434
    },
    {
      "epoch": 0.15435,
      "grad_norm": 1.515485139828299,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 15435
    },
    {
      "epoch": 0.15436,
      "grad_norm": 1.0256710056302927,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15436
    },
    {
      "epoch": 0.15437,
      "grad_norm": 1.3457950188332866,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 15437
    },
    {
      "epoch": 0.15438,
      "grad_norm": 1.0676201322248198,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 15438
    },
    {
      "epoch": 0.15439,
      "grad_norm": 1.2795541407972861,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 15439
    },
    {
      "epoch": 0.1544,
      "grad_norm": 1.0362570095334005,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 15440
    },
    {
      "epoch": 0.15441,
      "grad_norm": 1.3352973418819123,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 15441
    },
    {
      "epoch": 0.15442,
      "grad_norm": 1.5627360313033476,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 15442
    },
    {
      "epoch": 0.15443,
      "grad_norm": 0.8954312056496571,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 15443
    },
    {
      "epoch": 0.15444,
      "grad_norm": 0.9003385139611089,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 15444
    },
    {
      "epoch": 0.15445,
      "grad_norm": 1.1641098471539841,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 15445
    },
    {
      "epoch": 0.15446,
      "grad_norm": 1.038599211201421,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 15446
    },
    {
      "epoch": 0.15447,
      "grad_norm": 1.3357295990512643,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 15447
    },
    {
      "epoch": 0.15448,
      "grad_norm": 1.1546627490334793,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 15448
    },
    {
      "epoch": 0.15449,
      "grad_norm": 1.1299525347651804,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 15449
    },
    {
      "epoch": 0.1545,
      "grad_norm": 1.407049291550648,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 15450
    },
    {
      "epoch": 0.15451,
      "grad_norm": 1.0122402823659429,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 15451
    },
    {
      "epoch": 0.15452,
      "grad_norm": 1.3964304058717305,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 15452
    },
    {
      "epoch": 0.15453,
      "grad_norm": 1.1379627627656579,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 15453
    },
    {
      "epoch": 0.15454,
      "grad_norm": 1.1716005696161556,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 15454
    },
    {
      "epoch": 0.15455,
      "grad_norm": 1.0512955853342485,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 15455
    },
    {
      "epoch": 0.15456,
      "grad_norm": 1.477149573778637,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 15456
    },
    {
      "epoch": 0.15457,
      "grad_norm": 1.0542623589033444,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 15457
    },
    {
      "epoch": 0.15458,
      "grad_norm": 1.4777046926994861,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 15458
    },
    {
      "epoch": 0.15459,
      "grad_norm": 1.0398416915025779,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 15459
    },
    {
      "epoch": 0.1546,
      "grad_norm": 1.3202079609830617,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 15460
    },
    {
      "epoch": 0.15461,
      "grad_norm": 1.172625188072539,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 15461
    },
    {
      "epoch": 0.15462,
      "grad_norm": 1.3455025181894154,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 15462
    },
    {
      "epoch": 0.15463,
      "grad_norm": 1.1862746747288408,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 15463
    },
    {
      "epoch": 0.15464,
      "grad_norm": 1.3410554902084484,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 15464
    },
    {
      "epoch": 0.15465,
      "grad_norm": 0.7670962679225621,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 15465
    },
    {
      "epoch": 0.15466,
      "grad_norm": 1.2231600802959381,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 15466
    },
    {
      "epoch": 0.15467,
      "grad_norm": 1.3683096303657931,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 15467
    },
    {
      "epoch": 0.15468,
      "grad_norm": 1.0633665339305103,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 15468
    },
    {
      "epoch": 0.15469,
      "grad_norm": 1.1569441747859937,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 15469
    },
    {
      "epoch": 0.1547,
      "grad_norm": 1.2900173704913636,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 15470
    },
    {
      "epoch": 0.15471,
      "grad_norm": 0.8564636912847731,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 15471
    },
    {
      "epoch": 0.15472,
      "grad_norm": 0.958179425574608,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 15472
    },
    {
      "epoch": 0.15473,
      "grad_norm": 1.2239514432578584,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 15473
    },
    {
      "epoch": 0.15474,
      "grad_norm": 1.0718699054270044,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 15474
    },
    {
      "epoch": 0.15475,
      "grad_norm": 1.1614611181808692,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15475
    },
    {
      "epoch": 0.15476,
      "grad_norm": 1.3226572290841543,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 15476
    },
    {
      "epoch": 0.15477,
      "grad_norm": 1.2292997948946798,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 15477
    },
    {
      "epoch": 0.15478,
      "grad_norm": 1.1967741002548855,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 15478
    },
    {
      "epoch": 0.15479,
      "grad_norm": 1.3506286726490333,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 15479
    },
    {
      "epoch": 0.1548,
      "grad_norm": 1.2115615904440533,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 15480
    },
    {
      "epoch": 0.15481,
      "grad_norm": 1.2256804181231584,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 15481
    },
    {
      "epoch": 0.15482,
      "grad_norm": 1.19838963798865,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 15482
    },
    {
      "epoch": 0.15483,
      "grad_norm": 1.140780741362575,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 15483
    },
    {
      "epoch": 0.15484,
      "grad_norm": 1.3496605791591862,
      "learning_rate": 0.003,
      "loss": 4.0984,
      "step": 15484
    },
    {
      "epoch": 0.15485,
      "grad_norm": 1.4588788825008616,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 15485
    },
    {
      "epoch": 0.15486,
      "grad_norm": 0.9756831933110768,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 15486
    },
    {
      "epoch": 0.15487,
      "grad_norm": 1.1689926631074683,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 15487
    },
    {
      "epoch": 0.15488,
      "grad_norm": 1.2231147673104008,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 15488
    },
    {
      "epoch": 0.15489,
      "grad_norm": 1.2436681537852814,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 15489
    },
    {
      "epoch": 0.1549,
      "grad_norm": 1.1856690267859664,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 15490
    },
    {
      "epoch": 0.15491,
      "grad_norm": 1.1528720868109508,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 15491
    },
    {
      "epoch": 0.15492,
      "grad_norm": 1.2449752035398804,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 15492
    },
    {
      "epoch": 0.15493,
      "grad_norm": 1.2889529574754994,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15493
    },
    {
      "epoch": 0.15494,
      "grad_norm": 1.1847213418178684,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 15494
    },
    {
      "epoch": 0.15495,
      "grad_norm": 1.1044995912636943,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 15495
    },
    {
      "epoch": 0.15496,
      "grad_norm": 1.385225884920843,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 15496
    },
    {
      "epoch": 0.15497,
      "grad_norm": 1.1573457439443438,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 15497
    },
    {
      "epoch": 0.15498,
      "grad_norm": 1.0805457747390674,
      "learning_rate": 0.003,
      "loss": 4.0092,
      "step": 15498
    },
    {
      "epoch": 0.15499,
      "grad_norm": 1.245268042241472,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 15499
    },
    {
      "epoch": 0.155,
      "grad_norm": 1.0798871257694764,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 15500
    },
    {
      "epoch": 0.15501,
      "grad_norm": 1.3062139606925551,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 15501
    },
    {
      "epoch": 0.15502,
      "grad_norm": 1.0361323349085323,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 15502
    },
    {
      "epoch": 0.15503,
      "grad_norm": 1.3788101018795207,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 15503
    },
    {
      "epoch": 0.15504,
      "grad_norm": 1.1210327336799735,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 15504
    },
    {
      "epoch": 0.15505,
      "grad_norm": 1.1745939313235108,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 15505
    },
    {
      "epoch": 0.15506,
      "grad_norm": 1.1844245158495095,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 15506
    },
    {
      "epoch": 0.15507,
      "grad_norm": 1.4079253310507698,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 15507
    },
    {
      "epoch": 0.15508,
      "grad_norm": 1.0722617625019373,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 15508
    },
    {
      "epoch": 0.15509,
      "grad_norm": 1.4814012345526648,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 15509
    },
    {
      "epoch": 0.1551,
      "grad_norm": 0.9902479457488543,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 15510
    },
    {
      "epoch": 0.15511,
      "grad_norm": 1.524427659657888,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 15511
    },
    {
      "epoch": 0.15512,
      "grad_norm": 1.2749984682610411,
      "learning_rate": 0.003,
      "loss": 4.0967,
      "step": 15512
    },
    {
      "epoch": 0.15513,
      "grad_norm": 1.1272472479632105,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 15513
    },
    {
      "epoch": 0.15514,
      "grad_norm": 1.241946440642086,
      "learning_rate": 0.003,
      "loss": 4.0944,
      "step": 15514
    },
    {
      "epoch": 0.15515,
      "grad_norm": 1.1902532115752005,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 15515
    },
    {
      "epoch": 0.15516,
      "grad_norm": 1.0176104417246836,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 15516
    },
    {
      "epoch": 0.15517,
      "grad_norm": 1.1905163593066492,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 15517
    },
    {
      "epoch": 0.15518,
      "grad_norm": 1.2953977991788461,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 15518
    },
    {
      "epoch": 0.15519,
      "grad_norm": 1.3096714320460605,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 15519
    },
    {
      "epoch": 0.1552,
      "grad_norm": 0.9938131193534318,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 15520
    },
    {
      "epoch": 0.15521,
      "grad_norm": 1.2698929503832623,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 15521
    },
    {
      "epoch": 0.15522,
      "grad_norm": 1.2406608158792833,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 15522
    },
    {
      "epoch": 0.15523,
      "grad_norm": 1.3692338528521906,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 15523
    },
    {
      "epoch": 0.15524,
      "grad_norm": 0.982338615316267,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 15524
    },
    {
      "epoch": 0.15525,
      "grad_norm": 1.1851100242763046,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 15525
    },
    {
      "epoch": 0.15526,
      "grad_norm": 1.5136744226877126,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 15526
    },
    {
      "epoch": 0.15527,
      "grad_norm": 1.128916057984434,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 15527
    },
    {
      "epoch": 0.15528,
      "grad_norm": 1.2309658190479194,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 15528
    },
    {
      "epoch": 0.15529,
      "grad_norm": 1.1849305960657193,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 15529
    },
    {
      "epoch": 0.1553,
      "grad_norm": 1.2412067053524356,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 15530
    },
    {
      "epoch": 0.15531,
      "grad_norm": 0.9597583454661515,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 15531
    },
    {
      "epoch": 0.15532,
      "grad_norm": 1.3845715607138387,
      "learning_rate": 0.003,
      "loss": 4.0972,
      "step": 15532
    },
    {
      "epoch": 0.15533,
      "grad_norm": 1.189346116306467,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 15533
    },
    {
      "epoch": 0.15534,
      "grad_norm": 1.2230592177407105,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 15534
    },
    {
      "epoch": 0.15535,
      "grad_norm": 1.1659354130678887,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 15535
    },
    {
      "epoch": 0.15536,
      "grad_norm": 1.1925263639024195,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 15536
    },
    {
      "epoch": 0.15537,
      "grad_norm": 1.0822641134617847,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 15537
    },
    {
      "epoch": 0.15538,
      "grad_norm": 1.161595204822747,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 15538
    },
    {
      "epoch": 0.15539,
      "grad_norm": 1.3862142005882838,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 15539
    },
    {
      "epoch": 0.1554,
      "grad_norm": 1.229526343493269,
      "learning_rate": 0.003,
      "loss": 4.0985,
      "step": 15540
    },
    {
      "epoch": 0.15541,
      "grad_norm": 1.3488023480037135,
      "learning_rate": 0.003,
      "loss": 4.0898,
      "step": 15541
    },
    {
      "epoch": 0.15542,
      "grad_norm": 1.1274545055092502,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 15542
    },
    {
      "epoch": 0.15543,
      "grad_norm": 1.175224591693728,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 15543
    },
    {
      "epoch": 0.15544,
      "grad_norm": 1.0955511795444062,
      "learning_rate": 0.003,
      "loss": 4.1061,
      "step": 15544
    },
    {
      "epoch": 0.15545,
      "grad_norm": 1.2112066874099896,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 15545
    },
    {
      "epoch": 0.15546,
      "grad_norm": 1.1832663010198956,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 15546
    },
    {
      "epoch": 0.15547,
      "grad_norm": 0.9592680884857658,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 15547
    },
    {
      "epoch": 0.15548,
      "grad_norm": 1.2826548106039155,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 15548
    },
    {
      "epoch": 0.15549,
      "grad_norm": 1.180632470945826,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 15549
    },
    {
      "epoch": 0.1555,
      "grad_norm": 1.1456310983281182,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 15550
    },
    {
      "epoch": 0.15551,
      "grad_norm": 1.2055256823594842,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 15551
    },
    {
      "epoch": 0.15552,
      "grad_norm": 1.199809677412245,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 15552
    },
    {
      "epoch": 0.15553,
      "grad_norm": 1.0730626887780514,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 15553
    },
    {
      "epoch": 0.15554,
      "grad_norm": 1.1391559272958152,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 15554
    },
    {
      "epoch": 0.15555,
      "grad_norm": 1.1054273997346828,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 15555
    },
    {
      "epoch": 0.15556,
      "grad_norm": 1.3381741488249064,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 15556
    },
    {
      "epoch": 0.15557,
      "grad_norm": 1.0129453484587203,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 15557
    },
    {
      "epoch": 0.15558,
      "grad_norm": 1.3550247336343197,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 15558
    },
    {
      "epoch": 0.15559,
      "grad_norm": 1.0800661526811102,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 15559
    },
    {
      "epoch": 0.1556,
      "grad_norm": 1.2794205298372396,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 15560
    },
    {
      "epoch": 0.15561,
      "grad_norm": 1.2484059967266228,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 15561
    },
    {
      "epoch": 0.15562,
      "grad_norm": 1.2579795851592477,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 15562
    },
    {
      "epoch": 0.15563,
      "grad_norm": 1.3985523212529833,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 15563
    },
    {
      "epoch": 0.15564,
      "grad_norm": 1.299900491125251,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 15564
    },
    {
      "epoch": 0.15565,
      "grad_norm": 1.22289263232539,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 15565
    },
    {
      "epoch": 0.15566,
      "grad_norm": 1.1682975186023075,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 15566
    },
    {
      "epoch": 0.15567,
      "grad_norm": 1.2231282085928956,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 15567
    },
    {
      "epoch": 0.15568,
      "grad_norm": 1.2840712432383061,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 15568
    },
    {
      "epoch": 0.15569,
      "grad_norm": 1.2048914943418152,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 15569
    },
    {
      "epoch": 0.1557,
      "grad_norm": 1.1334564677078711,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 15570
    },
    {
      "epoch": 0.15571,
      "grad_norm": 1.2492044466984566,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 15571
    },
    {
      "epoch": 0.15572,
      "grad_norm": 1.2787685966793079,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15572
    },
    {
      "epoch": 0.15573,
      "grad_norm": 1.2109642972554564,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 15573
    },
    {
      "epoch": 0.15574,
      "grad_norm": 1.2469054441137288,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 15574
    },
    {
      "epoch": 0.15575,
      "grad_norm": 1.3721895705961396,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 15575
    },
    {
      "epoch": 0.15576,
      "grad_norm": 1.183178923658135,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 15576
    },
    {
      "epoch": 0.15577,
      "grad_norm": 1.2709919241182668,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 15577
    },
    {
      "epoch": 0.15578,
      "grad_norm": 1.1787930793479175,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 15578
    },
    {
      "epoch": 0.15579,
      "grad_norm": 1.3582338253437232,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 15579
    },
    {
      "epoch": 0.1558,
      "grad_norm": 1.319750457522074,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 15580
    },
    {
      "epoch": 0.15581,
      "grad_norm": 1.1439198612979784,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 15581
    },
    {
      "epoch": 0.15582,
      "grad_norm": 1.207775361596036,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 15582
    },
    {
      "epoch": 0.15583,
      "grad_norm": 1.2628986971845215,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 15583
    },
    {
      "epoch": 0.15584,
      "grad_norm": 1.208959099329531,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 15584
    },
    {
      "epoch": 0.15585,
      "grad_norm": 1.217579157488508,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 15585
    },
    {
      "epoch": 0.15586,
      "grad_norm": 1.1710200109179474,
      "learning_rate": 0.003,
      "loss": 4.0176,
      "step": 15586
    },
    {
      "epoch": 0.15587,
      "grad_norm": 1.1298927902719316,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 15587
    },
    {
      "epoch": 0.15588,
      "grad_norm": 1.351308668801087,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 15588
    },
    {
      "epoch": 0.15589,
      "grad_norm": 1.1533687643932535,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 15589
    },
    {
      "epoch": 0.1559,
      "grad_norm": 1.3577624193107098,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 15590
    },
    {
      "epoch": 0.15591,
      "grad_norm": 1.0378217988790521,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 15591
    },
    {
      "epoch": 0.15592,
      "grad_norm": 1.3241158196415868,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 15592
    },
    {
      "epoch": 0.15593,
      "grad_norm": 0.9357014159701508,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 15593
    },
    {
      "epoch": 0.15594,
      "grad_norm": 1.3290171088512335,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 15594
    },
    {
      "epoch": 0.15595,
      "grad_norm": 1.1905298663725166,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 15595
    },
    {
      "epoch": 0.15596,
      "grad_norm": 1.1842922519903687,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 15596
    },
    {
      "epoch": 0.15597,
      "grad_norm": 1.393171668065257,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 15597
    },
    {
      "epoch": 0.15598,
      "grad_norm": 1.2093019393117734,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 15598
    },
    {
      "epoch": 0.15599,
      "grad_norm": 1.3478666873443483,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 15599
    },
    {
      "epoch": 0.156,
      "grad_norm": 1.1657516417400606,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 15600
    },
    {
      "epoch": 0.15601,
      "grad_norm": 1.358518532556128,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 15601
    },
    {
      "epoch": 0.15602,
      "grad_norm": 1.1803239294661774,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 15602
    },
    {
      "epoch": 0.15603,
      "grad_norm": 1.2955861708868923,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 15603
    },
    {
      "epoch": 0.15604,
      "grad_norm": 1.0399032409655542,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 15604
    },
    {
      "epoch": 0.15605,
      "grad_norm": 1.2609466127421523,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 15605
    },
    {
      "epoch": 0.15606,
      "grad_norm": 0.9914988599779099,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 15606
    },
    {
      "epoch": 0.15607,
      "grad_norm": 1.4370190397876632,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 15607
    },
    {
      "epoch": 0.15608,
      "grad_norm": 1.1799076597681437,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 15608
    },
    {
      "epoch": 0.15609,
      "grad_norm": 1.4752831292983322,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 15609
    },
    {
      "epoch": 0.1561,
      "grad_norm": 1.039199974330079,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 15610
    },
    {
      "epoch": 0.15611,
      "grad_norm": 1.2253487449069982,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 15611
    },
    {
      "epoch": 0.15612,
      "grad_norm": 1.093840574448093,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 15612
    },
    {
      "epoch": 0.15613,
      "grad_norm": 1.3571446766506403,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 15613
    },
    {
      "epoch": 0.15614,
      "grad_norm": 1.0553760636201928,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 15614
    },
    {
      "epoch": 0.15615,
      "grad_norm": 1.3235541151541643,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 15615
    },
    {
      "epoch": 0.15616,
      "grad_norm": 1.2661053326679264,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 15616
    },
    {
      "epoch": 0.15617,
      "grad_norm": 0.9442433454080968,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 15617
    },
    {
      "epoch": 0.15618,
      "grad_norm": 1.5305838600867183,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 15618
    },
    {
      "epoch": 0.15619,
      "grad_norm": 1.0971756564789878,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 15619
    },
    {
      "epoch": 0.1562,
      "grad_norm": 1.534084990212608,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 15620
    },
    {
      "epoch": 0.15621,
      "grad_norm": 1.0651697006964178,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 15621
    },
    {
      "epoch": 0.15622,
      "grad_norm": 1.2181410597137075,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 15622
    },
    {
      "epoch": 0.15623,
      "grad_norm": 1.3182947944513597,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 15623
    },
    {
      "epoch": 0.15624,
      "grad_norm": 1.0913383790969486,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 15624
    },
    {
      "epoch": 0.15625,
      "grad_norm": 1.3833835876314324,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 15625
    },
    {
      "epoch": 0.15626,
      "grad_norm": 0.9309552488047658,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 15626
    },
    {
      "epoch": 0.15627,
      "grad_norm": 1.1570848674176404,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 15627
    },
    {
      "epoch": 0.15628,
      "grad_norm": 1.176935804098361,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 15628
    },
    {
      "epoch": 0.15629,
      "grad_norm": 1.0719613956642227,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 15629
    },
    {
      "epoch": 0.1563,
      "grad_norm": 1.212478073422822,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 15630
    },
    {
      "epoch": 0.15631,
      "grad_norm": 1.2547829857423518,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 15631
    },
    {
      "epoch": 0.15632,
      "grad_norm": 1.435656586125539,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 15632
    },
    {
      "epoch": 0.15633,
      "grad_norm": 1.2244113407724508,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 15633
    },
    {
      "epoch": 0.15634,
      "grad_norm": 1.3451385172699777,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 15634
    },
    {
      "epoch": 0.15635,
      "grad_norm": 0.9828019537442368,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 15635
    },
    {
      "epoch": 0.15636,
      "grad_norm": 1.309784793149415,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 15636
    },
    {
      "epoch": 0.15637,
      "grad_norm": 1.3062190678521401,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 15637
    },
    {
      "epoch": 0.15638,
      "grad_norm": 1.165473176393098,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 15638
    },
    {
      "epoch": 0.15639,
      "grad_norm": 1.1670306993408306,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 15639
    },
    {
      "epoch": 0.1564,
      "grad_norm": 1.1486624058525117,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 15640
    },
    {
      "epoch": 0.15641,
      "grad_norm": 1.139740792872974,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 15641
    },
    {
      "epoch": 0.15642,
      "grad_norm": 1.1265148410314216,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 15642
    },
    {
      "epoch": 0.15643,
      "grad_norm": 1.275213876165096,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 15643
    },
    {
      "epoch": 0.15644,
      "grad_norm": 1.0717084212859374,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 15644
    },
    {
      "epoch": 0.15645,
      "grad_norm": 1.583705740950635,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 15645
    },
    {
      "epoch": 0.15646,
      "grad_norm": 1.102649284741737,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 15646
    },
    {
      "epoch": 0.15647,
      "grad_norm": 1.2455052029010671,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 15647
    },
    {
      "epoch": 0.15648,
      "grad_norm": 1.1972247406763779,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 15648
    },
    {
      "epoch": 0.15649,
      "grad_norm": 1.226549854995466,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 15649
    },
    {
      "epoch": 0.1565,
      "grad_norm": 1.1847153962415675,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 15650
    },
    {
      "epoch": 0.15651,
      "grad_norm": 1.2756284529825144,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 15651
    },
    {
      "epoch": 0.15652,
      "grad_norm": 1.041978770124662,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 15652
    },
    {
      "epoch": 0.15653,
      "grad_norm": 1.4074424570284112,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 15653
    },
    {
      "epoch": 0.15654,
      "grad_norm": 1.0815867887693429,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 15654
    },
    {
      "epoch": 0.15655,
      "grad_norm": 1.1911344281142204,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 15655
    },
    {
      "epoch": 0.15656,
      "grad_norm": 1.3363273582630495,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 15656
    },
    {
      "epoch": 0.15657,
      "grad_norm": 1.4034885185783113,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 15657
    },
    {
      "epoch": 0.15658,
      "grad_norm": 1.3287131511830883,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 15658
    },
    {
      "epoch": 0.15659,
      "grad_norm": 1.272868305526661,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 15659
    },
    {
      "epoch": 0.1566,
      "grad_norm": 1.3610441579515373,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 15660
    },
    {
      "epoch": 0.15661,
      "grad_norm": 1.1070335416027541,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 15661
    },
    {
      "epoch": 0.15662,
      "grad_norm": 1.137121323673713,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 15662
    },
    {
      "epoch": 0.15663,
      "grad_norm": 1.1688201307165267,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 15663
    },
    {
      "epoch": 0.15664,
      "grad_norm": 1.1884300419398568,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 15664
    },
    {
      "epoch": 0.15665,
      "grad_norm": 1.1153395770391112,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 15665
    },
    {
      "epoch": 0.15666,
      "grad_norm": 1.3654060929395346,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 15666
    },
    {
      "epoch": 0.15667,
      "grad_norm": 1.056118733006509,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 15667
    },
    {
      "epoch": 0.15668,
      "grad_norm": 1.3586731584276457,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 15668
    },
    {
      "epoch": 0.15669,
      "grad_norm": 1.1796131792325408,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 15669
    },
    {
      "epoch": 0.1567,
      "grad_norm": 1.5477999205246045,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 15670
    },
    {
      "epoch": 0.15671,
      "grad_norm": 1.0676276941822342,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 15671
    },
    {
      "epoch": 0.15672,
      "grad_norm": 1.2235310848942378,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 15672
    },
    {
      "epoch": 0.15673,
      "grad_norm": 1.1832277832368536,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 15673
    },
    {
      "epoch": 0.15674,
      "grad_norm": 1.1765804606495207,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 15674
    },
    {
      "epoch": 0.15675,
      "grad_norm": 1.2041687186960341,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 15675
    },
    {
      "epoch": 0.15676,
      "grad_norm": 1.0160506887649807,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 15676
    },
    {
      "epoch": 0.15677,
      "grad_norm": 1.2126187401023711,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 15677
    },
    {
      "epoch": 0.15678,
      "grad_norm": 0.9716988481086387,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 15678
    },
    {
      "epoch": 0.15679,
      "grad_norm": 1.2613322000198528,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 15679
    },
    {
      "epoch": 0.1568,
      "grad_norm": 1.0382859046183213,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 15680
    },
    {
      "epoch": 0.15681,
      "grad_norm": 1.3604619130798723,
      "learning_rate": 0.003,
      "loss": 4.0981,
      "step": 15681
    },
    {
      "epoch": 0.15682,
      "grad_norm": 1.1817349333719727,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 15682
    },
    {
      "epoch": 0.15683,
      "grad_norm": 1.1425818004623922,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 15683
    },
    {
      "epoch": 0.15684,
      "grad_norm": 1.4118023735842253,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 15684
    },
    {
      "epoch": 0.15685,
      "grad_norm": 1.0797767177312902,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 15685
    },
    {
      "epoch": 0.15686,
      "grad_norm": 1.3559641912318932,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 15686
    },
    {
      "epoch": 0.15687,
      "grad_norm": 1.180834137733882,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 15687
    },
    {
      "epoch": 0.15688,
      "grad_norm": 1.2901718630974206,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 15688
    },
    {
      "epoch": 0.15689,
      "grad_norm": 1.057343694698553,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 15689
    },
    {
      "epoch": 0.1569,
      "grad_norm": 1.4531352327451286,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 15690
    },
    {
      "epoch": 0.15691,
      "grad_norm": 1.059809813801114,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 15691
    },
    {
      "epoch": 0.15692,
      "grad_norm": 1.326585675340708,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 15692
    },
    {
      "epoch": 0.15693,
      "grad_norm": 1.0546296617877726,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 15693
    },
    {
      "epoch": 0.15694,
      "grad_norm": 1.1803547660970806,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 15694
    },
    {
      "epoch": 0.15695,
      "grad_norm": 1.1205652925533187,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 15695
    },
    {
      "epoch": 0.15696,
      "grad_norm": 1.0476974385425437,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 15696
    },
    {
      "epoch": 0.15697,
      "grad_norm": 1.284802117087022,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 15697
    },
    {
      "epoch": 0.15698,
      "grad_norm": 1.0799119338890766,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 15698
    },
    {
      "epoch": 0.15699,
      "grad_norm": 1.3706470506218387,
      "learning_rate": 0.003,
      "loss": 4.1048,
      "step": 15699
    },
    {
      "epoch": 0.157,
      "grad_norm": 1.0082630839398643,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 15700
    },
    {
      "epoch": 0.15701,
      "grad_norm": 1.445676153125903,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 15701
    },
    {
      "epoch": 0.15702,
      "grad_norm": 1.064353835615799,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 15702
    },
    {
      "epoch": 0.15703,
      "grad_norm": 1.2484061812785103,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 15703
    },
    {
      "epoch": 0.15704,
      "grad_norm": 1.4801219200898403,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 15704
    },
    {
      "epoch": 0.15705,
      "grad_norm": 1.2584117781793354,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 15705
    },
    {
      "epoch": 0.15706,
      "grad_norm": 1.3361614955363434,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 15706
    },
    {
      "epoch": 0.15707,
      "grad_norm": 1.2896463924521695,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 15707
    },
    {
      "epoch": 0.15708,
      "grad_norm": 1.1868240849069513,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 15708
    },
    {
      "epoch": 0.15709,
      "grad_norm": 1.2353964823213757,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 15709
    },
    {
      "epoch": 0.1571,
      "grad_norm": 1.3438687182860078,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 15710
    },
    {
      "epoch": 0.15711,
      "grad_norm": 1.3063674960988663,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 15711
    },
    {
      "epoch": 0.15712,
      "grad_norm": 1.1174297258576622,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 15712
    },
    {
      "epoch": 0.15713,
      "grad_norm": 1.2206113182977072,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 15713
    },
    {
      "epoch": 0.15714,
      "grad_norm": 0.9891530851223427,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 15714
    },
    {
      "epoch": 0.15715,
      "grad_norm": 1.5402493043887262,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 15715
    },
    {
      "epoch": 0.15716,
      "grad_norm": 1.0024584399723404,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 15716
    },
    {
      "epoch": 0.15717,
      "grad_norm": 1.5360847032225067,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 15717
    },
    {
      "epoch": 0.15718,
      "grad_norm": 1.063704056965126,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 15718
    },
    {
      "epoch": 0.15719,
      "grad_norm": 1.342678153024432,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 15719
    },
    {
      "epoch": 0.1572,
      "grad_norm": 1.1265734694726142,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 15720
    },
    {
      "epoch": 0.15721,
      "grad_norm": 1.2662125532950392,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 15721
    },
    {
      "epoch": 0.15722,
      "grad_norm": 1.1696121939389466,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 15722
    },
    {
      "epoch": 0.15723,
      "grad_norm": 1.1858634514564075,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 15723
    },
    {
      "epoch": 0.15724,
      "grad_norm": 1.2401541222621788,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 15724
    },
    {
      "epoch": 0.15725,
      "grad_norm": 1.1757637701745292,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 15725
    },
    {
      "epoch": 0.15726,
      "grad_norm": 1.1936334840612361,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 15726
    },
    {
      "epoch": 0.15727,
      "grad_norm": 0.9496985596585019,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 15727
    },
    {
      "epoch": 0.15728,
      "grad_norm": 1.3178393298136768,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 15728
    },
    {
      "epoch": 0.15729,
      "grad_norm": 1.375278498748954,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 15729
    },
    {
      "epoch": 0.1573,
      "grad_norm": 1.224750356040554,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 15730
    },
    {
      "epoch": 0.15731,
      "grad_norm": 1.1842857042512631,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 15731
    },
    {
      "epoch": 0.15732,
      "grad_norm": 1.276922944243024,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 15732
    },
    {
      "epoch": 0.15733,
      "grad_norm": 1.1616866651523663,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 15733
    },
    {
      "epoch": 0.15734,
      "grad_norm": 1.271758163927367,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 15734
    },
    {
      "epoch": 0.15735,
      "grad_norm": 1.0451445922159956,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 15735
    },
    {
      "epoch": 0.15736,
      "grad_norm": 1.12669786390475,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 15736
    },
    {
      "epoch": 0.15737,
      "grad_norm": 1.081969957241331,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 15737
    },
    {
      "epoch": 0.15738,
      "grad_norm": 1.3554078152104214,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 15738
    },
    {
      "epoch": 0.15739,
      "grad_norm": 1.1487176707527527,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 15739
    },
    {
      "epoch": 0.1574,
      "grad_norm": 1.3685503250200426,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 15740
    },
    {
      "epoch": 0.15741,
      "grad_norm": 1.1669559736978832,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 15741
    },
    {
      "epoch": 0.15742,
      "grad_norm": 1.2886763102225725,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 15742
    },
    {
      "epoch": 0.15743,
      "grad_norm": 1.3166304422510648,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 15743
    },
    {
      "epoch": 0.15744,
      "grad_norm": 1.4377739852078564,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 15744
    },
    {
      "epoch": 0.15745,
      "grad_norm": 1.0733206043203236,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 15745
    },
    {
      "epoch": 0.15746,
      "grad_norm": 1.302632631536053,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 15746
    },
    {
      "epoch": 0.15747,
      "grad_norm": 1.1271031367568127,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 15747
    },
    {
      "epoch": 0.15748,
      "grad_norm": 1.249087683227782,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 15748
    },
    {
      "epoch": 0.15749,
      "grad_norm": 0.9972463264110328,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 15749
    },
    {
      "epoch": 0.1575,
      "grad_norm": 1.29105919726399,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 15750
    },
    {
      "epoch": 0.15751,
      "grad_norm": 0.9443062712650601,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 15751
    },
    {
      "epoch": 0.15752,
      "grad_norm": 1.230321224780944,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 15752
    },
    {
      "epoch": 0.15753,
      "grad_norm": 1.1393553130448237,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 15753
    },
    {
      "epoch": 0.15754,
      "grad_norm": 1.1904480545785132,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 15754
    },
    {
      "epoch": 0.15755,
      "grad_norm": 1.068113412072056,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 15755
    },
    {
      "epoch": 0.15756,
      "grad_norm": 1.5412000458981607,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 15756
    },
    {
      "epoch": 0.15757,
      "grad_norm": 1.1302032111036926,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 15757
    },
    {
      "epoch": 0.15758,
      "grad_norm": 1.4361441260651815,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 15758
    },
    {
      "epoch": 0.15759,
      "grad_norm": 1.185263160492388,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 15759
    },
    {
      "epoch": 0.1576,
      "grad_norm": 1.1936371622024107,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 15760
    },
    {
      "epoch": 0.15761,
      "grad_norm": 1.2967703711658523,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 15761
    },
    {
      "epoch": 0.15762,
      "grad_norm": 1.1455304588777764,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 15762
    },
    {
      "epoch": 0.15763,
      "grad_norm": 1.4246356741152995,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 15763
    },
    {
      "epoch": 0.15764,
      "grad_norm": 0.9812512488800511,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 15764
    },
    {
      "epoch": 0.15765,
      "grad_norm": 1.0685826322848317,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 15765
    },
    {
      "epoch": 0.15766,
      "grad_norm": 1.442939948848548,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 15766
    },
    {
      "epoch": 0.15767,
      "grad_norm": 1.0450538289646265,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 15767
    },
    {
      "epoch": 0.15768,
      "grad_norm": 1.355954874873054,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 15768
    },
    {
      "epoch": 0.15769,
      "grad_norm": 0.9825217012228542,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 15769
    },
    {
      "epoch": 0.1577,
      "grad_norm": 1.2113682167958801,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 15770
    },
    {
      "epoch": 0.15771,
      "grad_norm": 1.1721148970936335,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 15771
    },
    {
      "epoch": 0.15772,
      "grad_norm": 1.170800408276965,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 15772
    },
    {
      "epoch": 0.15773,
      "grad_norm": 1.4323770606252804,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 15773
    },
    {
      "epoch": 0.15774,
      "grad_norm": 1.1113737649682414,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 15774
    },
    {
      "epoch": 0.15775,
      "grad_norm": 1.2427021366448472,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 15775
    },
    {
      "epoch": 0.15776,
      "grad_norm": 1.2605703342585746,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 15776
    },
    {
      "epoch": 0.15777,
      "grad_norm": 1.1862357476961214,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 15777
    },
    {
      "epoch": 0.15778,
      "grad_norm": 1.1628968647887679,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 15778
    },
    {
      "epoch": 0.15779,
      "grad_norm": 1.135395616114251,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 15779
    },
    {
      "epoch": 0.1578,
      "grad_norm": 1.2833211835181242,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 15780
    },
    {
      "epoch": 0.15781,
      "grad_norm": 1.219054473115084,
      "learning_rate": 0.003,
      "loss": 4.0039,
      "step": 15781
    },
    {
      "epoch": 0.15782,
      "grad_norm": 1.3614090599157573,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 15782
    },
    {
      "epoch": 0.15783,
      "grad_norm": 1.1023328220848818,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 15783
    },
    {
      "epoch": 0.15784,
      "grad_norm": 1.3507662672711478,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 15784
    },
    {
      "epoch": 0.15785,
      "grad_norm": 1.277368068591102,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 15785
    },
    {
      "epoch": 0.15786,
      "grad_norm": 1.2831362884284865,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 15786
    },
    {
      "epoch": 0.15787,
      "grad_norm": 1.2327201269925094,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 15787
    },
    {
      "epoch": 0.15788,
      "grad_norm": 1.3013672361448259,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 15788
    },
    {
      "epoch": 0.15789,
      "grad_norm": 1.1977439741562261,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 15789
    },
    {
      "epoch": 0.1579,
      "grad_norm": 1.3018864470607883,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 15790
    },
    {
      "epoch": 0.15791,
      "grad_norm": 1.028971036425535,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 15791
    },
    {
      "epoch": 0.15792,
      "grad_norm": 1.316986973616366,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 15792
    },
    {
      "epoch": 0.15793,
      "grad_norm": 1.0587295674584893,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 15793
    },
    {
      "epoch": 0.15794,
      "grad_norm": 1.4539141011250214,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 15794
    },
    {
      "epoch": 0.15795,
      "grad_norm": 1.301643316539598,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 15795
    },
    {
      "epoch": 0.15796,
      "grad_norm": 1.240351388241122,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 15796
    },
    {
      "epoch": 0.15797,
      "grad_norm": 1.2002682131440259,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 15797
    },
    {
      "epoch": 0.15798,
      "grad_norm": 1.056315126378722,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 15798
    },
    {
      "epoch": 0.15799,
      "grad_norm": 1.4008096337287652,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 15799
    },
    {
      "epoch": 0.158,
      "grad_norm": 1.1423018732564754,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 15800
    },
    {
      "epoch": 0.15801,
      "grad_norm": 1.171708557390231,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 15801
    },
    {
      "epoch": 0.15802,
      "grad_norm": 0.9453108222646237,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 15802
    },
    {
      "epoch": 0.15803,
      "grad_norm": 1.1751428344710695,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 15803
    },
    {
      "epoch": 0.15804,
      "grad_norm": 1.306193556816233,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 15804
    },
    {
      "epoch": 0.15805,
      "grad_norm": 1.030461592193479,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 15805
    },
    {
      "epoch": 0.15806,
      "grad_norm": 1.245678365494413,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 15806
    },
    {
      "epoch": 0.15807,
      "grad_norm": 1.1662603675502075,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 15807
    },
    {
      "epoch": 0.15808,
      "grad_norm": 1.3387633832600623,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 15808
    },
    {
      "epoch": 0.15809,
      "grad_norm": 1.240465003088661,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 15809
    },
    {
      "epoch": 0.1581,
      "grad_norm": 1.209119805612708,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 15810
    },
    {
      "epoch": 0.15811,
      "grad_norm": 1.3162005653315245,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 15811
    },
    {
      "epoch": 0.15812,
      "grad_norm": 1.0192654482072048,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 15812
    },
    {
      "epoch": 0.15813,
      "grad_norm": 1.1285453507352443,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 15813
    },
    {
      "epoch": 0.15814,
      "grad_norm": 1.0936354239745236,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 15814
    },
    {
      "epoch": 0.15815,
      "grad_norm": 1.1330306276771145,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 15815
    },
    {
      "epoch": 0.15816,
      "grad_norm": 1.3275264189382847,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 15816
    },
    {
      "epoch": 0.15817,
      "grad_norm": 1.2532559093461328,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 15817
    },
    {
      "epoch": 0.15818,
      "grad_norm": 1.312860969230454,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 15818
    },
    {
      "epoch": 0.15819,
      "grad_norm": 1.2683436307454612,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 15819
    },
    {
      "epoch": 0.1582,
      "grad_norm": 1.0569106961087773,
      "learning_rate": 0.003,
      "loss": 4.0935,
      "step": 15820
    },
    {
      "epoch": 0.15821,
      "grad_norm": 1.3513616887372883,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 15821
    },
    {
      "epoch": 0.15822,
      "grad_norm": 1.1322845703855622,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 15822
    },
    {
      "epoch": 0.15823,
      "grad_norm": 1.3709174657164709,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 15823
    },
    {
      "epoch": 0.15824,
      "grad_norm": 1.0535463272210899,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 15824
    },
    {
      "epoch": 0.15825,
      "grad_norm": 1.2813176007090163,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 15825
    },
    {
      "epoch": 0.15826,
      "grad_norm": 1.16816717861855,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 15826
    },
    {
      "epoch": 0.15827,
      "grad_norm": 1.5224553137595678,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 15827
    },
    {
      "epoch": 0.15828,
      "grad_norm": 1.0193731373062342,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 15828
    },
    {
      "epoch": 0.15829,
      "grad_norm": 1.3539267964637687,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 15829
    },
    {
      "epoch": 0.1583,
      "grad_norm": 0.8968372894061043,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 15830
    },
    {
      "epoch": 0.15831,
      "grad_norm": 1.393815292827624,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 15831
    },
    {
      "epoch": 0.15832,
      "grad_norm": 1.435291386789168,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 15832
    },
    {
      "epoch": 0.15833,
      "grad_norm": 1.089936174306673,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 15833
    },
    {
      "epoch": 0.15834,
      "grad_norm": 1.3391268534335066,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 15834
    },
    {
      "epoch": 0.15835,
      "grad_norm": 1.1837628962250406,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 15835
    },
    {
      "epoch": 0.15836,
      "grad_norm": 1.2177305620915122,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 15836
    },
    {
      "epoch": 0.15837,
      "grad_norm": 1.2467793854853642,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 15837
    },
    {
      "epoch": 0.15838,
      "grad_norm": 1.054378133249128,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 15838
    },
    {
      "epoch": 0.15839,
      "grad_norm": 1.2407604744450886,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 15839
    },
    {
      "epoch": 0.1584,
      "grad_norm": 1.0413195885226798,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 15840
    },
    {
      "epoch": 0.15841,
      "grad_norm": 1.5920472604532057,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 15841
    },
    {
      "epoch": 0.15842,
      "grad_norm": 1.1342416919557914,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 15842
    },
    {
      "epoch": 0.15843,
      "grad_norm": 1.393455769747034,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 15843
    },
    {
      "epoch": 0.15844,
      "grad_norm": 1.2981772033429155,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 15844
    },
    {
      "epoch": 0.15845,
      "grad_norm": 1.0921851196401828,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 15845
    },
    {
      "epoch": 0.15846,
      "grad_norm": 1.290066187713939,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 15846
    },
    {
      "epoch": 0.15847,
      "grad_norm": 1.2975045304029453,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 15847
    },
    {
      "epoch": 0.15848,
      "grad_norm": 0.9903688537248176,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 15848
    },
    {
      "epoch": 0.15849,
      "grad_norm": 1.6142623172682926,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 15849
    },
    {
      "epoch": 0.1585,
      "grad_norm": 0.985378213981797,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 15850
    },
    {
      "epoch": 0.15851,
      "grad_norm": 1.3318945081444562,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 15851
    },
    {
      "epoch": 0.15852,
      "grad_norm": 1.1256886562107775,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 15852
    },
    {
      "epoch": 0.15853,
      "grad_norm": 1.296262931962511,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 15853
    },
    {
      "epoch": 0.15854,
      "grad_norm": 1.2919035541279742,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 15854
    },
    {
      "epoch": 0.15855,
      "grad_norm": 1.3608356785547568,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 15855
    },
    {
      "epoch": 0.15856,
      "grad_norm": 1.2760762183953474,
      "learning_rate": 0.003,
      "loss": 4.0894,
      "step": 15856
    },
    {
      "epoch": 0.15857,
      "grad_norm": 1.1121899650478617,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 15857
    },
    {
      "epoch": 0.15858,
      "grad_norm": 1.363693216241739,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 15858
    },
    {
      "epoch": 0.15859,
      "grad_norm": 1.0983535650642993,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 15859
    },
    {
      "epoch": 0.1586,
      "grad_norm": 1.4565610602528292,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 15860
    },
    {
      "epoch": 0.15861,
      "grad_norm": 1.1219134267588653,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 15861
    },
    {
      "epoch": 0.15862,
      "grad_norm": 0.9972620894806278,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 15862
    },
    {
      "epoch": 0.15863,
      "grad_norm": 1.3566778235730177,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 15863
    },
    {
      "epoch": 0.15864,
      "grad_norm": 1.112461176260205,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 15864
    },
    {
      "epoch": 0.15865,
      "grad_norm": 1.1576143537930752,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 15865
    },
    {
      "epoch": 0.15866,
      "grad_norm": 1.0190099145159142,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 15866
    },
    {
      "epoch": 0.15867,
      "grad_norm": 1.402365337674979,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 15867
    },
    {
      "epoch": 0.15868,
      "grad_norm": 1.053246113011527,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 15868
    },
    {
      "epoch": 0.15869,
      "grad_norm": 1.2852503700895828,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 15869
    },
    {
      "epoch": 0.1587,
      "grad_norm": 1.132776331866809,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 15870
    },
    {
      "epoch": 0.15871,
      "grad_norm": 1.3188795308390149,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 15871
    },
    {
      "epoch": 0.15872,
      "grad_norm": 1.212507733266001,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 15872
    },
    {
      "epoch": 0.15873,
      "grad_norm": 1.308345556908113,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 15873
    },
    {
      "epoch": 0.15874,
      "grad_norm": 1.2011406331704382,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 15874
    },
    {
      "epoch": 0.15875,
      "grad_norm": 1.4341293399013142,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 15875
    },
    {
      "epoch": 0.15876,
      "grad_norm": 1.546959866294749,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 15876
    },
    {
      "epoch": 0.15877,
      "grad_norm": 1.0426492002607934,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 15877
    },
    {
      "epoch": 0.15878,
      "grad_norm": 1.3141174054097085,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 15878
    },
    {
      "epoch": 0.15879,
      "grad_norm": 1.079309116038525,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 15879
    },
    {
      "epoch": 0.1588,
      "grad_norm": 1.1522466875849964,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 15880
    },
    {
      "epoch": 0.15881,
      "grad_norm": 1.3014708659787106,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 15881
    },
    {
      "epoch": 0.15882,
      "grad_norm": 1.0808883773056064,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 15882
    },
    {
      "epoch": 0.15883,
      "grad_norm": 1.2566124828480216,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 15883
    },
    {
      "epoch": 0.15884,
      "grad_norm": 1.2658895711654297,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 15884
    },
    {
      "epoch": 0.15885,
      "grad_norm": 0.9571184866919882,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 15885
    },
    {
      "epoch": 0.15886,
      "grad_norm": 1.283364138119938,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 15886
    },
    {
      "epoch": 0.15887,
      "grad_norm": 1.0923717676531308,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 15887
    },
    {
      "epoch": 0.15888,
      "grad_norm": 1.0991231881499555,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 15888
    },
    {
      "epoch": 0.15889,
      "grad_norm": 1.3272415803409467,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 15889
    },
    {
      "epoch": 0.1589,
      "grad_norm": 1.1785840209706901,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 15890
    },
    {
      "epoch": 0.15891,
      "grad_norm": 1.107506409072357,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 15891
    },
    {
      "epoch": 0.15892,
      "grad_norm": 1.2298490379923164,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 15892
    },
    {
      "epoch": 0.15893,
      "grad_norm": 1.1870014015172905,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 15893
    },
    {
      "epoch": 0.15894,
      "grad_norm": 1.7188385883097401,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 15894
    },
    {
      "epoch": 0.15895,
      "grad_norm": 1.2362021387221938,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 15895
    },
    {
      "epoch": 0.15896,
      "grad_norm": 1.34789117487494,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 15896
    },
    {
      "epoch": 0.15897,
      "grad_norm": 1.0695391433208734,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 15897
    },
    {
      "epoch": 0.15898,
      "grad_norm": 1.1777839125106804,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 15898
    },
    {
      "epoch": 0.15899,
      "grad_norm": 1.0976417218398744,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 15899
    },
    {
      "epoch": 0.159,
      "grad_norm": 1.1801152956563512,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 15900
    },
    {
      "epoch": 0.15901,
      "grad_norm": 1.0112695513469703,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 15901
    },
    {
      "epoch": 0.15902,
      "grad_norm": 1.4063076193955033,
      "learning_rate": 0.003,
      "loss": 4.1067,
      "step": 15902
    },
    {
      "epoch": 0.15903,
      "grad_norm": 1.0667191964445872,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 15903
    },
    {
      "epoch": 0.15904,
      "grad_norm": 1.3380817870927166,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 15904
    },
    {
      "epoch": 0.15905,
      "grad_norm": 1.0974570739841378,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 15905
    },
    {
      "epoch": 0.15906,
      "grad_norm": 1.426638793800246,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 15906
    },
    {
      "epoch": 0.15907,
      "grad_norm": 1.2647011527595533,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 15907
    },
    {
      "epoch": 0.15908,
      "grad_norm": 1.2032650178773125,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 15908
    },
    {
      "epoch": 0.15909,
      "grad_norm": 1.1950608703430268,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 15909
    },
    {
      "epoch": 0.1591,
      "grad_norm": 1.4601945009357429,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 15910
    },
    {
      "epoch": 0.15911,
      "grad_norm": 1.1067183552727093,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 15911
    },
    {
      "epoch": 0.15912,
      "grad_norm": 1.417817886596112,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 15912
    },
    {
      "epoch": 0.15913,
      "grad_norm": 1.2132424767181238,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 15913
    },
    {
      "epoch": 0.15914,
      "grad_norm": 1.1538329650841557,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 15914
    },
    {
      "epoch": 0.15915,
      "grad_norm": 1.274924356409109,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 15915
    },
    {
      "epoch": 0.15916,
      "grad_norm": 1.2825839286168426,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 15916
    },
    {
      "epoch": 0.15917,
      "grad_norm": 1.1133627843857545,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 15917
    },
    {
      "epoch": 0.15918,
      "grad_norm": 1.185840545901858,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 15918
    },
    {
      "epoch": 0.15919,
      "grad_norm": 1.0864911009183758,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 15919
    },
    {
      "epoch": 0.1592,
      "grad_norm": 1.3851073990719955,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 15920
    },
    {
      "epoch": 0.15921,
      "grad_norm": 0.9656982199781505,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 15921
    },
    {
      "epoch": 0.15922,
      "grad_norm": 1.1446516677778402,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 15922
    },
    {
      "epoch": 0.15923,
      "grad_norm": 1.0606920603331644,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 15923
    },
    {
      "epoch": 0.15924,
      "grad_norm": 1.3304356753224753,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 15924
    },
    {
      "epoch": 0.15925,
      "grad_norm": 1.2508962554946679,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 15925
    },
    {
      "epoch": 0.15926,
      "grad_norm": 1.4449873882960205,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 15926
    },
    {
      "epoch": 0.15927,
      "grad_norm": 1.0599813172496817,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 15927
    },
    {
      "epoch": 0.15928,
      "grad_norm": 1.2402991954860334,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 15928
    },
    {
      "epoch": 0.15929,
      "grad_norm": 1.4530991867031946,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 15929
    },
    {
      "epoch": 0.1593,
      "grad_norm": 1.069378091186554,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 15930
    },
    {
      "epoch": 0.15931,
      "grad_norm": 1.2095128452010742,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 15931
    },
    {
      "epoch": 0.15932,
      "grad_norm": 1.2083494374648298,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 15932
    },
    {
      "epoch": 0.15933,
      "grad_norm": 1.3423400065295152,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 15933
    },
    {
      "epoch": 0.15934,
      "grad_norm": 1.064303273571097,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 15934
    },
    {
      "epoch": 0.15935,
      "grad_norm": 1.4546372575740287,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 15935
    },
    {
      "epoch": 0.15936,
      "grad_norm": 1.0870248420346131,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 15936
    },
    {
      "epoch": 0.15937,
      "grad_norm": 1.2222674384931338,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 15937
    },
    {
      "epoch": 0.15938,
      "grad_norm": 1.2353233739383782,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 15938
    },
    {
      "epoch": 0.15939,
      "grad_norm": 1.2656560872610918,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 15939
    },
    {
      "epoch": 0.1594,
      "grad_norm": 1.3038681911989853,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 15940
    },
    {
      "epoch": 0.15941,
      "grad_norm": 0.9973340686690166,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 15941
    },
    {
      "epoch": 0.15942,
      "grad_norm": 1.422979066690338,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 15942
    },
    {
      "epoch": 0.15943,
      "grad_norm": 1.198234746778187,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 15943
    },
    {
      "epoch": 0.15944,
      "grad_norm": 1.261941686794253,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 15944
    },
    {
      "epoch": 0.15945,
      "grad_norm": 1.185765211650111,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 15945
    },
    {
      "epoch": 0.15946,
      "grad_norm": 1.2466689219040417,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 15946
    },
    {
      "epoch": 0.15947,
      "grad_norm": 0.9899503882038371,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 15947
    },
    {
      "epoch": 0.15948,
      "grad_norm": 1.3385743779929415,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 15948
    },
    {
      "epoch": 0.15949,
      "grad_norm": 1.0195047180234955,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 15949
    },
    {
      "epoch": 0.1595,
      "grad_norm": 1.4255360401605368,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 15950
    },
    {
      "epoch": 0.15951,
      "grad_norm": 1.38313794297874,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 15951
    },
    {
      "epoch": 0.15952,
      "grad_norm": 1.176495580840481,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 15952
    },
    {
      "epoch": 0.15953,
      "grad_norm": 1.4944983800535494,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 15953
    },
    {
      "epoch": 0.15954,
      "grad_norm": 1.263338265873482,
      "learning_rate": 0.003,
      "loss": 4.0962,
      "step": 15954
    },
    {
      "epoch": 0.15955,
      "grad_norm": 1.0441497380391978,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 15955
    },
    {
      "epoch": 0.15956,
      "grad_norm": 1.3221945378646158,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 15956
    },
    {
      "epoch": 0.15957,
      "grad_norm": 0.9623396164481901,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 15957
    },
    {
      "epoch": 0.15958,
      "grad_norm": 1.1978286873707253,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 15958
    },
    {
      "epoch": 0.15959,
      "grad_norm": 1.2541150992104266,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 15959
    },
    {
      "epoch": 0.1596,
      "grad_norm": 1.4053468007862848,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 15960
    },
    {
      "epoch": 0.15961,
      "grad_norm": 1.2022954055802446,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 15961
    },
    {
      "epoch": 0.15962,
      "grad_norm": 1.2899479012614354,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 15962
    },
    {
      "epoch": 0.15963,
      "grad_norm": 1.0104604306834757,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 15963
    },
    {
      "epoch": 0.15964,
      "grad_norm": 1.3701365986520497,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 15964
    },
    {
      "epoch": 0.15965,
      "grad_norm": 0.915145979140205,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 15965
    },
    {
      "epoch": 0.15966,
      "grad_norm": 1.3395650250329996,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 15966
    },
    {
      "epoch": 0.15967,
      "grad_norm": 1.2875086574703656,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 15967
    },
    {
      "epoch": 0.15968,
      "grad_norm": 1.2504696248888783,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 15968
    },
    {
      "epoch": 0.15969,
      "grad_norm": 1.0215547112285952,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 15969
    },
    {
      "epoch": 0.1597,
      "grad_norm": 1.2703756402320694,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 15970
    },
    {
      "epoch": 0.15971,
      "grad_norm": 1.1870985294748964,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 15971
    },
    {
      "epoch": 0.15972,
      "grad_norm": 1.577281483064767,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 15972
    },
    {
      "epoch": 0.15973,
      "grad_norm": 1.1997563643841984,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 15973
    },
    {
      "epoch": 0.15974,
      "grad_norm": 1.1111073471566701,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 15974
    },
    {
      "epoch": 0.15975,
      "grad_norm": 1.1634488039787199,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 15975
    },
    {
      "epoch": 0.15976,
      "grad_norm": 1.2411269453094047,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 15976
    },
    {
      "epoch": 0.15977,
      "grad_norm": 1.461638922771171,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 15977
    },
    {
      "epoch": 0.15978,
      "grad_norm": 0.86336412630456,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 15978
    },
    {
      "epoch": 0.15979,
      "grad_norm": 1.1476716496753052,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 15979
    },
    {
      "epoch": 0.1598,
      "grad_norm": 1.189440127812191,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 15980
    },
    {
      "epoch": 0.15981,
      "grad_norm": 1.2509862236836928,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 15981
    },
    {
      "epoch": 0.15982,
      "grad_norm": 1.237805705201756,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 15982
    },
    {
      "epoch": 0.15983,
      "grad_norm": 1.3810342421390938,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 15983
    },
    {
      "epoch": 0.15984,
      "grad_norm": 1.1936090245544986,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 15984
    },
    {
      "epoch": 0.15985,
      "grad_norm": 1.5557605543282316,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 15985
    },
    {
      "epoch": 0.15986,
      "grad_norm": 1.290156158048075,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 15986
    },
    {
      "epoch": 0.15987,
      "grad_norm": 1.0745807071056974,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 15987
    },
    {
      "epoch": 0.15988,
      "grad_norm": 1.506036125111331,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 15988
    },
    {
      "epoch": 0.15989,
      "grad_norm": 0.9712888397562033,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 15989
    },
    {
      "epoch": 0.1599,
      "grad_norm": 1.4077697682259613,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 15990
    },
    {
      "epoch": 0.15991,
      "grad_norm": 1.156952447010478,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 15991
    },
    {
      "epoch": 0.15992,
      "grad_norm": 1.234014646961057,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 15992
    },
    {
      "epoch": 0.15993,
      "grad_norm": 0.9377390955202223,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 15993
    },
    {
      "epoch": 0.15994,
      "grad_norm": 1.5044058374437788,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 15994
    },
    {
      "epoch": 0.15995,
      "grad_norm": 1.047958226650085,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 15995
    },
    {
      "epoch": 0.15996,
      "grad_norm": 1.352017504920686,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 15996
    },
    {
      "epoch": 0.15997,
      "grad_norm": 1.02018421874671,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 15997
    },
    {
      "epoch": 0.15998,
      "grad_norm": 1.3361840507251346,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 15998
    },
    {
      "epoch": 0.15999,
      "grad_norm": 1.123832468238917,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 15999
    },
    {
      "epoch": 0.16,
      "grad_norm": 1.2282973173546792,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 16000
    },
    {
      "epoch": 0.16001,
      "grad_norm": 1.185876019053862,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 16001
    },
    {
      "epoch": 0.16002,
      "grad_norm": 1.0161405965512897,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 16002
    },
    {
      "epoch": 0.16003,
      "grad_norm": 1.3867974768957554,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 16003
    },
    {
      "epoch": 0.16004,
      "grad_norm": 1.2610512502089228,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 16004
    },
    {
      "epoch": 0.16005,
      "grad_norm": 1.290496838186172,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 16005
    },
    {
      "epoch": 0.16006,
      "grad_norm": 1.1766003813701296,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16006
    },
    {
      "epoch": 0.16007,
      "grad_norm": 1.3807222232910399,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 16007
    },
    {
      "epoch": 0.16008,
      "grad_norm": 1.0151941528455057,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 16008
    },
    {
      "epoch": 0.16009,
      "grad_norm": 1.3315177267931306,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 16009
    },
    {
      "epoch": 0.1601,
      "grad_norm": 0.9928579574156897,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 16010
    },
    {
      "epoch": 0.16011,
      "grad_norm": 1.3602863221481476,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 16011
    },
    {
      "epoch": 0.16012,
      "grad_norm": 1.0132912108523175,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 16012
    },
    {
      "epoch": 0.16013,
      "grad_norm": 1.5088117293451413,
      "learning_rate": 0.003,
      "loss": 4.0996,
      "step": 16013
    },
    {
      "epoch": 0.16014,
      "grad_norm": 1.1583704828044479,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 16014
    },
    {
      "epoch": 0.16015,
      "grad_norm": 1.2705045977034461,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 16015
    },
    {
      "epoch": 0.16016,
      "grad_norm": 1.317851900152992,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 16016
    },
    {
      "epoch": 0.16017,
      "grad_norm": 1.1853045034271175,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 16017
    },
    {
      "epoch": 0.16018,
      "grad_norm": 1.2364527640293232,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 16018
    },
    {
      "epoch": 0.16019,
      "grad_norm": 1.2285386520990271,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 16019
    },
    {
      "epoch": 0.1602,
      "grad_norm": 0.8858904555018807,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 16020
    },
    {
      "epoch": 0.16021,
      "grad_norm": 1.350921027596688,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 16021
    },
    {
      "epoch": 0.16022,
      "grad_norm": 1.4351101543673916,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 16022
    },
    {
      "epoch": 0.16023,
      "grad_norm": 1.255697509081209,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 16023
    },
    {
      "epoch": 0.16024,
      "grad_norm": 1.1824335549248934,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 16024
    },
    {
      "epoch": 0.16025,
      "grad_norm": 1.2662263515950418,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 16025
    },
    {
      "epoch": 0.16026,
      "grad_norm": 1.1399840534697918,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 16026
    },
    {
      "epoch": 0.16027,
      "grad_norm": 1.295445347906111,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 16027
    },
    {
      "epoch": 0.16028,
      "grad_norm": 1.0959290848383678,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 16028
    },
    {
      "epoch": 0.16029,
      "grad_norm": 1.2500054271602945,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 16029
    },
    {
      "epoch": 0.1603,
      "grad_norm": 0.7828254220254027,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 16030
    },
    {
      "epoch": 0.16031,
      "grad_norm": 0.9367431986280713,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 16031
    },
    {
      "epoch": 0.16032,
      "grad_norm": 1.1556516536007895,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 16032
    },
    {
      "epoch": 0.16033,
      "grad_norm": 1.208116076995649,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 16033
    },
    {
      "epoch": 0.16034,
      "grad_norm": 1.2607954671386226,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 16034
    },
    {
      "epoch": 0.16035,
      "grad_norm": 1.4102971953230792,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 16035
    },
    {
      "epoch": 0.16036,
      "grad_norm": 1.3184640225874744,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 16036
    },
    {
      "epoch": 0.16037,
      "grad_norm": 1.0168064820061287,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 16037
    },
    {
      "epoch": 0.16038,
      "grad_norm": 1.3409307266038692,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16038
    },
    {
      "epoch": 0.16039,
      "grad_norm": 1.2873512931259226,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 16039
    },
    {
      "epoch": 0.1604,
      "grad_norm": 1.0602613953704065,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16040
    },
    {
      "epoch": 0.16041,
      "grad_norm": 1.1722729803123337,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 16041
    },
    {
      "epoch": 0.16042,
      "grad_norm": 1.2144026204334712,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 16042
    },
    {
      "epoch": 0.16043,
      "grad_norm": 1.2670937938441484,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 16043
    },
    {
      "epoch": 0.16044,
      "grad_norm": 1.36102212031014,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 16044
    },
    {
      "epoch": 0.16045,
      "grad_norm": 1.313454575506855,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 16045
    },
    {
      "epoch": 0.16046,
      "grad_norm": 1.077842899825297,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 16046
    },
    {
      "epoch": 0.16047,
      "grad_norm": 1.2856899362195513,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 16047
    },
    {
      "epoch": 0.16048,
      "grad_norm": 1.1620704440883474,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 16048
    },
    {
      "epoch": 0.16049,
      "grad_norm": 1.1791352016767631,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 16049
    },
    {
      "epoch": 0.1605,
      "grad_norm": 1.2276731565409649,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 16050
    },
    {
      "epoch": 0.16051,
      "grad_norm": 1.215460082915433,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 16051
    },
    {
      "epoch": 0.16052,
      "grad_norm": 1.3024479744349649,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 16052
    },
    {
      "epoch": 0.16053,
      "grad_norm": 1.0434738105117647,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 16053
    },
    {
      "epoch": 0.16054,
      "grad_norm": 1.4922296754524016,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 16054
    },
    {
      "epoch": 0.16055,
      "grad_norm": 1.1139652143755538,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 16055
    },
    {
      "epoch": 0.16056,
      "grad_norm": 1.0482729569435472,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 16056
    },
    {
      "epoch": 0.16057,
      "grad_norm": 1.6201160330504003,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 16057
    },
    {
      "epoch": 0.16058,
      "grad_norm": 1.1310803320642941,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 16058
    },
    {
      "epoch": 0.16059,
      "grad_norm": 1.6347875117596131,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 16059
    },
    {
      "epoch": 0.1606,
      "grad_norm": 1.2376701938294354,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 16060
    },
    {
      "epoch": 0.16061,
      "grad_norm": 0.9828200049395512,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 16061
    },
    {
      "epoch": 0.16062,
      "grad_norm": 1.2588410567839852,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 16062
    },
    {
      "epoch": 0.16063,
      "grad_norm": 1.3762150425011925,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 16063
    },
    {
      "epoch": 0.16064,
      "grad_norm": 1.246496347742565,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 16064
    },
    {
      "epoch": 0.16065,
      "grad_norm": 1.196117012172844,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 16065
    },
    {
      "epoch": 0.16066,
      "grad_norm": 1.4471735569176785,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 16066
    },
    {
      "epoch": 0.16067,
      "grad_norm": 0.8566399612016478,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 16067
    },
    {
      "epoch": 0.16068,
      "grad_norm": 1.072391558803118,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 16068
    },
    {
      "epoch": 0.16069,
      "grad_norm": 1.2635911866254046,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 16069
    },
    {
      "epoch": 0.1607,
      "grad_norm": 1.1597614951049424,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 16070
    },
    {
      "epoch": 0.16071,
      "grad_norm": 1.2302599861777737,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 16071
    },
    {
      "epoch": 0.16072,
      "grad_norm": 1.3428540526302917,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 16072
    },
    {
      "epoch": 0.16073,
      "grad_norm": 1.085160823232457,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 16073
    },
    {
      "epoch": 0.16074,
      "grad_norm": 1.351673584398568,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 16074
    },
    {
      "epoch": 0.16075,
      "grad_norm": 1.1102246000748848,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 16075
    },
    {
      "epoch": 0.16076,
      "grad_norm": 1.4375465430859315,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 16076
    },
    {
      "epoch": 0.16077,
      "grad_norm": 1.216029659979363,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 16077
    },
    {
      "epoch": 0.16078,
      "grad_norm": 1.170066835033616,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 16078
    },
    {
      "epoch": 0.16079,
      "grad_norm": 1.1498334678374798,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 16079
    },
    {
      "epoch": 0.1608,
      "grad_norm": 1.0705153383453425,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 16080
    },
    {
      "epoch": 0.16081,
      "grad_norm": 1.4351203776646833,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 16081
    },
    {
      "epoch": 0.16082,
      "grad_norm": 1.1773355247461257,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 16082
    },
    {
      "epoch": 0.16083,
      "grad_norm": 1.3970482590289004,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 16083
    },
    {
      "epoch": 0.16084,
      "grad_norm": 1.0310870012110842,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 16084
    },
    {
      "epoch": 0.16085,
      "grad_norm": 1.4005190205008389,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 16085
    },
    {
      "epoch": 0.16086,
      "grad_norm": 1.297700056125098,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 16086
    },
    {
      "epoch": 0.16087,
      "grad_norm": 1.0731629299873047,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 16087
    },
    {
      "epoch": 0.16088,
      "grad_norm": 1.2658779579928179,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 16088
    },
    {
      "epoch": 0.16089,
      "grad_norm": 1.1329288764364365,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 16089
    },
    {
      "epoch": 0.1609,
      "grad_norm": 1.2426179867228542,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 16090
    },
    {
      "epoch": 0.16091,
      "grad_norm": 1.0263552094840165,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 16091
    },
    {
      "epoch": 0.16092,
      "grad_norm": 1.2416680279212071,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 16092
    },
    {
      "epoch": 0.16093,
      "grad_norm": 1.260700386795223,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 16093
    },
    {
      "epoch": 0.16094,
      "grad_norm": 1.2875913173260094,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 16094
    },
    {
      "epoch": 0.16095,
      "grad_norm": 1.3656342469786085,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 16095
    },
    {
      "epoch": 0.16096,
      "grad_norm": 1.0897669652984088,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 16096
    },
    {
      "epoch": 0.16097,
      "grad_norm": 1.483569404996074,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 16097
    },
    {
      "epoch": 0.16098,
      "grad_norm": 0.9930638791381724,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 16098
    },
    {
      "epoch": 0.16099,
      "grad_norm": 1.332688523816265,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 16099
    },
    {
      "epoch": 0.161,
      "grad_norm": 1.1332123227431758,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 16100
    },
    {
      "epoch": 0.16101,
      "grad_norm": 1.4142004288182568,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 16101
    },
    {
      "epoch": 0.16102,
      "grad_norm": 1.0554095342428538,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 16102
    },
    {
      "epoch": 0.16103,
      "grad_norm": 1.5078649726370579,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 16103
    },
    {
      "epoch": 0.16104,
      "grad_norm": 1.1804539195656147,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 16104
    },
    {
      "epoch": 0.16105,
      "grad_norm": 1.3659350489796371,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 16105
    },
    {
      "epoch": 0.16106,
      "grad_norm": 1.2005682397916537,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 16106
    },
    {
      "epoch": 0.16107,
      "grad_norm": 1.3341577103762357,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 16107
    },
    {
      "epoch": 0.16108,
      "grad_norm": 1.3639069812836295,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 16108
    },
    {
      "epoch": 0.16109,
      "grad_norm": 1.2025429222345108,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 16109
    },
    {
      "epoch": 0.1611,
      "grad_norm": 1.2097383388696323,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 16110
    },
    {
      "epoch": 0.16111,
      "grad_norm": 1.2458176475290417,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 16111
    },
    {
      "epoch": 0.16112,
      "grad_norm": 1.2368323118535012,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 16112
    },
    {
      "epoch": 0.16113,
      "grad_norm": 1.142566868935622,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 16113
    },
    {
      "epoch": 0.16114,
      "grad_norm": 1.1802710966943095,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 16114
    },
    {
      "epoch": 0.16115,
      "grad_norm": 1.1055928491050242,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 16115
    },
    {
      "epoch": 0.16116,
      "grad_norm": 1.187695141379374,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 16116
    },
    {
      "epoch": 0.16117,
      "grad_norm": 1.031688892996054,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 16117
    },
    {
      "epoch": 0.16118,
      "grad_norm": 1.2575076087246992,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 16118
    },
    {
      "epoch": 0.16119,
      "grad_norm": 0.9999568372935328,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 16119
    },
    {
      "epoch": 0.1612,
      "grad_norm": 1.4193557198363458,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 16120
    },
    {
      "epoch": 0.16121,
      "grad_norm": 0.9899605942335881,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 16121
    },
    {
      "epoch": 0.16122,
      "grad_norm": 1.447924369250344,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 16122
    },
    {
      "epoch": 0.16123,
      "grad_norm": 1.0232117979767148,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 16123
    },
    {
      "epoch": 0.16124,
      "grad_norm": 1.4723857913910692,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 16124
    },
    {
      "epoch": 0.16125,
      "grad_norm": 1.0783329673441044,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 16125
    },
    {
      "epoch": 0.16126,
      "grad_norm": 1.2898787396854268,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 16126
    },
    {
      "epoch": 0.16127,
      "grad_norm": 1.1780275482968812,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 16127
    },
    {
      "epoch": 0.16128,
      "grad_norm": 1.3930139902311514,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 16128
    },
    {
      "epoch": 0.16129,
      "grad_norm": 1.2315390005845666,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 16129
    },
    {
      "epoch": 0.1613,
      "grad_norm": 1.1571113291929978,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 16130
    },
    {
      "epoch": 0.16131,
      "grad_norm": 1.3384100128422542,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 16131
    },
    {
      "epoch": 0.16132,
      "grad_norm": 1.072585086800576,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 16132
    },
    {
      "epoch": 0.16133,
      "grad_norm": 1.2729336759623386,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 16133
    },
    {
      "epoch": 0.16134,
      "grad_norm": 1.114749540234282,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 16134
    },
    {
      "epoch": 0.16135,
      "grad_norm": 1.1175124107985823,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 16135
    },
    {
      "epoch": 0.16136,
      "grad_norm": 1.2246547602788969,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 16136
    },
    {
      "epoch": 0.16137,
      "grad_norm": 1.2857717063706393,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 16137
    },
    {
      "epoch": 0.16138,
      "grad_norm": 1.4942308360429648,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 16138
    },
    {
      "epoch": 0.16139,
      "grad_norm": 1.1375748258649176,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 16139
    },
    {
      "epoch": 0.1614,
      "grad_norm": 1.1923552179035142,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 16140
    },
    {
      "epoch": 0.16141,
      "grad_norm": 1.5189022469135416,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 16141
    },
    {
      "epoch": 0.16142,
      "grad_norm": 1.2905301683105843,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 16142
    },
    {
      "epoch": 0.16143,
      "grad_norm": 1.0586844543041596,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 16143
    },
    {
      "epoch": 0.16144,
      "grad_norm": 1.078529221858877,
      "learning_rate": 0.003,
      "loss": 4.0886,
      "step": 16144
    },
    {
      "epoch": 0.16145,
      "grad_norm": 1.2734398060290861,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 16145
    },
    {
      "epoch": 0.16146,
      "grad_norm": 1.3953782131238706,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 16146
    },
    {
      "epoch": 0.16147,
      "grad_norm": 1.1646308253134419,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 16147
    },
    {
      "epoch": 0.16148,
      "grad_norm": 1.368083019000008,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16148
    },
    {
      "epoch": 0.16149,
      "grad_norm": 1.240713218394061,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 16149
    },
    {
      "epoch": 0.1615,
      "grad_norm": 1.2105345191615164,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 16150
    },
    {
      "epoch": 0.16151,
      "grad_norm": 1.117951094917481,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 16151
    },
    {
      "epoch": 0.16152,
      "grad_norm": 1.3586250303202676,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 16152
    },
    {
      "epoch": 0.16153,
      "grad_norm": 1.1797380254267351,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 16153
    },
    {
      "epoch": 0.16154,
      "grad_norm": 1.2663320152817172,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 16154
    },
    {
      "epoch": 0.16155,
      "grad_norm": 1.1101839680137688,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16155
    },
    {
      "epoch": 0.16156,
      "grad_norm": 1.4659568094993767,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 16156
    },
    {
      "epoch": 0.16157,
      "grad_norm": 1.1840417445148899,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16157
    },
    {
      "epoch": 0.16158,
      "grad_norm": 1.2275331549066915,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 16158
    },
    {
      "epoch": 0.16159,
      "grad_norm": 1.2497562642871187,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 16159
    },
    {
      "epoch": 0.1616,
      "grad_norm": 1.3215853560109796,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 16160
    },
    {
      "epoch": 0.16161,
      "grad_norm": 1.1276735764177956,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 16161
    },
    {
      "epoch": 0.16162,
      "grad_norm": 1.2316294802425705,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 16162
    },
    {
      "epoch": 0.16163,
      "grad_norm": 0.9928474126809026,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 16163
    },
    {
      "epoch": 0.16164,
      "grad_norm": 1.2823445831755897,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 16164
    },
    {
      "epoch": 0.16165,
      "grad_norm": 1.079049904434384,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 16165
    },
    {
      "epoch": 0.16166,
      "grad_norm": 1.3013455832012046,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 16166
    },
    {
      "epoch": 0.16167,
      "grad_norm": 1.1097297541578093,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 16167
    },
    {
      "epoch": 0.16168,
      "grad_norm": 1.3489914890034207,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 16168
    },
    {
      "epoch": 0.16169,
      "grad_norm": 1.2096691901994352,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 16169
    },
    {
      "epoch": 0.1617,
      "grad_norm": 1.3065672763313205,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 16170
    },
    {
      "epoch": 0.16171,
      "grad_norm": 1.341880768268709,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 16171
    },
    {
      "epoch": 0.16172,
      "grad_norm": 1.2454783442636452,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16172
    },
    {
      "epoch": 0.16173,
      "grad_norm": 1.2473409038756866,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 16173
    },
    {
      "epoch": 0.16174,
      "grad_norm": 1.1579721074325866,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 16174
    },
    {
      "epoch": 0.16175,
      "grad_norm": 1.0720426369036318,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 16175
    },
    {
      "epoch": 0.16176,
      "grad_norm": 1.2194859985899926,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 16176
    },
    {
      "epoch": 0.16177,
      "grad_norm": 1.1420179520039173,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 16177
    },
    {
      "epoch": 0.16178,
      "grad_norm": 1.142372388101929,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 16178
    },
    {
      "epoch": 0.16179,
      "grad_norm": 1.305081858798519,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 16179
    },
    {
      "epoch": 0.1618,
      "grad_norm": 1.270635324379897,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 16180
    },
    {
      "epoch": 0.16181,
      "grad_norm": 1.2908644938680864,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 16181
    },
    {
      "epoch": 0.16182,
      "grad_norm": 1.0091203657975452,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 16182
    },
    {
      "epoch": 0.16183,
      "grad_norm": 1.5364701904545728,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 16183
    },
    {
      "epoch": 0.16184,
      "grad_norm": 1.0304836363577676,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 16184
    },
    {
      "epoch": 0.16185,
      "grad_norm": 1.4540775782228723,
      "learning_rate": 0.003,
      "loss": 4.0958,
      "step": 16185
    },
    {
      "epoch": 0.16186,
      "grad_norm": 0.9617748173512339,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 16186
    },
    {
      "epoch": 0.16187,
      "grad_norm": 1.217977899288468,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 16187
    },
    {
      "epoch": 0.16188,
      "grad_norm": 1.487051711688237,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 16188
    },
    {
      "epoch": 0.16189,
      "grad_norm": 1.17910209302436,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 16189
    },
    {
      "epoch": 0.1619,
      "grad_norm": 1.1446373184625196,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 16190
    },
    {
      "epoch": 0.16191,
      "grad_norm": 1.284875588091632,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 16191
    },
    {
      "epoch": 0.16192,
      "grad_norm": 1.313551319883505,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 16192
    },
    {
      "epoch": 0.16193,
      "grad_norm": 1.2333812069105783,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16193
    },
    {
      "epoch": 0.16194,
      "grad_norm": 1.2048765218001227,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 16194
    },
    {
      "epoch": 0.16195,
      "grad_norm": 1.1685474837899368,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 16195
    },
    {
      "epoch": 0.16196,
      "grad_norm": 1.234379565341423,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 16196
    },
    {
      "epoch": 0.16197,
      "grad_norm": 1.2416619629065064,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 16197
    },
    {
      "epoch": 0.16198,
      "grad_norm": 1.1506895479509611,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 16198
    },
    {
      "epoch": 0.16199,
      "grad_norm": 1.3069220528527965,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16199
    },
    {
      "epoch": 0.162,
      "grad_norm": 1.2141660935638423,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 16200
    },
    {
      "epoch": 0.16201,
      "grad_norm": 1.277566042717582,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 16201
    },
    {
      "epoch": 0.16202,
      "grad_norm": 1.1887268494189691,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 16202
    },
    {
      "epoch": 0.16203,
      "grad_norm": 1.2395698830375579,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 16203
    },
    {
      "epoch": 0.16204,
      "grad_norm": 1.2176320361343518,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 16204
    },
    {
      "epoch": 0.16205,
      "grad_norm": 1.4108530624303173,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 16205
    },
    {
      "epoch": 0.16206,
      "grad_norm": 1.134903228670893,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 16206
    },
    {
      "epoch": 0.16207,
      "grad_norm": 1.2099066685612638,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16207
    },
    {
      "epoch": 0.16208,
      "grad_norm": 1.2046684455575636,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 16208
    },
    {
      "epoch": 0.16209,
      "grad_norm": 1.20432078452343,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 16209
    },
    {
      "epoch": 0.1621,
      "grad_norm": 1.1205535134930835,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 16210
    },
    {
      "epoch": 0.16211,
      "grad_norm": 1.3164407472701114,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 16211
    },
    {
      "epoch": 0.16212,
      "grad_norm": 0.9504364598306422,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 16212
    },
    {
      "epoch": 0.16213,
      "grad_norm": 1.4074031635387623,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 16213
    },
    {
      "epoch": 0.16214,
      "grad_norm": 1.00809562486534,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 16214
    },
    {
      "epoch": 0.16215,
      "grad_norm": 1.334635201219315,
      "learning_rate": 0.003,
      "loss": 4.0911,
      "step": 16215
    },
    {
      "epoch": 0.16216,
      "grad_norm": 1.1974054616067982,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 16216
    },
    {
      "epoch": 0.16217,
      "grad_norm": 1.1746897452771488,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 16217
    },
    {
      "epoch": 0.16218,
      "grad_norm": 1.2765648358837007,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 16218
    },
    {
      "epoch": 0.16219,
      "grad_norm": 1.004880048598137,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 16219
    },
    {
      "epoch": 0.1622,
      "grad_norm": 1.4440937736237733,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 16220
    },
    {
      "epoch": 0.16221,
      "grad_norm": 1.2105566172203854,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 16221
    },
    {
      "epoch": 0.16222,
      "grad_norm": 1.3202740298730096,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 16222
    },
    {
      "epoch": 0.16223,
      "grad_norm": 1.280287335011057,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 16223
    },
    {
      "epoch": 0.16224,
      "grad_norm": 1.2458770506199293,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 16224
    },
    {
      "epoch": 0.16225,
      "grad_norm": 1.362888596559668,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 16225
    },
    {
      "epoch": 0.16226,
      "grad_norm": 1.1366893412204746,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 16226
    },
    {
      "epoch": 0.16227,
      "grad_norm": 1.4216958867902325,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 16227
    },
    {
      "epoch": 0.16228,
      "grad_norm": 1.381805161860225,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 16228
    },
    {
      "epoch": 0.16229,
      "grad_norm": 1.1295034159836423,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 16229
    },
    {
      "epoch": 0.1623,
      "grad_norm": 1.0784970122984692,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 16230
    },
    {
      "epoch": 0.16231,
      "grad_norm": 1.1336871653186584,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 16231
    },
    {
      "epoch": 0.16232,
      "grad_norm": 1.2501064912218984,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 16232
    },
    {
      "epoch": 0.16233,
      "grad_norm": 1.1649618958076693,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 16233
    },
    {
      "epoch": 0.16234,
      "grad_norm": 1.0368502437332756,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 16234
    },
    {
      "epoch": 0.16235,
      "grad_norm": 1.3480478678997219,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 16235
    },
    {
      "epoch": 0.16236,
      "grad_norm": 1.1367294472111962,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 16236
    },
    {
      "epoch": 0.16237,
      "grad_norm": 1.0659105492586416,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 16237
    },
    {
      "epoch": 0.16238,
      "grad_norm": 1.3748269521555407,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 16238
    },
    {
      "epoch": 0.16239,
      "grad_norm": 1.3121789858444732,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 16239
    },
    {
      "epoch": 0.1624,
      "grad_norm": 1.4056845341506228,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 16240
    },
    {
      "epoch": 0.16241,
      "grad_norm": 1.1679735202194832,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 16241
    },
    {
      "epoch": 0.16242,
      "grad_norm": 1.319184163343352,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 16242
    },
    {
      "epoch": 0.16243,
      "grad_norm": 1.2125087661596101,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 16243
    },
    {
      "epoch": 0.16244,
      "grad_norm": 1.2719103951861044,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 16244
    },
    {
      "epoch": 0.16245,
      "grad_norm": 1.1984346997620228,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 16245
    },
    {
      "epoch": 0.16246,
      "grad_norm": 1.1340434776667687,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 16246
    },
    {
      "epoch": 0.16247,
      "grad_norm": 1.1140869732254772,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 16247
    },
    {
      "epoch": 0.16248,
      "grad_norm": 1.227453917661384,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 16248
    },
    {
      "epoch": 0.16249,
      "grad_norm": 1.1925920328393311,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 16249
    },
    {
      "epoch": 0.1625,
      "grad_norm": 1.1399032518800811,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 16250
    },
    {
      "epoch": 0.16251,
      "grad_norm": 1.2797498532161469,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 16251
    },
    {
      "epoch": 0.16252,
      "grad_norm": 1.019199769972669,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 16252
    },
    {
      "epoch": 0.16253,
      "grad_norm": 1.452625321554583,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 16253
    },
    {
      "epoch": 0.16254,
      "grad_norm": 1.144178826389817,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 16254
    },
    {
      "epoch": 0.16255,
      "grad_norm": 1.4030724132011527,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 16255
    },
    {
      "epoch": 0.16256,
      "grad_norm": 1.3017431523623153,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 16256
    },
    {
      "epoch": 0.16257,
      "grad_norm": 1.208476282338132,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 16257
    },
    {
      "epoch": 0.16258,
      "grad_norm": 1.3275210983008279,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 16258
    },
    {
      "epoch": 0.16259,
      "grad_norm": 1.1927129114231794,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 16259
    },
    {
      "epoch": 0.1626,
      "grad_norm": 1.184898032853809,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 16260
    },
    {
      "epoch": 0.16261,
      "grad_norm": 1.3221817149951827,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 16261
    },
    {
      "epoch": 0.16262,
      "grad_norm": 1.0597447397778326,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 16262
    },
    {
      "epoch": 0.16263,
      "grad_norm": 1.5015000416488335,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 16263
    },
    {
      "epoch": 0.16264,
      "grad_norm": 0.9484849035292638,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 16264
    },
    {
      "epoch": 0.16265,
      "grad_norm": 1.2497923166040579,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 16265
    },
    {
      "epoch": 0.16266,
      "grad_norm": 1.052695750134391,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 16266
    },
    {
      "epoch": 0.16267,
      "grad_norm": 1.323958844792137,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 16267
    },
    {
      "epoch": 0.16268,
      "grad_norm": 1.067533482038374,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 16268
    },
    {
      "epoch": 0.16269,
      "grad_norm": 1.3651789367884446,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 16269
    },
    {
      "epoch": 0.1627,
      "grad_norm": 1.2776129239950722,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 16270
    },
    {
      "epoch": 0.16271,
      "grad_norm": 1.6778241915864978,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 16271
    },
    {
      "epoch": 0.16272,
      "grad_norm": 0.9700957980425218,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 16272
    },
    {
      "epoch": 0.16273,
      "grad_norm": 1.2646988723773898,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 16273
    },
    {
      "epoch": 0.16274,
      "grad_norm": 1.4089292011783563,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 16274
    },
    {
      "epoch": 0.16275,
      "grad_norm": 1.166694981145315,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 16275
    },
    {
      "epoch": 0.16276,
      "grad_norm": 1.108640007705579,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 16276
    },
    {
      "epoch": 0.16277,
      "grad_norm": 1.202577317559944,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 16277
    },
    {
      "epoch": 0.16278,
      "grad_norm": 1.0624388101346522,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 16278
    },
    {
      "epoch": 0.16279,
      "grad_norm": 1.5170265623031196,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 16279
    },
    {
      "epoch": 0.1628,
      "grad_norm": 1.0856494741830944,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 16280
    },
    {
      "epoch": 0.16281,
      "grad_norm": 1.3976086613417265,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 16281
    },
    {
      "epoch": 0.16282,
      "grad_norm": 1.0250708449718593,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16282
    },
    {
      "epoch": 0.16283,
      "grad_norm": 1.4883504341428777,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 16283
    },
    {
      "epoch": 0.16284,
      "grad_norm": 1.1823939648448438,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 16284
    },
    {
      "epoch": 0.16285,
      "grad_norm": 1.1090593980612389,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 16285
    },
    {
      "epoch": 0.16286,
      "grad_norm": 1.255754121545388,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 16286
    },
    {
      "epoch": 0.16287,
      "grad_norm": 1.132016724713699,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 16287
    },
    {
      "epoch": 0.16288,
      "grad_norm": 1.272722794652534,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 16288
    },
    {
      "epoch": 0.16289,
      "grad_norm": 1.3094349363350137,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 16289
    },
    {
      "epoch": 0.1629,
      "grad_norm": 1.2304301370076913,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 16290
    },
    {
      "epoch": 0.16291,
      "grad_norm": 1.3643880545394098,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 16291
    },
    {
      "epoch": 0.16292,
      "grad_norm": 1.0740680933845081,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 16292
    },
    {
      "epoch": 0.16293,
      "grad_norm": 1.3050220676600632,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 16293
    },
    {
      "epoch": 0.16294,
      "grad_norm": 1.0184695674557405,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 16294
    },
    {
      "epoch": 0.16295,
      "grad_norm": 1.6244685049589114,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 16295
    },
    {
      "epoch": 0.16296,
      "grad_norm": 0.9898495972924228,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 16296
    },
    {
      "epoch": 0.16297,
      "grad_norm": 1.373468676826956,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 16297
    },
    {
      "epoch": 0.16298,
      "grad_norm": 1.0752429483583343,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 16298
    },
    {
      "epoch": 0.16299,
      "grad_norm": 1.1489424900245766,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 16299
    },
    {
      "epoch": 0.163,
      "grad_norm": 1.3025797423455028,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 16300
    },
    {
      "epoch": 0.16301,
      "grad_norm": 1.0566881385453588,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 16301
    },
    {
      "epoch": 0.16302,
      "grad_norm": 1.2898357916465581,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 16302
    },
    {
      "epoch": 0.16303,
      "grad_norm": 1.1588859621825356,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 16303
    },
    {
      "epoch": 0.16304,
      "grad_norm": 1.1608499531827632,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 16304
    },
    {
      "epoch": 0.16305,
      "grad_norm": 1.123357233759964,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 16305
    },
    {
      "epoch": 0.16306,
      "grad_norm": 1.4211787825524893,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 16306
    },
    {
      "epoch": 0.16307,
      "grad_norm": 1.4222413627859347,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 16307
    },
    {
      "epoch": 0.16308,
      "grad_norm": 1.4241146551019983,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 16308
    },
    {
      "epoch": 0.16309,
      "grad_norm": 1.2873547417279407,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 16309
    },
    {
      "epoch": 0.1631,
      "grad_norm": 1.0979961313056252,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 16310
    },
    {
      "epoch": 0.16311,
      "grad_norm": 1.5158301454154433,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 16311
    },
    {
      "epoch": 0.16312,
      "grad_norm": 0.9623413202787349,
      "learning_rate": 0.003,
      "loss": 4.1004,
      "step": 16312
    },
    {
      "epoch": 0.16313,
      "grad_norm": 1.1579853235171516,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 16313
    },
    {
      "epoch": 0.16314,
      "grad_norm": 1.2370693069383596,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16314
    },
    {
      "epoch": 0.16315,
      "grad_norm": 1.223963284605398,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 16315
    },
    {
      "epoch": 0.16316,
      "grad_norm": 1.2131062801443062,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 16316
    },
    {
      "epoch": 0.16317,
      "grad_norm": 0.9333364883918731,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 16317
    },
    {
      "epoch": 0.16318,
      "grad_norm": 1.2545637003766508,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 16318
    },
    {
      "epoch": 0.16319,
      "grad_norm": 1.1637564320008495,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 16319
    },
    {
      "epoch": 0.1632,
      "grad_norm": 1.3930824654232326,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 16320
    },
    {
      "epoch": 0.16321,
      "grad_norm": 1.2701909486548193,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 16321
    },
    {
      "epoch": 0.16322,
      "grad_norm": 1.1504704692579468,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 16322
    },
    {
      "epoch": 0.16323,
      "grad_norm": 1.2619124017098002,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 16323
    },
    {
      "epoch": 0.16324,
      "grad_norm": 1.6371242880230603,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 16324
    },
    {
      "epoch": 0.16325,
      "grad_norm": 1.2092492938774504,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 16325
    },
    {
      "epoch": 0.16326,
      "grad_norm": 1.4449491267608436,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 16326
    },
    {
      "epoch": 0.16327,
      "grad_norm": 1.2168703017267488,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 16327
    },
    {
      "epoch": 0.16328,
      "grad_norm": 1.353767460624721,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 16328
    },
    {
      "epoch": 0.16329,
      "grad_norm": 1.0766855812631326,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 16329
    },
    {
      "epoch": 0.1633,
      "grad_norm": 1.2225130893227987,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 16330
    },
    {
      "epoch": 0.16331,
      "grad_norm": 1.2415697408617543,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 16331
    },
    {
      "epoch": 0.16332,
      "grad_norm": 1.14481606629091,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 16332
    },
    {
      "epoch": 0.16333,
      "grad_norm": 1.1365437547684951,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16333
    },
    {
      "epoch": 0.16334,
      "grad_norm": 1.2608703109317938,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 16334
    },
    {
      "epoch": 0.16335,
      "grad_norm": 1.1967000746733578,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 16335
    },
    {
      "epoch": 0.16336,
      "grad_norm": 1.1930161262170862,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 16336
    },
    {
      "epoch": 0.16337,
      "grad_norm": 1.2430520582370888,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 16337
    },
    {
      "epoch": 0.16338,
      "grad_norm": 1.3995785930958728,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 16338
    },
    {
      "epoch": 0.16339,
      "grad_norm": 1.2550047638086808,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 16339
    },
    {
      "epoch": 0.1634,
      "grad_norm": 1.0181579197625517,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 16340
    },
    {
      "epoch": 0.16341,
      "grad_norm": 1.1941714205548857,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 16341
    },
    {
      "epoch": 0.16342,
      "grad_norm": 1.1161593175141173,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 16342
    },
    {
      "epoch": 0.16343,
      "grad_norm": 1.3615843906357468,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 16343
    },
    {
      "epoch": 0.16344,
      "grad_norm": 1.017842175059377,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 16344
    },
    {
      "epoch": 0.16345,
      "grad_norm": 1.1738558883336643,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 16345
    },
    {
      "epoch": 0.16346,
      "grad_norm": 1.2112578199939257,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 16346
    },
    {
      "epoch": 0.16347,
      "grad_norm": 1.2394809936950661,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 16347
    },
    {
      "epoch": 0.16348,
      "grad_norm": 1.475630322606032,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 16348
    },
    {
      "epoch": 0.16349,
      "grad_norm": 1.0248623737151932,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 16349
    },
    {
      "epoch": 0.1635,
      "grad_norm": 1.4091242271820457,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 16350
    },
    {
      "epoch": 0.16351,
      "grad_norm": 1.1368378857737884,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 16351
    },
    {
      "epoch": 0.16352,
      "grad_norm": 1.1872056869253942,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 16352
    },
    {
      "epoch": 0.16353,
      "grad_norm": 1.1185431794190572,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 16353
    },
    {
      "epoch": 0.16354,
      "grad_norm": 1.137493645130181,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 16354
    },
    {
      "epoch": 0.16355,
      "grad_norm": 1.2358146820289326,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 16355
    },
    {
      "epoch": 0.16356,
      "grad_norm": 1.2315889636488526,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 16356
    },
    {
      "epoch": 0.16357,
      "grad_norm": 1.2802968208976975,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 16357
    },
    {
      "epoch": 0.16358,
      "grad_norm": 1.260811899864562,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 16358
    },
    {
      "epoch": 0.16359,
      "grad_norm": 1.2121622704122177,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 16359
    },
    {
      "epoch": 0.1636,
      "grad_norm": 1.2357583365276035,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 16360
    },
    {
      "epoch": 0.16361,
      "grad_norm": 1.377904643878386,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 16361
    },
    {
      "epoch": 0.16362,
      "grad_norm": 0.9684549664648887,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 16362
    },
    {
      "epoch": 0.16363,
      "grad_norm": 1.4179259020766957,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 16363
    },
    {
      "epoch": 0.16364,
      "grad_norm": 1.1206211379617204,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 16364
    },
    {
      "epoch": 0.16365,
      "grad_norm": 1.279809554655433,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 16365
    },
    {
      "epoch": 0.16366,
      "grad_norm": 1.2594313513466797,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 16366
    },
    {
      "epoch": 0.16367,
      "grad_norm": 1.225775077542689,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 16367
    },
    {
      "epoch": 0.16368,
      "grad_norm": 1.3272212082797992,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 16368
    },
    {
      "epoch": 0.16369,
      "grad_norm": 1.2976282058453512,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 16369
    },
    {
      "epoch": 0.1637,
      "grad_norm": 1.234289289259054,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 16370
    },
    {
      "epoch": 0.16371,
      "grad_norm": 1.3518875427870496,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 16371
    },
    {
      "epoch": 0.16372,
      "grad_norm": 1.1901678123588724,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 16372
    },
    {
      "epoch": 0.16373,
      "grad_norm": 1.1922646263251482,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16373
    },
    {
      "epoch": 0.16374,
      "grad_norm": 1.118598784078167,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 16374
    },
    {
      "epoch": 0.16375,
      "grad_norm": 1.3563234908303308,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 16375
    },
    {
      "epoch": 0.16376,
      "grad_norm": 0.9629005182541244,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 16376
    },
    {
      "epoch": 0.16377,
      "grad_norm": 1.3759361678038555,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 16377
    },
    {
      "epoch": 0.16378,
      "grad_norm": 1.1391749543532,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 16378
    },
    {
      "epoch": 0.16379,
      "grad_norm": 1.3822263931996204,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 16379
    },
    {
      "epoch": 0.1638,
      "grad_norm": 1.1339954320059182,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 16380
    },
    {
      "epoch": 0.16381,
      "grad_norm": 1.3466622107327346,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16381
    },
    {
      "epoch": 0.16382,
      "grad_norm": 1.1597951508934399,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 16382
    },
    {
      "epoch": 0.16383,
      "grad_norm": 1.2302866378170052,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 16383
    },
    {
      "epoch": 0.16384,
      "grad_norm": 1.2515905665860958,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 16384
    },
    {
      "epoch": 0.16385,
      "grad_norm": 1.0422015271915603,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 16385
    },
    {
      "epoch": 0.16386,
      "grad_norm": 1.29635466700889,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 16386
    },
    {
      "epoch": 0.16387,
      "grad_norm": 1.0875135908370575,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 16387
    },
    {
      "epoch": 0.16388,
      "grad_norm": 1.2718309619067318,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 16388
    },
    {
      "epoch": 0.16389,
      "grad_norm": 1.0022806628307235,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 16389
    },
    {
      "epoch": 0.1639,
      "grad_norm": 1.4790777363163723,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 16390
    },
    {
      "epoch": 0.16391,
      "grad_norm": 1.14263807272635,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 16391
    },
    {
      "epoch": 0.16392,
      "grad_norm": 1.1865540155604621,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 16392
    },
    {
      "epoch": 0.16393,
      "grad_norm": 1.415860201754588,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 16393
    },
    {
      "epoch": 0.16394,
      "grad_norm": 1.3899506802540258,
      "learning_rate": 0.003,
      "loss": 4.0968,
      "step": 16394
    },
    {
      "epoch": 0.16395,
      "grad_norm": 1.5882227448719515,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 16395
    },
    {
      "epoch": 0.16396,
      "grad_norm": 1.08999481689989,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 16396
    },
    {
      "epoch": 0.16397,
      "grad_norm": 1.2994181147414126,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 16397
    },
    {
      "epoch": 0.16398,
      "grad_norm": 1.1958925573152632,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 16398
    },
    {
      "epoch": 0.16399,
      "grad_norm": 1.086394744003385,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 16399
    },
    {
      "epoch": 0.164,
      "grad_norm": 1.2930250221392108,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 16400
    },
    {
      "epoch": 0.16401,
      "grad_norm": 0.8959460745072638,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 16401
    },
    {
      "epoch": 0.16402,
      "grad_norm": 1.227152163682272,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 16402
    },
    {
      "epoch": 0.16403,
      "grad_norm": 1.2293512458095264,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 16403
    },
    {
      "epoch": 0.16404,
      "grad_norm": 1.0173681014886449,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 16404
    },
    {
      "epoch": 0.16405,
      "grad_norm": 1.3262938317259634,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 16405
    },
    {
      "epoch": 0.16406,
      "grad_norm": 1.054899121048987,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16406
    },
    {
      "epoch": 0.16407,
      "grad_norm": 1.4160598366644352,
      "learning_rate": 0.003,
      "loss": 4.0951,
      "step": 16407
    },
    {
      "epoch": 0.16408,
      "grad_norm": 0.9675264790932663,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 16408
    },
    {
      "epoch": 0.16409,
      "grad_norm": 1.1201656449148025,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 16409
    },
    {
      "epoch": 0.1641,
      "grad_norm": 1.41925141094191,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 16410
    },
    {
      "epoch": 0.16411,
      "grad_norm": 1.466931189921125,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 16411
    },
    {
      "epoch": 0.16412,
      "grad_norm": 1.1504773378613948,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 16412
    },
    {
      "epoch": 0.16413,
      "grad_norm": 1.362644924490457,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 16413
    },
    {
      "epoch": 0.16414,
      "grad_norm": 1.0361407075459195,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 16414
    },
    {
      "epoch": 0.16415,
      "grad_norm": 1.3555931613478198,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 16415
    },
    {
      "epoch": 0.16416,
      "grad_norm": 1.231546061828751,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 16416
    },
    {
      "epoch": 0.16417,
      "grad_norm": 1.1195223025704868,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 16417
    },
    {
      "epoch": 0.16418,
      "grad_norm": 1.3278966187876984,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 16418
    },
    {
      "epoch": 0.16419,
      "grad_norm": 1.1440588893808925,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 16419
    },
    {
      "epoch": 0.1642,
      "grad_norm": 1.4434601081654999,
      "learning_rate": 0.003,
      "loss": 4.1053,
      "step": 16420
    },
    {
      "epoch": 0.16421,
      "grad_norm": 1.0226496351857584,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16421
    },
    {
      "epoch": 0.16422,
      "grad_norm": 1.5841365599632116,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 16422
    },
    {
      "epoch": 0.16423,
      "grad_norm": 1.1434420576338018,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 16423
    },
    {
      "epoch": 0.16424,
      "grad_norm": 1.2985223382883153,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 16424
    },
    {
      "epoch": 0.16425,
      "grad_norm": 1.132505631613156,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16425
    },
    {
      "epoch": 0.16426,
      "grad_norm": 1.3534589432652555,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 16426
    },
    {
      "epoch": 0.16427,
      "grad_norm": 1.4277535109802608,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 16427
    },
    {
      "epoch": 0.16428,
      "grad_norm": 1.196476803073257,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 16428
    },
    {
      "epoch": 0.16429,
      "grad_norm": 1.1345684075559352,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 16429
    },
    {
      "epoch": 0.1643,
      "grad_norm": 1.4574862318885673,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 16430
    },
    {
      "epoch": 0.16431,
      "grad_norm": 1.0817848202585982,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 16431
    },
    {
      "epoch": 0.16432,
      "grad_norm": 1.3027331901469046,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 16432
    },
    {
      "epoch": 0.16433,
      "grad_norm": 1.179901742427836,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 16433
    },
    {
      "epoch": 0.16434,
      "grad_norm": 1.1242233806228457,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 16434
    },
    {
      "epoch": 0.16435,
      "grad_norm": 1.1178056065486728,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 16435
    },
    {
      "epoch": 0.16436,
      "grad_norm": 1.198810463998239,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 16436
    },
    {
      "epoch": 0.16437,
      "grad_norm": 1.0265981605669419,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 16437
    },
    {
      "epoch": 0.16438,
      "grad_norm": 1.3442598281984819,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 16438
    },
    {
      "epoch": 0.16439,
      "grad_norm": 1.1147868203594866,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 16439
    },
    {
      "epoch": 0.1644,
      "grad_norm": 1.2091652208444614,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 16440
    },
    {
      "epoch": 0.16441,
      "grad_norm": 1.2902464501166047,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 16441
    },
    {
      "epoch": 0.16442,
      "grad_norm": 1.1058223615753715,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 16442
    },
    {
      "epoch": 0.16443,
      "grad_norm": 1.4162825495350395,
      "learning_rate": 0.003,
      "loss": 4.0909,
      "step": 16443
    },
    {
      "epoch": 0.16444,
      "grad_norm": 0.9704567342020411,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 16444
    },
    {
      "epoch": 0.16445,
      "grad_norm": 1.2929890493890461,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 16445
    },
    {
      "epoch": 0.16446,
      "grad_norm": 1.312806546807894,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 16446
    },
    {
      "epoch": 0.16447,
      "grad_norm": 1.3934869343729224,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 16447
    },
    {
      "epoch": 0.16448,
      "grad_norm": 1.238306320379035,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 16448
    },
    {
      "epoch": 0.16449,
      "grad_norm": 1.046299047424111,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 16449
    },
    {
      "epoch": 0.1645,
      "grad_norm": 1.37940166459787,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 16450
    },
    {
      "epoch": 0.16451,
      "grad_norm": 0.9769709791788973,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 16451
    },
    {
      "epoch": 0.16452,
      "grad_norm": 1.4713445311775446,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 16452
    },
    {
      "epoch": 0.16453,
      "grad_norm": 1.0333409989710807,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 16453
    },
    {
      "epoch": 0.16454,
      "grad_norm": 1.275234893906959,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 16454
    },
    {
      "epoch": 0.16455,
      "grad_norm": 1.2147269810274521,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 16455
    },
    {
      "epoch": 0.16456,
      "grad_norm": 1.3637218724665405,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 16456
    },
    {
      "epoch": 0.16457,
      "grad_norm": 1.104648426622214,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 16457
    },
    {
      "epoch": 0.16458,
      "grad_norm": 1.2502549309947466,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 16458
    },
    {
      "epoch": 0.16459,
      "grad_norm": 1.1989193222086119,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 16459
    },
    {
      "epoch": 0.1646,
      "grad_norm": 1.1567167697347025,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 16460
    },
    {
      "epoch": 0.16461,
      "grad_norm": 1.2334558387358596,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16461
    },
    {
      "epoch": 0.16462,
      "grad_norm": 1.2588965894970137,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 16462
    },
    {
      "epoch": 0.16463,
      "grad_norm": 1.0711040285980924,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 16463
    },
    {
      "epoch": 0.16464,
      "grad_norm": 1.3324142770374714,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 16464
    },
    {
      "epoch": 0.16465,
      "grad_norm": 1.2924020103371354,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 16465
    },
    {
      "epoch": 0.16466,
      "grad_norm": 1.2271686092882501,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 16466
    },
    {
      "epoch": 0.16467,
      "grad_norm": 1.1391827547004902,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 16467
    },
    {
      "epoch": 0.16468,
      "grad_norm": 1.2727746840360479,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 16468
    },
    {
      "epoch": 0.16469,
      "grad_norm": 1.1221544359042348,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 16469
    },
    {
      "epoch": 0.1647,
      "grad_norm": 1.308102129076781,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16470
    },
    {
      "epoch": 0.16471,
      "grad_norm": 1.3310089253793593,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 16471
    },
    {
      "epoch": 0.16472,
      "grad_norm": 1.099131548971726,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 16472
    },
    {
      "epoch": 0.16473,
      "grad_norm": 1.1507369134549297,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 16473
    },
    {
      "epoch": 0.16474,
      "grad_norm": 1.1696433055407094,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 16474
    },
    {
      "epoch": 0.16475,
      "grad_norm": 1.185818918775687,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 16475
    },
    {
      "epoch": 0.16476,
      "grad_norm": 1.0511754226018224,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 16476
    },
    {
      "epoch": 0.16477,
      "grad_norm": 1.342190845266813,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 16477
    },
    {
      "epoch": 0.16478,
      "grad_norm": 1.396898197039214,
      "learning_rate": 0.003,
      "loss": 4.0907,
      "step": 16478
    },
    {
      "epoch": 0.16479,
      "grad_norm": 1.0029197019363287,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 16479
    },
    {
      "epoch": 0.1648,
      "grad_norm": 1.3474363421958357,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 16480
    },
    {
      "epoch": 0.16481,
      "grad_norm": 1.203370400537817,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 16481
    },
    {
      "epoch": 0.16482,
      "grad_norm": 1.3496925563552822,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 16482
    },
    {
      "epoch": 0.16483,
      "grad_norm": 1.140456352470189,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 16483
    },
    {
      "epoch": 0.16484,
      "grad_norm": 1.161661118843948,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 16484
    },
    {
      "epoch": 0.16485,
      "grad_norm": 1.2702972031072666,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 16485
    },
    {
      "epoch": 0.16486,
      "grad_norm": 1.214327740218,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 16486
    },
    {
      "epoch": 0.16487,
      "grad_norm": 1.4686085233106276,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 16487
    },
    {
      "epoch": 0.16488,
      "grad_norm": 1.1129341580421621,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 16488
    },
    {
      "epoch": 0.16489,
      "grad_norm": 1.4217588281003763,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 16489
    },
    {
      "epoch": 0.1649,
      "grad_norm": 1.0703469821845155,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 16490
    },
    {
      "epoch": 0.16491,
      "grad_norm": 1.265477567719717,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 16491
    },
    {
      "epoch": 0.16492,
      "grad_norm": 1.0621408450657313,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 16492
    },
    {
      "epoch": 0.16493,
      "grad_norm": 1.3972589457929456,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 16493
    },
    {
      "epoch": 0.16494,
      "grad_norm": 1.2650926087062098,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 16494
    },
    {
      "epoch": 0.16495,
      "grad_norm": 1.2978586043422418,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 16495
    },
    {
      "epoch": 0.16496,
      "grad_norm": 1.261472305227728,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 16496
    },
    {
      "epoch": 0.16497,
      "grad_norm": 1.242683064248222,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 16497
    },
    {
      "epoch": 0.16498,
      "grad_norm": 1.2036963193234782,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 16498
    },
    {
      "epoch": 0.16499,
      "grad_norm": 1.3948241551460334,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 16499
    },
    {
      "epoch": 0.165,
      "grad_norm": 1.07581359154424,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 16500
    },
    {
      "epoch": 0.16501,
      "grad_norm": 1.5118180487772566,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 16501
    },
    {
      "epoch": 0.16502,
      "grad_norm": 1.261475590974746,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 16502
    },
    {
      "epoch": 0.16503,
      "grad_norm": 1.0255140844117148,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 16503
    },
    {
      "epoch": 0.16504,
      "grad_norm": 1.3639751753138154,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 16504
    },
    {
      "epoch": 0.16505,
      "grad_norm": 1.202677925288568,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16505
    },
    {
      "epoch": 0.16506,
      "grad_norm": 1.2378749997616612,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 16506
    },
    {
      "epoch": 0.16507,
      "grad_norm": 1.145146155706816,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 16507
    },
    {
      "epoch": 0.16508,
      "grad_norm": 1.2535730286186197,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 16508
    },
    {
      "epoch": 0.16509,
      "grad_norm": 1.1298757008410567,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 16509
    },
    {
      "epoch": 0.1651,
      "grad_norm": 1.038127001875258,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 16510
    },
    {
      "epoch": 0.16511,
      "grad_norm": 1.2620275203759714,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 16511
    },
    {
      "epoch": 0.16512,
      "grad_norm": 1.1271879091413812,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 16512
    },
    {
      "epoch": 0.16513,
      "grad_norm": 1.3324389570308686,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 16513
    },
    {
      "epoch": 0.16514,
      "grad_norm": 1.0241500602776423,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 16514
    },
    {
      "epoch": 0.16515,
      "grad_norm": 1.2954261214680667,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 16515
    },
    {
      "epoch": 0.16516,
      "grad_norm": 1.224561218138641,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 16516
    },
    {
      "epoch": 0.16517,
      "grad_norm": 1.4856014637916959,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 16517
    },
    {
      "epoch": 0.16518,
      "grad_norm": 1.3083785465492175,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 16518
    },
    {
      "epoch": 0.16519,
      "grad_norm": 1.1754221139693608,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16519
    },
    {
      "epoch": 0.1652,
      "grad_norm": 1.383733782221718,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 16520
    },
    {
      "epoch": 0.16521,
      "grad_norm": 0.9224264945042131,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 16521
    },
    {
      "epoch": 0.16522,
      "grad_norm": 1.271845436140457,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 16522
    },
    {
      "epoch": 0.16523,
      "grad_norm": 1.2684184328179449,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 16523
    },
    {
      "epoch": 0.16524,
      "grad_norm": 1.2957251132249745,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 16524
    },
    {
      "epoch": 0.16525,
      "grad_norm": 1.0759420818376795,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 16525
    },
    {
      "epoch": 0.16526,
      "grad_norm": 1.3680200743833646,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16526
    },
    {
      "epoch": 0.16527,
      "grad_norm": 1.3403723906917697,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 16527
    },
    {
      "epoch": 0.16528,
      "grad_norm": 1.08893530152959,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 16528
    },
    {
      "epoch": 0.16529,
      "grad_norm": 1.3497970090602143,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 16529
    },
    {
      "epoch": 0.1653,
      "grad_norm": 1.0818049101829756,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 16530
    },
    {
      "epoch": 0.16531,
      "grad_norm": 1.4560504817993078,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 16531
    },
    {
      "epoch": 0.16532,
      "grad_norm": 1.2019258840381728,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 16532
    },
    {
      "epoch": 0.16533,
      "grad_norm": 1.2671708842962617,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 16533
    },
    {
      "epoch": 0.16534,
      "grad_norm": 1.1933625613664025,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 16534
    },
    {
      "epoch": 0.16535,
      "grad_norm": 1.0915886188920134,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 16535
    },
    {
      "epoch": 0.16536,
      "grad_norm": 1.2468246178670828,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 16536
    },
    {
      "epoch": 0.16537,
      "grad_norm": 1.0420479871195687,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 16537
    },
    {
      "epoch": 0.16538,
      "grad_norm": 1.4846408040185886,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16538
    },
    {
      "epoch": 0.16539,
      "grad_norm": 1.0010507915854987,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 16539
    },
    {
      "epoch": 0.1654,
      "grad_norm": 1.3841859663118798,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 16540
    },
    {
      "epoch": 0.16541,
      "grad_norm": 0.9995675238909607,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 16541
    },
    {
      "epoch": 0.16542,
      "grad_norm": 1.4135590670613585,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 16542
    },
    {
      "epoch": 0.16543,
      "grad_norm": 1.271032768090048,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 16543
    },
    {
      "epoch": 0.16544,
      "grad_norm": 1.0003598220576706,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 16544
    },
    {
      "epoch": 0.16545,
      "grad_norm": 1.4438228510842843,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 16545
    },
    {
      "epoch": 0.16546,
      "grad_norm": 1.079194189276615,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 16546
    },
    {
      "epoch": 0.16547,
      "grad_norm": 1.3462259069028204,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 16547
    },
    {
      "epoch": 0.16548,
      "grad_norm": 1.0604888414253373,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 16548
    },
    {
      "epoch": 0.16549,
      "grad_norm": 1.4315815511781533,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 16549
    },
    {
      "epoch": 0.1655,
      "grad_norm": 1.279012800053747,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16550
    },
    {
      "epoch": 0.16551,
      "grad_norm": 1.3147197009267735,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16551
    },
    {
      "epoch": 0.16552,
      "grad_norm": 1.3008364923026423,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 16552
    },
    {
      "epoch": 0.16553,
      "grad_norm": 1.0376654244687642,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 16553
    },
    {
      "epoch": 0.16554,
      "grad_norm": 1.3682159327472294,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 16554
    },
    {
      "epoch": 0.16555,
      "grad_norm": 1.287686464491986,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 16555
    },
    {
      "epoch": 0.16556,
      "grad_norm": 1.0984911002136095,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 16556
    },
    {
      "epoch": 0.16557,
      "grad_norm": 1.4013179191300427,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 16557
    },
    {
      "epoch": 0.16558,
      "grad_norm": 1.2721433708285537,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 16558
    },
    {
      "epoch": 0.16559,
      "grad_norm": 1.2072626658190684,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 16559
    },
    {
      "epoch": 0.1656,
      "grad_norm": 1.2627153176527717,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 16560
    },
    {
      "epoch": 0.16561,
      "grad_norm": 1.3613505028934798,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 16561
    },
    {
      "epoch": 0.16562,
      "grad_norm": 1.3669212213985202,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 16562
    },
    {
      "epoch": 0.16563,
      "grad_norm": 1.1351984881474968,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 16563
    },
    {
      "epoch": 0.16564,
      "grad_norm": 1.245779541760801,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 16564
    },
    {
      "epoch": 0.16565,
      "grad_norm": 1.2270767489407162,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 16565
    },
    {
      "epoch": 0.16566,
      "grad_norm": 1.1773840818283083,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 16566
    },
    {
      "epoch": 0.16567,
      "grad_norm": 1.1912047515356559,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 16567
    },
    {
      "epoch": 0.16568,
      "grad_norm": 1.123840748354702,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 16568
    },
    {
      "epoch": 0.16569,
      "grad_norm": 1.1655040178671072,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 16569
    },
    {
      "epoch": 0.1657,
      "grad_norm": 1.3526719963038978,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 16570
    },
    {
      "epoch": 0.16571,
      "grad_norm": 1.1875691743114833,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 16571
    },
    {
      "epoch": 0.16572,
      "grad_norm": 1.2235636981375877,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 16572
    },
    {
      "epoch": 0.16573,
      "grad_norm": 1.2977232871300532,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 16573
    },
    {
      "epoch": 0.16574,
      "grad_norm": 1.2268435006440799,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 16574
    },
    {
      "epoch": 0.16575,
      "grad_norm": 1.158616634691927,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 16575
    },
    {
      "epoch": 0.16576,
      "grad_norm": 1.2259858847843446,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 16576
    },
    {
      "epoch": 0.16577,
      "grad_norm": 1.109482144570921,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 16577
    },
    {
      "epoch": 0.16578,
      "grad_norm": 1.2766512889347732,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 16578
    },
    {
      "epoch": 0.16579,
      "grad_norm": 1.0916250750966277,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 16579
    },
    {
      "epoch": 0.1658,
      "grad_norm": 1.3784453360171725,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 16580
    },
    {
      "epoch": 0.16581,
      "grad_norm": 1.3106198650486158,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 16581
    },
    {
      "epoch": 0.16582,
      "grad_norm": 1.1538738897341496,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 16582
    },
    {
      "epoch": 0.16583,
      "grad_norm": 1.289280959030002,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 16583
    },
    {
      "epoch": 0.16584,
      "grad_norm": 1.1419639947253375,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 16584
    },
    {
      "epoch": 0.16585,
      "grad_norm": 1.1909428219339122,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 16585
    },
    {
      "epoch": 0.16586,
      "grad_norm": 1.233121475694777,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 16586
    },
    {
      "epoch": 0.16587,
      "grad_norm": 1.281209517667142,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 16587
    },
    {
      "epoch": 0.16588,
      "grad_norm": 1.307368701030308,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 16588
    },
    {
      "epoch": 0.16589,
      "grad_norm": 1.0210678628509378,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 16589
    },
    {
      "epoch": 0.1659,
      "grad_norm": 1.50612650511528,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 16590
    },
    {
      "epoch": 0.16591,
      "grad_norm": 0.9982600987829718,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 16591
    },
    {
      "epoch": 0.16592,
      "grad_norm": 1.4759984528929282,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 16592
    },
    {
      "epoch": 0.16593,
      "grad_norm": 1.2897250493468178,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 16593
    },
    {
      "epoch": 0.16594,
      "grad_norm": 1.5208296147653992,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 16594
    },
    {
      "epoch": 0.16595,
      "grad_norm": 1.0583249028775887,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 16595
    },
    {
      "epoch": 0.16596,
      "grad_norm": 1.2899447233941335,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 16596
    },
    {
      "epoch": 0.16597,
      "grad_norm": 1.0402907849884682,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 16597
    },
    {
      "epoch": 0.16598,
      "grad_norm": 1.2329279897475582,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 16598
    },
    {
      "epoch": 0.16599,
      "grad_norm": 1.2710444937587715,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 16599
    },
    {
      "epoch": 0.166,
      "grad_norm": 1.0563254861048152,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 16600
    },
    {
      "epoch": 0.16601,
      "grad_norm": 1.2066109622890218,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 16601
    },
    {
      "epoch": 0.16602,
      "grad_norm": 1.2329930980591854,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 16602
    },
    {
      "epoch": 0.16603,
      "grad_norm": 1.2790475248579785,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 16603
    },
    {
      "epoch": 0.16604,
      "grad_norm": 1.225644885990106,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 16604
    },
    {
      "epoch": 0.16605,
      "grad_norm": 1.244570324311943,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 16605
    },
    {
      "epoch": 0.16606,
      "grad_norm": 1.524337993014525,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 16606
    },
    {
      "epoch": 0.16607,
      "grad_norm": 1.0802970479558118,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 16607
    },
    {
      "epoch": 0.16608,
      "grad_norm": 1.479143935511754,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 16608
    },
    {
      "epoch": 0.16609,
      "grad_norm": 1.0502722454074231,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 16609
    },
    {
      "epoch": 0.1661,
      "grad_norm": 1.09474150083467,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 16610
    },
    {
      "epoch": 0.16611,
      "grad_norm": 1.2960369195096926,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 16611
    },
    {
      "epoch": 0.16612,
      "grad_norm": 1.1297372361576241,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 16612
    },
    {
      "epoch": 0.16613,
      "grad_norm": 1.4419374093664417,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 16613
    },
    {
      "epoch": 0.16614,
      "grad_norm": 0.9300048151347583,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 16614
    },
    {
      "epoch": 0.16615,
      "grad_norm": 1.2192941217633135,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 16615
    },
    {
      "epoch": 0.16616,
      "grad_norm": 1.3430237358940136,
      "learning_rate": 0.003,
      "loss": 4.1127,
      "step": 16616
    },
    {
      "epoch": 0.16617,
      "grad_norm": 1.1587947661119633,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 16617
    },
    {
      "epoch": 0.16618,
      "grad_norm": 1.0769665339153838,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 16618
    },
    {
      "epoch": 0.16619,
      "grad_norm": 1.1729264327104374,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 16619
    },
    {
      "epoch": 0.1662,
      "grad_norm": 1.3988852778387986,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 16620
    },
    {
      "epoch": 0.16621,
      "grad_norm": 0.998962651394616,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 16621
    },
    {
      "epoch": 0.16622,
      "grad_norm": 1.2830511904376256,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 16622
    },
    {
      "epoch": 0.16623,
      "grad_norm": 1.1378218315230693,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 16623
    },
    {
      "epoch": 0.16624,
      "grad_norm": 1.3807881831736453,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 16624
    },
    {
      "epoch": 0.16625,
      "grad_norm": 0.9858965160582824,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 16625
    },
    {
      "epoch": 0.16626,
      "grad_norm": 1.7187760223123387,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 16626
    },
    {
      "epoch": 0.16627,
      "grad_norm": 1.2227095831403285,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 16627
    },
    {
      "epoch": 0.16628,
      "grad_norm": 1.2230579568409723,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 16628
    },
    {
      "epoch": 0.16629,
      "grad_norm": 1.156960828242787,
      "learning_rate": 0.003,
      "loss": 4.1007,
      "step": 16629
    },
    {
      "epoch": 0.1663,
      "grad_norm": 1.1059706078495835,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 16630
    },
    {
      "epoch": 0.16631,
      "grad_norm": 1.2862955782814594,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 16631
    },
    {
      "epoch": 0.16632,
      "grad_norm": 1.2112016157602854,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16632
    },
    {
      "epoch": 0.16633,
      "grad_norm": 1.3286289272363898,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 16633
    },
    {
      "epoch": 0.16634,
      "grad_norm": 1.1538531993132737,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 16634
    },
    {
      "epoch": 0.16635,
      "grad_norm": 1.2576693976330668,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 16635
    },
    {
      "epoch": 0.16636,
      "grad_norm": 1.1026365269531988,
      "learning_rate": 0.003,
      "loss": 4.0053,
      "step": 16636
    },
    {
      "epoch": 0.16637,
      "grad_norm": 1.548931553914938,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 16637
    },
    {
      "epoch": 0.16638,
      "grad_norm": 1.117314280310054,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16638
    },
    {
      "epoch": 0.16639,
      "grad_norm": 1.5756770892701766,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 16639
    },
    {
      "epoch": 0.1664,
      "grad_norm": 0.9983836493429521,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 16640
    },
    {
      "epoch": 0.16641,
      "grad_norm": 1.0402184994585415,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 16641
    },
    {
      "epoch": 0.16642,
      "grad_norm": 1.251995281802957,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 16642
    },
    {
      "epoch": 0.16643,
      "grad_norm": 1.1515925373876876,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 16643
    },
    {
      "epoch": 0.16644,
      "grad_norm": 1.309248830234756,
      "learning_rate": 0.003,
      "loss": 4.0839,
      "step": 16644
    },
    {
      "epoch": 0.16645,
      "grad_norm": 0.970468496424474,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 16645
    },
    {
      "epoch": 0.16646,
      "grad_norm": 1.3792261702226625,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16646
    },
    {
      "epoch": 0.16647,
      "grad_norm": 1.4702319843822849,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 16647
    },
    {
      "epoch": 0.16648,
      "grad_norm": 1.0627611009219051,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 16648
    },
    {
      "epoch": 0.16649,
      "grad_norm": 1.4639207361370845,
      "learning_rate": 0.003,
      "loss": 4.0948,
      "step": 16649
    },
    {
      "epoch": 0.1665,
      "grad_norm": 0.9345316126168038,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 16650
    },
    {
      "epoch": 0.16651,
      "grad_norm": 1.1455019229999,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 16651
    },
    {
      "epoch": 0.16652,
      "grad_norm": 1.2811163596097883,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 16652
    },
    {
      "epoch": 0.16653,
      "grad_norm": 1.3224004194363164,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 16653
    },
    {
      "epoch": 0.16654,
      "grad_norm": 1.3176969368553564,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16654
    },
    {
      "epoch": 0.16655,
      "grad_norm": 1.0965596801476316,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 16655
    },
    {
      "epoch": 0.16656,
      "grad_norm": 1.4494251414404067,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 16656
    },
    {
      "epoch": 0.16657,
      "grad_norm": 1.0490595911504679,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 16657
    },
    {
      "epoch": 0.16658,
      "grad_norm": 1.5623421520822351,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 16658
    },
    {
      "epoch": 0.16659,
      "grad_norm": 1.0680816050788435,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 16659
    },
    {
      "epoch": 0.1666,
      "grad_norm": 1.2744220397032522,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 16660
    },
    {
      "epoch": 0.16661,
      "grad_norm": 1.2102224257041356,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 16661
    },
    {
      "epoch": 0.16662,
      "grad_norm": 1.2072423576390283,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 16662
    },
    {
      "epoch": 0.16663,
      "grad_norm": 1.142951208437605,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 16663
    },
    {
      "epoch": 0.16664,
      "grad_norm": 1.2096617764630802,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 16664
    },
    {
      "epoch": 0.16665,
      "grad_norm": 1.3496536003210005,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 16665
    },
    {
      "epoch": 0.16666,
      "grad_norm": 1.1119443012894936,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 16666
    },
    {
      "epoch": 0.16667,
      "grad_norm": 1.3637097231304605,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 16667
    },
    {
      "epoch": 0.16668,
      "grad_norm": 1.136525798824719,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 16668
    },
    {
      "epoch": 0.16669,
      "grad_norm": 1.2871812450875617,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 16669
    },
    {
      "epoch": 0.1667,
      "grad_norm": 1.0484800005031734,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 16670
    },
    {
      "epoch": 0.16671,
      "grad_norm": 1.359449088041725,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 16671
    },
    {
      "epoch": 0.16672,
      "grad_norm": 0.9942863342115491,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 16672
    },
    {
      "epoch": 0.16673,
      "grad_norm": 1.3423120246085614,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 16673
    },
    {
      "epoch": 0.16674,
      "grad_norm": 1.2140808730736126,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 16674
    },
    {
      "epoch": 0.16675,
      "grad_norm": 1.4548008551067455,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 16675
    },
    {
      "epoch": 0.16676,
      "grad_norm": 1.1394767621560034,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 16676
    },
    {
      "epoch": 0.16677,
      "grad_norm": 1.109650645696266,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 16677
    },
    {
      "epoch": 0.16678,
      "grad_norm": 1.3453213444110206,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 16678
    },
    {
      "epoch": 0.16679,
      "grad_norm": 0.9973657059977181,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 16679
    },
    {
      "epoch": 0.1668,
      "grad_norm": 1.405969876693654,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 16680
    },
    {
      "epoch": 0.16681,
      "grad_norm": 1.4372242054083253,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 16681
    },
    {
      "epoch": 0.16682,
      "grad_norm": 1.2049277306896602,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 16682
    },
    {
      "epoch": 0.16683,
      "grad_norm": 1.1270606338060425,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 16683
    },
    {
      "epoch": 0.16684,
      "grad_norm": 1.3861618443258414,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 16684
    },
    {
      "epoch": 0.16685,
      "grad_norm": 1.300637850400735,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 16685
    },
    {
      "epoch": 0.16686,
      "grad_norm": 1.4305195354924658,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 16686
    },
    {
      "epoch": 0.16687,
      "grad_norm": 0.9834443407385918,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 16687
    },
    {
      "epoch": 0.16688,
      "grad_norm": 1.1516380475585228,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 16688
    },
    {
      "epoch": 0.16689,
      "grad_norm": 1.3003089848062008,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 16689
    },
    {
      "epoch": 0.1669,
      "grad_norm": 1.1630158367131944,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 16690
    },
    {
      "epoch": 0.16691,
      "grad_norm": 1.3116129953554654,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 16691
    },
    {
      "epoch": 0.16692,
      "grad_norm": 1.2074662314667017,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 16692
    },
    {
      "epoch": 0.16693,
      "grad_norm": 1.117522657261956,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 16693
    },
    {
      "epoch": 0.16694,
      "grad_norm": 1.2954370481690003,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 16694
    },
    {
      "epoch": 0.16695,
      "grad_norm": 1.1841582254798,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 16695
    },
    {
      "epoch": 0.16696,
      "grad_norm": 1.1602177828544977,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 16696
    },
    {
      "epoch": 0.16697,
      "grad_norm": 1.1429155185627244,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 16697
    },
    {
      "epoch": 0.16698,
      "grad_norm": 1.345459979414429,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 16698
    },
    {
      "epoch": 0.16699,
      "grad_norm": 1.0878738142344293,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 16699
    },
    {
      "epoch": 0.167,
      "grad_norm": 1.4486378160079911,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 16700
    },
    {
      "epoch": 0.16701,
      "grad_norm": 1.1832505013898313,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 16701
    },
    {
      "epoch": 0.16702,
      "grad_norm": 1.2511837552947567,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 16702
    },
    {
      "epoch": 0.16703,
      "grad_norm": 1.3032500450570588,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 16703
    },
    {
      "epoch": 0.16704,
      "grad_norm": 1.2286025139769188,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 16704
    },
    {
      "epoch": 0.16705,
      "grad_norm": 1.2664712081187757,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 16705
    },
    {
      "epoch": 0.16706,
      "grad_norm": 1.145412191225435,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 16706
    },
    {
      "epoch": 0.16707,
      "grad_norm": 1.0940826598922664,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 16707
    },
    {
      "epoch": 0.16708,
      "grad_norm": 1.4093829939888982,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 16708
    },
    {
      "epoch": 0.16709,
      "grad_norm": 1.1847099195012318,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 16709
    },
    {
      "epoch": 0.1671,
      "grad_norm": 1.2357493273984,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16710
    },
    {
      "epoch": 0.16711,
      "grad_norm": 1.1098081847606114,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 16711
    },
    {
      "epoch": 0.16712,
      "grad_norm": 1.5584462871650122,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 16712
    },
    {
      "epoch": 0.16713,
      "grad_norm": 1.0848551739989956,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 16713
    },
    {
      "epoch": 0.16714,
      "grad_norm": 1.4922464999775857,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 16714
    },
    {
      "epoch": 0.16715,
      "grad_norm": 1.0532986551389765,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 16715
    },
    {
      "epoch": 0.16716,
      "grad_norm": 1.2566759003383463,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 16716
    },
    {
      "epoch": 0.16717,
      "grad_norm": 1.3231547729401212,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 16717
    },
    {
      "epoch": 0.16718,
      "grad_norm": 1.0533646976294528,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 16718
    },
    {
      "epoch": 0.16719,
      "grad_norm": 1.2786192435431603,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 16719
    },
    {
      "epoch": 0.1672,
      "grad_norm": 1.2718527016177648,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 16720
    },
    {
      "epoch": 0.16721,
      "grad_norm": 1.3344911184399857,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 16721
    },
    {
      "epoch": 0.16722,
      "grad_norm": 1.1602486472769074,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 16722
    },
    {
      "epoch": 0.16723,
      "grad_norm": 1.251733459671244,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 16723
    },
    {
      "epoch": 0.16724,
      "grad_norm": 1.2158861810514705,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 16724
    },
    {
      "epoch": 0.16725,
      "grad_norm": 1.471518052860995,
      "learning_rate": 0.003,
      "loss": 4.0927,
      "step": 16725
    },
    {
      "epoch": 0.16726,
      "grad_norm": 1.0095992941540062,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 16726
    },
    {
      "epoch": 0.16727,
      "grad_norm": 1.324918748254166,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 16727
    },
    {
      "epoch": 0.16728,
      "grad_norm": 1.0844912101007658,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 16728
    },
    {
      "epoch": 0.16729,
      "grad_norm": 1.3535669261603624,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 16729
    },
    {
      "epoch": 0.1673,
      "grad_norm": 1.2555576937702524,
      "learning_rate": 0.003,
      "loss": 4.1071,
      "step": 16730
    },
    {
      "epoch": 0.16731,
      "grad_norm": 1.3869000673568552,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 16731
    },
    {
      "epoch": 0.16732,
      "grad_norm": 1.217822597439683,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 16732
    },
    {
      "epoch": 0.16733,
      "grad_norm": 1.1401702983763804,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 16733
    },
    {
      "epoch": 0.16734,
      "grad_norm": 1.5260237630917093,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 16734
    },
    {
      "epoch": 0.16735,
      "grad_norm": 1.0878394273854046,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 16735
    },
    {
      "epoch": 0.16736,
      "grad_norm": 1.4571879519148716,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 16736
    },
    {
      "epoch": 0.16737,
      "grad_norm": 1.1484215831393614,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 16737
    },
    {
      "epoch": 0.16738,
      "grad_norm": 1.239249105824225,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 16738
    },
    {
      "epoch": 0.16739,
      "grad_norm": 1.0846360676996116,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 16739
    },
    {
      "epoch": 0.1674,
      "grad_norm": 1.2457041173856414,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 16740
    },
    {
      "epoch": 0.16741,
      "grad_norm": 1.278139066819567,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 16741
    },
    {
      "epoch": 0.16742,
      "grad_norm": 1.288984975340617,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 16742
    },
    {
      "epoch": 0.16743,
      "grad_norm": 1.2454372914172789,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 16743
    },
    {
      "epoch": 0.16744,
      "grad_norm": 1.1742155484056467,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 16744
    },
    {
      "epoch": 0.16745,
      "grad_norm": 1.085445817969519,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 16745
    },
    {
      "epoch": 0.16746,
      "grad_norm": 1.2708236741095844,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 16746
    },
    {
      "epoch": 0.16747,
      "grad_norm": 1.2218806583737991,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 16747
    },
    {
      "epoch": 0.16748,
      "grad_norm": 1.2064425335775943,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 16748
    },
    {
      "epoch": 0.16749,
      "grad_norm": 1.1619865034622123,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 16749
    },
    {
      "epoch": 0.1675,
      "grad_norm": 1.1704000966985963,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 16750
    },
    {
      "epoch": 0.16751,
      "grad_norm": 1.1183024761571165,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 16751
    },
    {
      "epoch": 0.16752,
      "grad_norm": 1.0937304186087908,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 16752
    },
    {
      "epoch": 0.16753,
      "grad_norm": 1.373737523421354,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 16753
    },
    {
      "epoch": 0.16754,
      "grad_norm": 1.3196540953296025,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 16754
    },
    {
      "epoch": 0.16755,
      "grad_norm": 1.1139155806499885,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 16755
    },
    {
      "epoch": 0.16756,
      "grad_norm": 1.4359115891977452,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 16756
    },
    {
      "epoch": 0.16757,
      "grad_norm": 1.1165242308607433,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16757
    },
    {
      "epoch": 0.16758,
      "grad_norm": 1.182216776928444,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 16758
    },
    {
      "epoch": 0.16759,
      "grad_norm": 1.4008740145971497,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 16759
    },
    {
      "epoch": 0.1676,
      "grad_norm": 1.1061524789097879,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 16760
    },
    {
      "epoch": 0.16761,
      "grad_norm": 1.2007991816682586,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 16761
    },
    {
      "epoch": 0.16762,
      "grad_norm": 0.9883652542426329,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 16762
    },
    {
      "epoch": 0.16763,
      "grad_norm": 1.2734810036538666,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 16763
    },
    {
      "epoch": 0.16764,
      "grad_norm": 0.999922945265591,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 16764
    },
    {
      "epoch": 0.16765,
      "grad_norm": 1.4319142076681646,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 16765
    },
    {
      "epoch": 0.16766,
      "grad_norm": 1.1343143488760452,
      "learning_rate": 0.003,
      "loss": 4.1002,
      "step": 16766
    },
    {
      "epoch": 0.16767,
      "grad_norm": 1.2168925527908758,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 16767
    },
    {
      "epoch": 0.16768,
      "grad_norm": 1.1988282700392212,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 16768
    },
    {
      "epoch": 0.16769,
      "grad_norm": 1.1145099318152145,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 16769
    },
    {
      "epoch": 0.1677,
      "grad_norm": 1.5275396374681,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 16770
    },
    {
      "epoch": 0.16771,
      "grad_norm": 1.192165268592216,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 16771
    },
    {
      "epoch": 0.16772,
      "grad_norm": 1.2292219207859179,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 16772
    },
    {
      "epoch": 0.16773,
      "grad_norm": 1.3214290398435105,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 16773
    },
    {
      "epoch": 0.16774,
      "grad_norm": 1.1882177696146676,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 16774
    },
    {
      "epoch": 0.16775,
      "grad_norm": 1.548376013128292,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 16775
    },
    {
      "epoch": 0.16776,
      "grad_norm": 1.182191136580024,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 16776
    },
    {
      "epoch": 0.16777,
      "grad_norm": 1.4569800869382106,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 16777
    },
    {
      "epoch": 0.16778,
      "grad_norm": 1.0865748237703814,
      "learning_rate": 0.003,
      "loss": 4.089,
      "step": 16778
    },
    {
      "epoch": 0.16779,
      "grad_norm": 1.277386244617626,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 16779
    },
    {
      "epoch": 0.1678,
      "grad_norm": 1.0391810445636955,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 16780
    },
    {
      "epoch": 0.16781,
      "grad_norm": 1.1643208648906385,
      "learning_rate": 0.003,
      "loss": 4.1017,
      "step": 16781
    },
    {
      "epoch": 0.16782,
      "grad_norm": 1.1873531329716722,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 16782
    },
    {
      "epoch": 0.16783,
      "grad_norm": 1.1934136985580575,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 16783
    },
    {
      "epoch": 0.16784,
      "grad_norm": 1.2671358079736963,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 16784
    },
    {
      "epoch": 0.16785,
      "grad_norm": 1.3854175483820694,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 16785
    },
    {
      "epoch": 0.16786,
      "grad_norm": 1.2081305889906775,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 16786
    },
    {
      "epoch": 0.16787,
      "grad_norm": 1.214557089350369,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 16787
    },
    {
      "epoch": 0.16788,
      "grad_norm": 1.2221538985193494,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 16788
    },
    {
      "epoch": 0.16789,
      "grad_norm": 1.2767535267757835,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 16789
    },
    {
      "epoch": 0.1679,
      "grad_norm": 1.1098423119957812,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 16790
    },
    {
      "epoch": 0.16791,
      "grad_norm": 1.4444174542229276,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 16791
    },
    {
      "epoch": 0.16792,
      "grad_norm": 1.151166125718121,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 16792
    },
    {
      "epoch": 0.16793,
      "grad_norm": 1.456350573735467,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 16793
    },
    {
      "epoch": 0.16794,
      "grad_norm": 1.313794112815368,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 16794
    },
    {
      "epoch": 0.16795,
      "grad_norm": 1.0236949668333608,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 16795
    },
    {
      "epoch": 0.16796,
      "grad_norm": 1.4150976958572334,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 16796
    },
    {
      "epoch": 0.16797,
      "grad_norm": 1.137530296809528,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16797
    },
    {
      "epoch": 0.16798,
      "grad_norm": 1.4597846986507814,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 16798
    },
    {
      "epoch": 0.16799,
      "grad_norm": 1.0377322869212624,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 16799
    },
    {
      "epoch": 0.168,
      "grad_norm": 1.2591973812317476,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 16800
    },
    {
      "epoch": 0.16801,
      "grad_norm": 1.1764894782139463,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 16801
    },
    {
      "epoch": 0.16802,
      "grad_norm": 1.2632774612926907,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 16802
    },
    {
      "epoch": 0.16803,
      "grad_norm": 1.29296779956215,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 16803
    },
    {
      "epoch": 0.16804,
      "grad_norm": 1.0144603330901674,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 16804
    },
    {
      "epoch": 0.16805,
      "grad_norm": 1.2248370621605944,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 16805
    },
    {
      "epoch": 0.16806,
      "grad_norm": 1.606685061239406,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 16806
    },
    {
      "epoch": 0.16807,
      "grad_norm": 1.1500315710151148,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 16807
    },
    {
      "epoch": 0.16808,
      "grad_norm": 1.3947512644262978,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 16808
    },
    {
      "epoch": 0.16809,
      "grad_norm": 1.1025214300653976,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 16809
    },
    {
      "epoch": 0.1681,
      "grad_norm": 1.3507202635811593,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 16810
    },
    {
      "epoch": 0.16811,
      "grad_norm": 1.1370812976046,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 16811
    },
    {
      "epoch": 0.16812,
      "grad_norm": 1.3641818804777335,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 16812
    },
    {
      "epoch": 0.16813,
      "grad_norm": 1.2657995069383365,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 16813
    },
    {
      "epoch": 0.16814,
      "grad_norm": 1.0623824621060314,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 16814
    },
    {
      "epoch": 0.16815,
      "grad_norm": 1.2610460250197402,
      "learning_rate": 0.003,
      "loss": 4.1043,
      "step": 16815
    },
    {
      "epoch": 0.16816,
      "grad_norm": 1.2207105633397042,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 16816
    },
    {
      "epoch": 0.16817,
      "grad_norm": 1.2889344728156151,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 16817
    },
    {
      "epoch": 0.16818,
      "grad_norm": 1.405483760348606,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 16818
    },
    {
      "epoch": 0.16819,
      "grad_norm": 1.0386857054925762,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 16819
    },
    {
      "epoch": 0.1682,
      "grad_norm": 1.2465438352281049,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 16820
    },
    {
      "epoch": 0.16821,
      "grad_norm": 1.196637130653281,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 16821
    },
    {
      "epoch": 0.16822,
      "grad_norm": 1.457820682101557,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 16822
    },
    {
      "epoch": 0.16823,
      "grad_norm": 1.1613232889985434,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 16823
    },
    {
      "epoch": 0.16824,
      "grad_norm": 1.2706295316842788,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 16824
    },
    {
      "epoch": 0.16825,
      "grad_norm": 1.0911487428394981,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16825
    },
    {
      "epoch": 0.16826,
      "grad_norm": 1.3595954191392943,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 16826
    },
    {
      "epoch": 0.16827,
      "grad_norm": 1.2658568820915035,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 16827
    },
    {
      "epoch": 0.16828,
      "grad_norm": 1.397019207943614,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 16828
    },
    {
      "epoch": 0.16829,
      "grad_norm": 1.1829917724750292,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 16829
    },
    {
      "epoch": 0.1683,
      "grad_norm": 1.1737431792927684,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 16830
    },
    {
      "epoch": 0.16831,
      "grad_norm": 1.2377992906490987,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 16831
    },
    {
      "epoch": 0.16832,
      "grad_norm": 1.2105804851112891,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 16832
    },
    {
      "epoch": 0.16833,
      "grad_norm": 1.1670172496235744,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 16833
    },
    {
      "epoch": 0.16834,
      "grad_norm": 1.111923517385695,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 16834
    },
    {
      "epoch": 0.16835,
      "grad_norm": 1.2673999166716663,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 16835
    },
    {
      "epoch": 0.16836,
      "grad_norm": 1.2584695185196386,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 16836
    },
    {
      "epoch": 0.16837,
      "grad_norm": 1.0391253683909778,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 16837
    },
    {
      "epoch": 0.16838,
      "grad_norm": 1.2686470581121168,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 16838
    },
    {
      "epoch": 0.16839,
      "grad_norm": 0.939093786741332,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 16839
    },
    {
      "epoch": 0.1684,
      "grad_norm": 1.2721900411497336,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 16840
    },
    {
      "epoch": 0.16841,
      "grad_norm": 1.2357322793990546,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 16841
    },
    {
      "epoch": 0.16842,
      "grad_norm": 1.1881231361127453,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 16842
    },
    {
      "epoch": 0.16843,
      "grad_norm": 1.1508538543573568,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 16843
    },
    {
      "epoch": 0.16844,
      "grad_norm": 1.2773939686381024,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 16844
    },
    {
      "epoch": 0.16845,
      "grad_norm": 1.2152273111502037,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 16845
    },
    {
      "epoch": 0.16846,
      "grad_norm": 1.3766362139974697,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 16846
    },
    {
      "epoch": 0.16847,
      "grad_norm": 1.378993780999002,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 16847
    },
    {
      "epoch": 0.16848,
      "grad_norm": 1.5109246400409688,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 16848
    },
    {
      "epoch": 0.16849,
      "grad_norm": 0.953824324235403,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 16849
    },
    {
      "epoch": 0.1685,
      "grad_norm": 1.339281249581703,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 16850
    },
    {
      "epoch": 0.16851,
      "grad_norm": 1.2998593399676501,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 16851
    },
    {
      "epoch": 0.16852,
      "grad_norm": 1.0164201352560711,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 16852
    },
    {
      "epoch": 0.16853,
      "grad_norm": 1.4406245656231773,
      "learning_rate": 0.003,
      "loss": 4.0843,
      "step": 16853
    },
    {
      "epoch": 0.16854,
      "grad_norm": 0.9835109188131523,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16854
    },
    {
      "epoch": 0.16855,
      "grad_norm": 1.3767727285210112,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16855
    },
    {
      "epoch": 0.16856,
      "grad_norm": 1.1044606829707675,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 16856
    },
    {
      "epoch": 0.16857,
      "grad_norm": 1.299670854958639,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 16857
    },
    {
      "epoch": 0.16858,
      "grad_norm": 1.2416317130185637,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 16858
    },
    {
      "epoch": 0.16859,
      "grad_norm": 1.1737138450195503,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 16859
    },
    {
      "epoch": 0.1686,
      "grad_norm": 1.3630157587564866,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 16860
    },
    {
      "epoch": 0.16861,
      "grad_norm": 1.1138327516009972,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 16861
    },
    {
      "epoch": 0.16862,
      "grad_norm": 1.4377750117381896,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 16862
    },
    {
      "epoch": 0.16863,
      "grad_norm": 1.087949320127546,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 16863
    },
    {
      "epoch": 0.16864,
      "grad_norm": 1.4837405757909432,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 16864
    },
    {
      "epoch": 0.16865,
      "grad_norm": 1.533739270840856,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 16865
    },
    {
      "epoch": 0.16866,
      "grad_norm": 0.9784322019819925,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 16866
    },
    {
      "epoch": 0.16867,
      "grad_norm": 1.299062648690208,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 16867
    },
    {
      "epoch": 0.16868,
      "grad_norm": 1.2283298033113765,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 16868
    },
    {
      "epoch": 0.16869,
      "grad_norm": 1.1134405692621459,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 16869
    },
    {
      "epoch": 0.1687,
      "grad_norm": 1.1747016739767506,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 16870
    },
    {
      "epoch": 0.16871,
      "grad_norm": 1.2950017560021867,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 16871
    },
    {
      "epoch": 0.16872,
      "grad_norm": 1.0149044248103394,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 16872
    },
    {
      "epoch": 0.16873,
      "grad_norm": 1.391205949700118,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 16873
    },
    {
      "epoch": 0.16874,
      "grad_norm": 0.9455363272123867,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 16874
    },
    {
      "epoch": 0.16875,
      "grad_norm": 1.129067712212612,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 16875
    },
    {
      "epoch": 0.16876,
      "grad_norm": 1.3755510568160323,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 16876
    },
    {
      "epoch": 0.16877,
      "grad_norm": 1.1459983143566221,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 16877
    },
    {
      "epoch": 0.16878,
      "grad_norm": 1.1875243488588099,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 16878
    },
    {
      "epoch": 0.16879,
      "grad_norm": 1.5235497085078586,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 16879
    },
    {
      "epoch": 0.1688,
      "grad_norm": 1.2689704443388405,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 16880
    },
    {
      "epoch": 0.16881,
      "grad_norm": 1.3657585585182919,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 16881
    },
    {
      "epoch": 0.16882,
      "grad_norm": 1.2518826080151741,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 16882
    },
    {
      "epoch": 0.16883,
      "grad_norm": 1.2398375070189747,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 16883
    },
    {
      "epoch": 0.16884,
      "grad_norm": 1.2043682351070955,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 16884
    },
    {
      "epoch": 0.16885,
      "grad_norm": 1.1290648098157186,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 16885
    },
    {
      "epoch": 0.16886,
      "grad_norm": 1.1214426109760152,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 16886
    },
    {
      "epoch": 0.16887,
      "grad_norm": 1.1907149719752812,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 16887
    },
    {
      "epoch": 0.16888,
      "grad_norm": 1.171586890870058,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 16888
    },
    {
      "epoch": 0.16889,
      "grad_norm": 1.2615857750256425,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 16889
    },
    {
      "epoch": 0.1689,
      "grad_norm": 1.1590161604632432,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 16890
    },
    {
      "epoch": 0.16891,
      "grad_norm": 1.201379688982921,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 16891
    },
    {
      "epoch": 0.16892,
      "grad_norm": 1.2085583376593114,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 16892
    },
    {
      "epoch": 0.16893,
      "grad_norm": 1.2833285113963626,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 16893
    },
    {
      "epoch": 0.16894,
      "grad_norm": 1.2010659385222722,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 16894
    },
    {
      "epoch": 0.16895,
      "grad_norm": 1.268911100327642,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 16895
    },
    {
      "epoch": 0.16896,
      "grad_norm": 1.4269464293669927,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 16896
    },
    {
      "epoch": 0.16897,
      "grad_norm": 1.0134031667736194,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 16897
    },
    {
      "epoch": 0.16898,
      "grad_norm": 1.3552971992354175,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 16898
    },
    {
      "epoch": 0.16899,
      "grad_norm": 1.1969400108620583,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 16899
    },
    {
      "epoch": 0.169,
      "grad_norm": 1.2440903886771624,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 16900
    },
    {
      "epoch": 0.16901,
      "grad_norm": 1.2832883433021833,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 16901
    },
    {
      "epoch": 0.16902,
      "grad_norm": 1.27406726673779,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 16902
    },
    {
      "epoch": 0.16903,
      "grad_norm": 1.2260976174013318,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 16903
    },
    {
      "epoch": 0.16904,
      "grad_norm": 1.1966634019522435,
      "learning_rate": 0.003,
      "loss": 4.0957,
      "step": 16904
    },
    {
      "epoch": 0.16905,
      "grad_norm": 1.1490515794269593,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 16905
    },
    {
      "epoch": 0.16906,
      "grad_norm": 1.198462401149591,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 16906
    },
    {
      "epoch": 0.16907,
      "grad_norm": 1.3693505230975247,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 16907
    },
    {
      "epoch": 0.16908,
      "grad_norm": 1.2447177579825452,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 16908
    },
    {
      "epoch": 0.16909,
      "grad_norm": 1.2208258128228069,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 16909
    },
    {
      "epoch": 0.1691,
      "grad_norm": 1.1390878433907936,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 16910
    },
    {
      "epoch": 0.16911,
      "grad_norm": 1.2772889738612396,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 16911
    },
    {
      "epoch": 0.16912,
      "grad_norm": 1.0495387417713018,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 16912
    },
    {
      "epoch": 0.16913,
      "grad_norm": 1.236094976534584,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 16913
    },
    {
      "epoch": 0.16914,
      "grad_norm": 1.3062207475022636,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 16914
    },
    {
      "epoch": 0.16915,
      "grad_norm": 1.064390364463006,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 16915
    },
    {
      "epoch": 0.16916,
      "grad_norm": 1.5191869092136066,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 16916
    },
    {
      "epoch": 0.16917,
      "grad_norm": 1.3670090944649158,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 16917
    },
    {
      "epoch": 0.16918,
      "grad_norm": 1.2314801927745396,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 16918
    },
    {
      "epoch": 0.16919,
      "grad_norm": 1.2714919494982853,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 16919
    },
    {
      "epoch": 0.1692,
      "grad_norm": 1.0245371421311447,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 16920
    },
    {
      "epoch": 0.16921,
      "grad_norm": 1.399187969255701,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 16921
    },
    {
      "epoch": 0.16922,
      "grad_norm": 1.1394392772369148,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 16922
    },
    {
      "epoch": 0.16923,
      "grad_norm": 1.2358651424794553,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16923
    },
    {
      "epoch": 0.16924,
      "grad_norm": 1.274086921485851,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 16924
    },
    {
      "epoch": 0.16925,
      "grad_norm": 1.2177939284397616,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 16925
    },
    {
      "epoch": 0.16926,
      "grad_norm": 1.2178018449888186,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 16926
    },
    {
      "epoch": 0.16927,
      "grad_norm": 1.2757120385924865,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 16927
    },
    {
      "epoch": 0.16928,
      "grad_norm": 1.2282608548823535,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 16928
    },
    {
      "epoch": 0.16929,
      "grad_norm": 1.3189168954328372,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 16929
    },
    {
      "epoch": 0.1693,
      "grad_norm": 1.092074440286679,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 16930
    },
    {
      "epoch": 0.16931,
      "grad_norm": 1.196966518540893,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 16931
    },
    {
      "epoch": 0.16932,
      "grad_norm": 1.320981547763483,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 16932
    },
    {
      "epoch": 0.16933,
      "grad_norm": 1.1825425271113728,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 16933
    },
    {
      "epoch": 0.16934,
      "grad_norm": 1.3499495322767026,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 16934
    },
    {
      "epoch": 0.16935,
      "grad_norm": 1.0805460557226116,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 16935
    },
    {
      "epoch": 0.16936,
      "grad_norm": 1.2386571350265883,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 16936
    },
    {
      "epoch": 0.16937,
      "grad_norm": 1.0856980807139698,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 16937
    },
    {
      "epoch": 0.16938,
      "grad_norm": 1.2197054766341284,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 16938
    },
    {
      "epoch": 0.16939,
      "grad_norm": 1.2668817756255695,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 16939
    },
    {
      "epoch": 0.1694,
      "grad_norm": 1.1224968209854305,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 16940
    },
    {
      "epoch": 0.16941,
      "grad_norm": 1.2091332805281065,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 16941
    },
    {
      "epoch": 0.16942,
      "grad_norm": 1.184514981822149,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 16942
    },
    {
      "epoch": 0.16943,
      "grad_norm": 1.1619732892487817,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 16943
    },
    {
      "epoch": 0.16944,
      "grad_norm": 1.2575293378460344,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 16944
    },
    {
      "epoch": 0.16945,
      "grad_norm": 1.2800738467900814,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 16945
    },
    {
      "epoch": 0.16946,
      "grad_norm": 0.9543077777657314,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 16946
    },
    {
      "epoch": 0.16947,
      "grad_norm": 1.2539968033580715,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 16947
    },
    {
      "epoch": 0.16948,
      "grad_norm": 1.134897054004596,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 16948
    },
    {
      "epoch": 0.16949,
      "grad_norm": 1.1783067609268147,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 16949
    },
    {
      "epoch": 0.1695,
      "grad_norm": 1.3061849316390377,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 16950
    },
    {
      "epoch": 0.16951,
      "grad_norm": 1.3587387222632443,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 16951
    },
    {
      "epoch": 0.16952,
      "grad_norm": 1.3807827782986366,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 16952
    },
    {
      "epoch": 0.16953,
      "grad_norm": 1.443310911870979,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 16953
    },
    {
      "epoch": 0.16954,
      "grad_norm": 1.1588162955104133,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 16954
    },
    {
      "epoch": 0.16955,
      "grad_norm": 1.366344017441944,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 16955
    },
    {
      "epoch": 0.16956,
      "grad_norm": 1.0687092197667276,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 16956
    },
    {
      "epoch": 0.16957,
      "grad_norm": 1.5070998108760714,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 16957
    },
    {
      "epoch": 0.16958,
      "grad_norm": 1.0440279203002143,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 16958
    },
    {
      "epoch": 0.16959,
      "grad_norm": 1.2029541519854532,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 16959
    },
    {
      "epoch": 0.1696,
      "grad_norm": 1.1639026967557624,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 16960
    },
    {
      "epoch": 0.16961,
      "grad_norm": 1.2879390201346683,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 16961
    },
    {
      "epoch": 0.16962,
      "grad_norm": 1.2338983857667059,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 16962
    },
    {
      "epoch": 0.16963,
      "grad_norm": 1.334995760107833,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 16963
    },
    {
      "epoch": 0.16964,
      "grad_norm": 1.2032484577130451,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 16964
    },
    {
      "epoch": 0.16965,
      "grad_norm": 1.3254940084493645,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 16965
    },
    {
      "epoch": 0.16966,
      "grad_norm": 1.1808584653625602,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 16966
    },
    {
      "epoch": 0.16967,
      "grad_norm": 1.0956024495103562,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 16967
    },
    {
      "epoch": 0.16968,
      "grad_norm": 1.3582823693965818,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 16968
    },
    {
      "epoch": 0.16969,
      "grad_norm": 1.3127841660646782,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 16969
    },
    {
      "epoch": 0.1697,
      "grad_norm": 1.0590613504253792,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 16970
    },
    {
      "epoch": 0.16971,
      "grad_norm": 1.3370003005267381,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 16971
    },
    {
      "epoch": 0.16972,
      "grad_norm": 1.0825998424017913,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 16972
    },
    {
      "epoch": 0.16973,
      "grad_norm": 1.5805086190217519,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 16973
    },
    {
      "epoch": 0.16974,
      "grad_norm": 1.21296940946939,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 16974
    },
    {
      "epoch": 0.16975,
      "grad_norm": 1.2413744805373763,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 16975
    },
    {
      "epoch": 0.16976,
      "grad_norm": 1.1799095411296254,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 16976
    },
    {
      "epoch": 0.16977,
      "grad_norm": 1.3159578433009453,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 16977
    },
    {
      "epoch": 0.16978,
      "grad_norm": 1.5239983621956583,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 16978
    },
    {
      "epoch": 0.16979,
      "grad_norm": 1.2117505051118325,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 16979
    },
    {
      "epoch": 0.1698,
      "grad_norm": 1.1247154415502227,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 16980
    },
    {
      "epoch": 0.16981,
      "grad_norm": 1.3699757772248973,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 16981
    },
    {
      "epoch": 0.16982,
      "grad_norm": 1.017653817874775,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 16982
    },
    {
      "epoch": 0.16983,
      "grad_norm": 1.4629027360363662,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 16983
    },
    {
      "epoch": 0.16984,
      "grad_norm": 0.9247191316273929,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 16984
    },
    {
      "epoch": 0.16985,
      "grad_norm": 1.4410991040670977,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 16985
    },
    {
      "epoch": 0.16986,
      "grad_norm": 1.3009971277751673,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 16986
    },
    {
      "epoch": 0.16987,
      "grad_norm": 1.1143033414580263,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 16987
    },
    {
      "epoch": 0.16988,
      "grad_norm": 1.5558709985350159,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 16988
    },
    {
      "epoch": 0.16989,
      "grad_norm": 1.1190764199677528,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 16989
    },
    {
      "epoch": 0.1699,
      "grad_norm": 1.271684890657497,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 16990
    },
    {
      "epoch": 0.16991,
      "grad_norm": 1.1418945148210984,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 16991
    },
    {
      "epoch": 0.16992,
      "grad_norm": 1.3841242182659466,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 16992
    },
    {
      "epoch": 0.16993,
      "grad_norm": 0.9437527262546115,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 16993
    },
    {
      "epoch": 0.16994,
      "grad_norm": 1.2806800712476167,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 16994
    },
    {
      "epoch": 0.16995,
      "grad_norm": 1.0295146916247497,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 16995
    },
    {
      "epoch": 0.16996,
      "grad_norm": 1.367398593535846,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 16996
    },
    {
      "epoch": 0.16997,
      "grad_norm": 1.114513362033281,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 16997
    },
    {
      "epoch": 0.16998,
      "grad_norm": 1.6710717846457315,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 16998
    },
    {
      "epoch": 0.16999,
      "grad_norm": 1.08533786983493,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 16999
    },
    {
      "epoch": 0.17,
      "grad_norm": 1.1300516478506852,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 17000
    },
    {
      "epoch": 0.17001,
      "grad_norm": 1.4854644865056545,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 17001
    },
    {
      "epoch": 0.17002,
      "grad_norm": 1.0647905698786158,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 17002
    },
    {
      "epoch": 0.17003,
      "grad_norm": 1.4684027361951262,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 17003
    },
    {
      "epoch": 0.17004,
      "grad_norm": 1.0702179810626333,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 17004
    },
    {
      "epoch": 0.17005,
      "grad_norm": 1.109196939589509,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 17005
    },
    {
      "epoch": 0.17006,
      "grad_norm": 1.1788556480619246,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 17006
    },
    {
      "epoch": 0.17007,
      "grad_norm": 1.1618337354719386,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 17007
    },
    {
      "epoch": 0.17008,
      "grad_norm": 1.4112447351543733,
      "learning_rate": 0.003,
      "loss": 4.09,
      "step": 17008
    },
    {
      "epoch": 0.17009,
      "grad_norm": 1.0210019746504395,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 17009
    },
    {
      "epoch": 0.1701,
      "grad_norm": 1.5186241163420218,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 17010
    },
    {
      "epoch": 0.17011,
      "grad_norm": 0.9681540699367649,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 17011
    },
    {
      "epoch": 0.17012,
      "grad_norm": 1.347101933964949,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 17012
    },
    {
      "epoch": 0.17013,
      "grad_norm": 1.320421310437298,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 17013
    },
    {
      "epoch": 0.17014,
      "grad_norm": 1.2416515545235678,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 17014
    },
    {
      "epoch": 0.17015,
      "grad_norm": 1.2594764298389944,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 17015
    },
    {
      "epoch": 0.17016,
      "grad_norm": 1.0665417499892844,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 17016
    },
    {
      "epoch": 0.17017,
      "grad_norm": 1.2560262315788144,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 17017
    },
    {
      "epoch": 0.17018,
      "grad_norm": 1.1509264809125181,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 17018
    },
    {
      "epoch": 0.17019,
      "grad_norm": 1.2088345627948451,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 17019
    },
    {
      "epoch": 0.1702,
      "grad_norm": 1.411378690404628,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 17020
    },
    {
      "epoch": 0.17021,
      "grad_norm": 1.1801370251695185,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 17021
    },
    {
      "epoch": 0.17022,
      "grad_norm": 1.4776693771393323,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 17022
    },
    {
      "epoch": 0.17023,
      "grad_norm": 1.0551018726002896,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 17023
    },
    {
      "epoch": 0.17024,
      "grad_norm": 1.2469741530408414,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 17024
    },
    {
      "epoch": 0.17025,
      "grad_norm": 1.1500908920402089,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 17025
    },
    {
      "epoch": 0.17026,
      "grad_norm": 1.201045732731781,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 17026
    },
    {
      "epoch": 0.17027,
      "grad_norm": 1.1096566430181591,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 17027
    },
    {
      "epoch": 0.17028,
      "grad_norm": 1.4834179558886718,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 17028
    },
    {
      "epoch": 0.17029,
      "grad_norm": 1.1165586509695928,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 17029
    },
    {
      "epoch": 0.1703,
      "grad_norm": 1.274416067501693,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 17030
    },
    {
      "epoch": 0.17031,
      "grad_norm": 1.056483448533938,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 17031
    },
    {
      "epoch": 0.17032,
      "grad_norm": 1.6005804495978606,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17032
    },
    {
      "epoch": 0.17033,
      "grad_norm": 0.860041113410521,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17033
    },
    {
      "epoch": 0.17034,
      "grad_norm": 1.0279815590719537,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 17034
    },
    {
      "epoch": 0.17035,
      "grad_norm": 1.666759738599301,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 17035
    },
    {
      "epoch": 0.17036,
      "grad_norm": 1.0416404861330888,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 17036
    },
    {
      "epoch": 0.17037,
      "grad_norm": 1.336235499498572,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 17037
    },
    {
      "epoch": 0.17038,
      "grad_norm": 1.1070780681384462,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 17038
    },
    {
      "epoch": 0.17039,
      "grad_norm": 1.1474080014229298,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 17039
    },
    {
      "epoch": 0.1704,
      "grad_norm": 1.2356096392683003,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 17040
    },
    {
      "epoch": 0.17041,
      "grad_norm": 1.2116338646318794,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 17041
    },
    {
      "epoch": 0.17042,
      "grad_norm": 1.3185251194346252,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 17042
    },
    {
      "epoch": 0.17043,
      "grad_norm": 1.1305643242555485,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 17043
    },
    {
      "epoch": 0.17044,
      "grad_norm": 1.4747500406152476,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 17044
    },
    {
      "epoch": 0.17045,
      "grad_norm": 0.9367047255629887,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 17045
    },
    {
      "epoch": 0.17046,
      "grad_norm": 1.4376927861567141,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 17046
    },
    {
      "epoch": 0.17047,
      "grad_norm": 1.1354376573084526,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 17047
    },
    {
      "epoch": 0.17048,
      "grad_norm": 1.2809581279291067,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 17048
    },
    {
      "epoch": 0.17049,
      "grad_norm": 1.1060134587814072,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 17049
    },
    {
      "epoch": 0.1705,
      "grad_norm": 1.3903949900172548,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 17050
    },
    {
      "epoch": 0.17051,
      "grad_norm": 1.2102423319200646,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 17051
    },
    {
      "epoch": 0.17052,
      "grad_norm": 1.392974983130736,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 17052
    },
    {
      "epoch": 0.17053,
      "grad_norm": 1.2761802143619123,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 17053
    },
    {
      "epoch": 0.17054,
      "grad_norm": 1.340852438654309,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 17054
    },
    {
      "epoch": 0.17055,
      "grad_norm": 1.1047860580331121,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 17055
    },
    {
      "epoch": 0.17056,
      "grad_norm": 1.2925607206559326,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 17056
    },
    {
      "epoch": 0.17057,
      "grad_norm": 1.2118379864560753,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 17057
    },
    {
      "epoch": 0.17058,
      "grad_norm": 1.2102751990134006,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 17058
    },
    {
      "epoch": 0.17059,
      "grad_norm": 1.2238964106513677,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 17059
    },
    {
      "epoch": 0.1706,
      "grad_norm": 1.2902083032121994,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 17060
    },
    {
      "epoch": 0.17061,
      "grad_norm": 0.9884657706767966,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 17061
    },
    {
      "epoch": 0.17062,
      "grad_norm": 1.362392521026528,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 17062
    },
    {
      "epoch": 0.17063,
      "grad_norm": 1.033157284902731,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 17063
    },
    {
      "epoch": 0.17064,
      "grad_norm": 1.3522412439083653,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 17064
    },
    {
      "epoch": 0.17065,
      "grad_norm": 1.1503250545725914,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 17065
    },
    {
      "epoch": 0.17066,
      "grad_norm": 1.2561473842039022,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 17066
    },
    {
      "epoch": 0.17067,
      "grad_norm": 1.4375575271359797,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 17067
    },
    {
      "epoch": 0.17068,
      "grad_norm": 1.024327865973433,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 17068
    },
    {
      "epoch": 0.17069,
      "grad_norm": 1.4739457588751657,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 17069
    },
    {
      "epoch": 0.1707,
      "grad_norm": 1.042496759524706,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 17070
    },
    {
      "epoch": 0.17071,
      "grad_norm": 1.2787623283171865,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 17071
    },
    {
      "epoch": 0.17072,
      "grad_norm": 1.2739738294358849,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 17072
    },
    {
      "epoch": 0.17073,
      "grad_norm": 1.3069607361903082,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 17073
    },
    {
      "epoch": 0.17074,
      "grad_norm": 1.1109209840193457,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 17074
    },
    {
      "epoch": 0.17075,
      "grad_norm": 1.3578483278539972,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 17075
    },
    {
      "epoch": 0.17076,
      "grad_norm": 1.2057365219041163,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 17076
    },
    {
      "epoch": 0.17077,
      "grad_norm": 1.1902615298987598,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 17077
    },
    {
      "epoch": 0.17078,
      "grad_norm": 1.0977206185792603,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17078
    },
    {
      "epoch": 0.17079,
      "grad_norm": 1.2100246532932626,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 17079
    },
    {
      "epoch": 0.1708,
      "grad_norm": 1.1747207972823042,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 17080
    },
    {
      "epoch": 0.17081,
      "grad_norm": 1.4256099582840815,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 17081
    },
    {
      "epoch": 0.17082,
      "grad_norm": 1.0428864359588268,
      "learning_rate": 0.003,
      "loss": 4.0914,
      "step": 17082
    },
    {
      "epoch": 0.17083,
      "grad_norm": 1.4943451862253978,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 17083
    },
    {
      "epoch": 0.17084,
      "grad_norm": 1.119894011493226,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 17084
    },
    {
      "epoch": 0.17085,
      "grad_norm": 1.4031043390497535,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 17085
    },
    {
      "epoch": 0.17086,
      "grad_norm": 1.1318173853206333,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 17086
    },
    {
      "epoch": 0.17087,
      "grad_norm": 1.627373600531823,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 17087
    },
    {
      "epoch": 0.17088,
      "grad_norm": 1.2141086452195955,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 17088
    },
    {
      "epoch": 0.17089,
      "grad_norm": 1.2285668262442404,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 17089
    },
    {
      "epoch": 0.1709,
      "grad_norm": 1.074949899276511,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 17090
    },
    {
      "epoch": 0.17091,
      "grad_norm": 1.4761983475605394,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 17091
    },
    {
      "epoch": 0.17092,
      "grad_norm": 1.0780631791435658,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 17092
    },
    {
      "epoch": 0.17093,
      "grad_norm": 1.6138046600098406,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 17093
    },
    {
      "epoch": 0.17094,
      "grad_norm": 0.9592767113879691,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 17094
    },
    {
      "epoch": 0.17095,
      "grad_norm": 1.267181289397601,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 17095
    },
    {
      "epoch": 0.17096,
      "grad_norm": 1.1983977944981508,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 17096
    },
    {
      "epoch": 0.17097,
      "grad_norm": 1.7081677681732466,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 17097
    },
    {
      "epoch": 0.17098,
      "grad_norm": 1.0094432444386825,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 17098
    },
    {
      "epoch": 0.17099,
      "grad_norm": 1.2879564300679882,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 17099
    },
    {
      "epoch": 0.171,
      "grad_norm": 1.2241451007568611,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 17100
    },
    {
      "epoch": 0.17101,
      "grad_norm": 1.2005359846033563,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 17101
    },
    {
      "epoch": 0.17102,
      "grad_norm": 1.2775872159975104,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17102
    },
    {
      "epoch": 0.17103,
      "grad_norm": 1.1794188282881768,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 17103
    },
    {
      "epoch": 0.17104,
      "grad_norm": 1.1650774374183848,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 17104
    },
    {
      "epoch": 0.17105,
      "grad_norm": 1.2195995474646042,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 17105
    },
    {
      "epoch": 0.17106,
      "grad_norm": 1.2893412901033197,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 17106
    },
    {
      "epoch": 0.17107,
      "grad_norm": 1.319909128826875,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 17107
    },
    {
      "epoch": 0.17108,
      "grad_norm": 1.1690540591132834,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 17108
    },
    {
      "epoch": 0.17109,
      "grad_norm": 1.2115142944406738,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 17109
    },
    {
      "epoch": 0.1711,
      "grad_norm": 1.2627038138659699,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 17110
    },
    {
      "epoch": 0.17111,
      "grad_norm": 1.6278553032524519,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 17111
    },
    {
      "epoch": 0.17112,
      "grad_norm": 1.0070548525930745,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 17112
    },
    {
      "epoch": 0.17113,
      "grad_norm": 1.1407931098727693,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 17113
    },
    {
      "epoch": 0.17114,
      "grad_norm": 1.5466415497004846,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 17114
    },
    {
      "epoch": 0.17115,
      "grad_norm": 1.3913316031898841,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 17115
    },
    {
      "epoch": 0.17116,
      "grad_norm": 1.1203436523118306,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 17116
    },
    {
      "epoch": 0.17117,
      "grad_norm": 1.137359047504489,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 17117
    },
    {
      "epoch": 0.17118,
      "grad_norm": 1.2331556996086932,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17118
    },
    {
      "epoch": 0.17119,
      "grad_norm": 1.269085644507956,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 17119
    },
    {
      "epoch": 0.1712,
      "grad_norm": 1.1478520730090644,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 17120
    },
    {
      "epoch": 0.17121,
      "grad_norm": 1.096765511893826,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 17121
    },
    {
      "epoch": 0.17122,
      "grad_norm": 1.394911999429465,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17122
    },
    {
      "epoch": 0.17123,
      "grad_norm": 1.1459402376211092,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 17123
    },
    {
      "epoch": 0.17124,
      "grad_norm": 1.3012976253681308,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 17124
    },
    {
      "epoch": 0.17125,
      "grad_norm": 1.1823411170357148,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 17125
    },
    {
      "epoch": 0.17126,
      "grad_norm": 1.3339255381643982,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 17126
    },
    {
      "epoch": 0.17127,
      "grad_norm": 1.114045506324468,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 17127
    },
    {
      "epoch": 0.17128,
      "grad_norm": 1.2408804559454076,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17128
    },
    {
      "epoch": 0.17129,
      "grad_norm": 1.1429967648203054,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 17129
    },
    {
      "epoch": 0.1713,
      "grad_norm": 1.1435463857854034,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17130
    },
    {
      "epoch": 0.17131,
      "grad_norm": 1.1192740481677632,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 17131
    },
    {
      "epoch": 0.17132,
      "grad_norm": 1.1649496145325733,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 17132
    },
    {
      "epoch": 0.17133,
      "grad_norm": 1.1653298575255848,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 17133
    },
    {
      "epoch": 0.17134,
      "grad_norm": 1.2927260128753215,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 17134
    },
    {
      "epoch": 0.17135,
      "grad_norm": 1.1052873686354054,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 17135
    },
    {
      "epoch": 0.17136,
      "grad_norm": 1.4240461447369932,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 17136
    },
    {
      "epoch": 0.17137,
      "grad_norm": 1.0025371795503932,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 17137
    },
    {
      "epoch": 0.17138,
      "grad_norm": 1.2783951799923268,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 17138
    },
    {
      "epoch": 0.17139,
      "grad_norm": 1.2915244743297567,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 17139
    },
    {
      "epoch": 0.1714,
      "grad_norm": 1.2239294795654907,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 17140
    },
    {
      "epoch": 0.17141,
      "grad_norm": 1.287674278636702,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 17141
    },
    {
      "epoch": 0.17142,
      "grad_norm": 1.2329873107473168,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 17142
    },
    {
      "epoch": 0.17143,
      "grad_norm": 1.1853654198506531,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 17143
    },
    {
      "epoch": 0.17144,
      "grad_norm": 1.6144709209863437,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 17144
    },
    {
      "epoch": 0.17145,
      "grad_norm": 1.0842925800239687,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 17145
    },
    {
      "epoch": 0.17146,
      "grad_norm": 1.4143467272752115,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 17146
    },
    {
      "epoch": 0.17147,
      "grad_norm": 0.836879174133386,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 17147
    },
    {
      "epoch": 0.17148,
      "grad_norm": 1.1927653211616587,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 17148
    },
    {
      "epoch": 0.17149,
      "grad_norm": 1.1736106709041612,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 17149
    },
    {
      "epoch": 0.1715,
      "grad_norm": 1.4105477692900008,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 17150
    },
    {
      "epoch": 0.17151,
      "grad_norm": 1.2359755066435034,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 17151
    },
    {
      "epoch": 0.17152,
      "grad_norm": 1.4072251037006216,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 17152
    },
    {
      "epoch": 0.17153,
      "grad_norm": 1.2121102719247743,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 17153
    },
    {
      "epoch": 0.17154,
      "grad_norm": 1.0708423088931183,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 17154
    },
    {
      "epoch": 0.17155,
      "grad_norm": 1.4164021755321157,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17155
    },
    {
      "epoch": 0.17156,
      "grad_norm": 1.0757005546234806,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 17156
    },
    {
      "epoch": 0.17157,
      "grad_norm": 1.4312245312473366,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 17157
    },
    {
      "epoch": 0.17158,
      "grad_norm": 1.0794673781354265,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 17158
    },
    {
      "epoch": 0.17159,
      "grad_norm": 1.415711859144589,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 17159
    },
    {
      "epoch": 0.1716,
      "grad_norm": 1.062912828210717,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17160
    },
    {
      "epoch": 0.17161,
      "grad_norm": 1.4631637785064096,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 17161
    },
    {
      "epoch": 0.17162,
      "grad_norm": 1.0663778939242714,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 17162
    },
    {
      "epoch": 0.17163,
      "grad_norm": 1.6987545202361207,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 17163
    },
    {
      "epoch": 0.17164,
      "grad_norm": 1.0323738877625745,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 17164
    },
    {
      "epoch": 0.17165,
      "grad_norm": 1.3288184898116433,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 17165
    },
    {
      "epoch": 0.17166,
      "grad_norm": 1.1158937376781226,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 17166
    },
    {
      "epoch": 0.17167,
      "grad_norm": 1.26856294914553,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 17167
    },
    {
      "epoch": 0.17168,
      "grad_norm": 1.2245825834066786,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 17168
    },
    {
      "epoch": 0.17169,
      "grad_norm": 1.2851830135787832,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 17169
    },
    {
      "epoch": 0.1717,
      "grad_norm": 0.9162180990378845,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 17170
    },
    {
      "epoch": 0.17171,
      "grad_norm": 1.2643111081821041,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 17171
    },
    {
      "epoch": 0.17172,
      "grad_norm": 1.3660061580806508,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 17172
    },
    {
      "epoch": 0.17173,
      "grad_norm": 1.4266468625355122,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 17173
    },
    {
      "epoch": 0.17174,
      "grad_norm": 1.3602724508990756,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 17174
    },
    {
      "epoch": 0.17175,
      "grad_norm": 0.9471462822131239,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 17175
    },
    {
      "epoch": 0.17176,
      "grad_norm": 1.2282409267406897,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 17176
    },
    {
      "epoch": 0.17177,
      "grad_norm": 1.3409203348563423,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 17177
    },
    {
      "epoch": 0.17178,
      "grad_norm": 1.2178308315203064,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 17178
    },
    {
      "epoch": 0.17179,
      "grad_norm": 1.0951387805996513,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 17179
    },
    {
      "epoch": 0.1718,
      "grad_norm": 1.2908997990033155,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 17180
    },
    {
      "epoch": 0.17181,
      "grad_norm": 1.1445767219936012,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 17181
    },
    {
      "epoch": 0.17182,
      "grad_norm": 1.1380557271009788,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 17182
    },
    {
      "epoch": 0.17183,
      "grad_norm": 1.275095676428426,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 17183
    },
    {
      "epoch": 0.17184,
      "grad_norm": 1.2225802772918613,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 17184
    },
    {
      "epoch": 0.17185,
      "grad_norm": 1.6827976521471713,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 17185
    },
    {
      "epoch": 0.17186,
      "grad_norm": 1.120126198618937,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 17186
    },
    {
      "epoch": 0.17187,
      "grad_norm": 1.2685479238932016,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 17187
    },
    {
      "epoch": 0.17188,
      "grad_norm": 1.2273429494321608,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 17188
    },
    {
      "epoch": 0.17189,
      "grad_norm": 1.175547919825017,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 17189
    },
    {
      "epoch": 0.1719,
      "grad_norm": 1.4270859354766496,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17190
    },
    {
      "epoch": 0.17191,
      "grad_norm": 1.21603731282858,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 17191
    },
    {
      "epoch": 0.17192,
      "grad_norm": 1.1269431238317735,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 17192
    },
    {
      "epoch": 0.17193,
      "grad_norm": 1.0547513507423332,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 17193
    },
    {
      "epoch": 0.17194,
      "grad_norm": 1.3598685088552334,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 17194
    },
    {
      "epoch": 0.17195,
      "grad_norm": 1.1824352169266816,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 17195
    },
    {
      "epoch": 0.17196,
      "grad_norm": 1.293936300977291,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 17196
    },
    {
      "epoch": 0.17197,
      "grad_norm": 1.3017028808489668,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 17197
    },
    {
      "epoch": 0.17198,
      "grad_norm": 1.2113699499764334,
      "learning_rate": 0.003,
      "loss": 4.1044,
      "step": 17198
    },
    {
      "epoch": 0.17199,
      "grad_norm": 1.3628410320018296,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 17199
    },
    {
      "epoch": 0.172,
      "grad_norm": 1.1963881630514184,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 17200
    },
    {
      "epoch": 0.17201,
      "grad_norm": 1.2352001983736032,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17201
    },
    {
      "epoch": 0.17202,
      "grad_norm": 1.2540179225481027,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 17202
    },
    {
      "epoch": 0.17203,
      "grad_norm": 1.1253322468332425,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 17203
    },
    {
      "epoch": 0.17204,
      "grad_norm": 1.2995373145495206,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 17204
    },
    {
      "epoch": 0.17205,
      "grad_norm": 1.1866529753856296,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 17205
    },
    {
      "epoch": 0.17206,
      "grad_norm": 1.268599132845124,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 17206
    },
    {
      "epoch": 0.17207,
      "grad_norm": 1.2904055430536574,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 17207
    },
    {
      "epoch": 0.17208,
      "grad_norm": 1.3318610535485012,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 17208
    },
    {
      "epoch": 0.17209,
      "grad_norm": 1.2517193849636388,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 17209
    },
    {
      "epoch": 0.1721,
      "grad_norm": 1.359154555885252,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 17210
    },
    {
      "epoch": 0.17211,
      "grad_norm": 1.153342960723365,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 17211
    },
    {
      "epoch": 0.17212,
      "grad_norm": 1.2571055996638105,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 17212
    },
    {
      "epoch": 0.17213,
      "grad_norm": 0.9998084123979417,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 17213
    },
    {
      "epoch": 0.17214,
      "grad_norm": 1.2799901188175968,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 17214
    },
    {
      "epoch": 0.17215,
      "grad_norm": 0.9469252917559144,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 17215
    },
    {
      "epoch": 0.17216,
      "grad_norm": 1.4177754942789411,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 17216
    },
    {
      "epoch": 0.17217,
      "grad_norm": 1.280720785201341,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 17217
    },
    {
      "epoch": 0.17218,
      "grad_norm": 1.1338877661442035,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 17218
    },
    {
      "epoch": 0.17219,
      "grad_norm": 1.4341819613445428,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 17219
    },
    {
      "epoch": 0.1722,
      "grad_norm": 0.8765015139996959,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 17220
    },
    {
      "epoch": 0.17221,
      "grad_norm": 1.1053439625899335,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 17221
    },
    {
      "epoch": 0.17222,
      "grad_norm": 1.4071027750109097,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 17222
    },
    {
      "epoch": 0.17223,
      "grad_norm": 1.0695360583694458,
      "learning_rate": 0.003,
      "loss": 4.0162,
      "step": 17223
    },
    {
      "epoch": 0.17224,
      "grad_norm": 1.4011965510898687,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 17224
    },
    {
      "epoch": 0.17225,
      "grad_norm": 1.179663336846994,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 17225
    },
    {
      "epoch": 0.17226,
      "grad_norm": 1.2039026930037622,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 17226
    },
    {
      "epoch": 0.17227,
      "grad_norm": 1.2446959723832283,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 17227
    },
    {
      "epoch": 0.17228,
      "grad_norm": 1.2130664411452436,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 17228
    },
    {
      "epoch": 0.17229,
      "grad_norm": 1.279387723205202,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 17229
    },
    {
      "epoch": 0.1723,
      "grad_norm": 1.2646549830766647,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 17230
    },
    {
      "epoch": 0.17231,
      "grad_norm": 1.1476763293666046,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 17231
    },
    {
      "epoch": 0.17232,
      "grad_norm": 1.2667751820120776,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 17232
    },
    {
      "epoch": 0.17233,
      "grad_norm": 1.33390482237294,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 17233
    },
    {
      "epoch": 0.17234,
      "grad_norm": 1.2556664716742125,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 17234
    },
    {
      "epoch": 0.17235,
      "grad_norm": 1.1846048324078389,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 17235
    },
    {
      "epoch": 0.17236,
      "grad_norm": 1.0903822402335819,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 17236
    },
    {
      "epoch": 0.17237,
      "grad_norm": 1.331981127769312,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 17237
    },
    {
      "epoch": 0.17238,
      "grad_norm": 1.1383088320999761,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 17238
    },
    {
      "epoch": 0.17239,
      "grad_norm": 1.3565717249246343,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 17239
    },
    {
      "epoch": 0.1724,
      "grad_norm": 1.0118820735186458,
      "learning_rate": 0.003,
      "loss": 4.0016,
      "step": 17240
    },
    {
      "epoch": 0.17241,
      "grad_norm": 1.3922275482107882,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 17241
    },
    {
      "epoch": 0.17242,
      "grad_norm": 1.1687677777125651,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 17242
    },
    {
      "epoch": 0.17243,
      "grad_norm": 1.2327638917695471,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 17243
    },
    {
      "epoch": 0.17244,
      "grad_norm": 1.339453077024193,
      "learning_rate": 0.003,
      "loss": 4.0994,
      "step": 17244
    },
    {
      "epoch": 0.17245,
      "grad_norm": 1.5186126884766142,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 17245
    },
    {
      "epoch": 0.17246,
      "grad_norm": 1.05824978685617,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 17246
    },
    {
      "epoch": 0.17247,
      "grad_norm": 1.255995773051049,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 17247
    },
    {
      "epoch": 0.17248,
      "grad_norm": 1.2315503113323412,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 17248
    },
    {
      "epoch": 0.17249,
      "grad_norm": 0.9318070395997096,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 17249
    },
    {
      "epoch": 0.1725,
      "grad_norm": 1.1908491186156405,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 17250
    },
    {
      "epoch": 0.17251,
      "grad_norm": 1.1915153514630226,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 17251
    },
    {
      "epoch": 0.17252,
      "grad_norm": 1.1563311259365638,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 17252
    },
    {
      "epoch": 0.17253,
      "grad_norm": 1.270705816063968,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 17253
    },
    {
      "epoch": 0.17254,
      "grad_norm": 1.0384070607139333,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 17254
    },
    {
      "epoch": 0.17255,
      "grad_norm": 1.4061055380075593,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 17255
    },
    {
      "epoch": 0.17256,
      "grad_norm": 1.3552131673979657,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 17256
    },
    {
      "epoch": 0.17257,
      "grad_norm": 1.3011398340871603,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 17257
    },
    {
      "epoch": 0.17258,
      "grad_norm": 1.1959542406063346,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 17258
    },
    {
      "epoch": 0.17259,
      "grad_norm": 1.2030988997548404,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 17259
    },
    {
      "epoch": 0.1726,
      "grad_norm": 1.3521643865300224,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 17260
    },
    {
      "epoch": 0.17261,
      "grad_norm": 1.1479353012867946,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 17261
    },
    {
      "epoch": 0.17262,
      "grad_norm": 1.4021285788014592,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 17262
    },
    {
      "epoch": 0.17263,
      "grad_norm": 1.097132875295684,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 17263
    },
    {
      "epoch": 0.17264,
      "grad_norm": 1.468951819612964,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 17264
    },
    {
      "epoch": 0.17265,
      "grad_norm": 1.173350174634137,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 17265
    },
    {
      "epoch": 0.17266,
      "grad_norm": 1.3197111849349104,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 17266
    },
    {
      "epoch": 0.17267,
      "grad_norm": 1.1182822292349939,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 17267
    },
    {
      "epoch": 0.17268,
      "grad_norm": 1.408288619100258,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 17268
    },
    {
      "epoch": 0.17269,
      "grad_norm": 1.0378872949043103,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17269
    },
    {
      "epoch": 0.1727,
      "grad_norm": 1.323592295870236,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 17270
    },
    {
      "epoch": 0.17271,
      "grad_norm": 1.1529028314777992,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 17271
    },
    {
      "epoch": 0.17272,
      "grad_norm": 1.3556256576605632,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 17272
    },
    {
      "epoch": 0.17273,
      "grad_norm": 1.3919913794708458,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 17273
    },
    {
      "epoch": 0.17274,
      "grad_norm": 1.0873879746321833,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 17274
    },
    {
      "epoch": 0.17275,
      "grad_norm": 1.216810707377631,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 17275
    },
    {
      "epoch": 0.17276,
      "grad_norm": 1.0698529071889722,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 17276
    },
    {
      "epoch": 0.17277,
      "grad_norm": 1.350392714923593,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 17277
    },
    {
      "epoch": 0.17278,
      "grad_norm": 1.104244064909029,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 17278
    },
    {
      "epoch": 0.17279,
      "grad_norm": 1.2353810154030969,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 17279
    },
    {
      "epoch": 0.1728,
      "grad_norm": 1.4745630962674738,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 17280
    },
    {
      "epoch": 0.17281,
      "grad_norm": 1.1006183337244388,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 17281
    },
    {
      "epoch": 0.17282,
      "grad_norm": 1.1945203534698534,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 17282
    },
    {
      "epoch": 0.17283,
      "grad_norm": 1.1795349161011404,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 17283
    },
    {
      "epoch": 0.17284,
      "grad_norm": 1.1510241962711578,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 17284
    },
    {
      "epoch": 0.17285,
      "grad_norm": 1.2127775054776724,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 17285
    },
    {
      "epoch": 0.17286,
      "grad_norm": 1.1555907178306937,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 17286
    },
    {
      "epoch": 0.17287,
      "grad_norm": 1.2965122071414634,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17287
    },
    {
      "epoch": 0.17288,
      "grad_norm": 0.99620776378164,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 17288
    },
    {
      "epoch": 0.17289,
      "grad_norm": 1.4769229074340224,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 17289
    },
    {
      "epoch": 0.1729,
      "grad_norm": 1.0784906715158313,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 17290
    },
    {
      "epoch": 0.17291,
      "grad_norm": 1.3670448482541506,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 17291
    },
    {
      "epoch": 0.17292,
      "grad_norm": 0.8972122750790381,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 17292
    },
    {
      "epoch": 0.17293,
      "grad_norm": 1.1533510954730688,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 17293
    },
    {
      "epoch": 0.17294,
      "grad_norm": 1.5653563273040265,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 17294
    },
    {
      "epoch": 0.17295,
      "grad_norm": 1.2090100400889283,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 17295
    },
    {
      "epoch": 0.17296,
      "grad_norm": 1.671789202098,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17296
    },
    {
      "epoch": 0.17297,
      "grad_norm": 0.9866922678480271,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 17297
    },
    {
      "epoch": 0.17298,
      "grad_norm": 1.1652443394502294,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 17298
    },
    {
      "epoch": 0.17299,
      "grad_norm": 1.2859266559722047,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 17299
    },
    {
      "epoch": 0.173,
      "grad_norm": 1.0738415213616017,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 17300
    },
    {
      "epoch": 0.17301,
      "grad_norm": 1.4778060060409284,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 17301
    },
    {
      "epoch": 0.17302,
      "grad_norm": 0.9797779022071426,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 17302
    },
    {
      "epoch": 0.17303,
      "grad_norm": 1.3111110817384728,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 17303
    },
    {
      "epoch": 0.17304,
      "grad_norm": 1.2661216619762456,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 17304
    },
    {
      "epoch": 0.17305,
      "grad_norm": 1.4112246801763713,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 17305
    },
    {
      "epoch": 0.17306,
      "grad_norm": 1.1174961321385242,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 17306
    },
    {
      "epoch": 0.17307,
      "grad_norm": 1.485030346166996,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 17307
    },
    {
      "epoch": 0.17308,
      "grad_norm": 0.9852911769361422,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 17308
    },
    {
      "epoch": 0.17309,
      "grad_norm": 1.3304128966550497,
      "learning_rate": 0.003,
      "loss": 4.0919,
      "step": 17309
    },
    {
      "epoch": 0.1731,
      "grad_norm": 1.0230794649394752,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 17310
    },
    {
      "epoch": 0.17311,
      "grad_norm": 1.3701800499998642,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 17311
    },
    {
      "epoch": 0.17312,
      "grad_norm": 1.3044692407123164,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 17312
    },
    {
      "epoch": 0.17313,
      "grad_norm": 1.0733473023342064,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 17313
    },
    {
      "epoch": 0.17314,
      "grad_norm": 1.6499598634474362,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 17314
    },
    {
      "epoch": 0.17315,
      "grad_norm": 1.1770519491082734,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 17315
    },
    {
      "epoch": 0.17316,
      "grad_norm": 1.311011617413508,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 17316
    },
    {
      "epoch": 0.17317,
      "grad_norm": 1.1164103479989218,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 17317
    },
    {
      "epoch": 0.17318,
      "grad_norm": 1.5049104682902432,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 17318
    },
    {
      "epoch": 0.17319,
      "grad_norm": 1.1184468457167045,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 17319
    },
    {
      "epoch": 0.1732,
      "grad_norm": 1.2817762642047776,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 17320
    },
    {
      "epoch": 0.17321,
      "grad_norm": 1.2009349227032486,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 17321
    },
    {
      "epoch": 0.17322,
      "grad_norm": 1.046563884718884,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 17322
    },
    {
      "epoch": 0.17323,
      "grad_norm": 1.3383076588281584,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17323
    },
    {
      "epoch": 0.17324,
      "grad_norm": 1.016047648867495,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 17324
    },
    {
      "epoch": 0.17325,
      "grad_norm": 1.5114589017916467,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 17325
    },
    {
      "epoch": 0.17326,
      "grad_norm": 0.9455930518856717,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17326
    },
    {
      "epoch": 0.17327,
      "grad_norm": 1.234539836178666,
      "learning_rate": 0.003,
      "loss": 4.0959,
      "step": 17327
    },
    {
      "epoch": 0.17328,
      "grad_norm": 1.1030151397789953,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 17328
    },
    {
      "epoch": 0.17329,
      "grad_norm": 1.193455723878663,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 17329
    },
    {
      "epoch": 0.1733,
      "grad_norm": 1.1870621228560552,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 17330
    },
    {
      "epoch": 0.17331,
      "grad_norm": 1.181074107227128,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 17331
    },
    {
      "epoch": 0.17332,
      "grad_norm": 1.2535305004121582,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 17332
    },
    {
      "epoch": 0.17333,
      "grad_norm": 1.1677861497915796,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 17333
    },
    {
      "epoch": 0.17334,
      "grad_norm": 1.3835848238520283,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 17334
    },
    {
      "epoch": 0.17335,
      "grad_norm": 1.001867916778088,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17335
    },
    {
      "epoch": 0.17336,
      "grad_norm": 1.1941878147378127,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 17336
    },
    {
      "epoch": 0.17337,
      "grad_norm": 1.4436640146540043,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 17337
    },
    {
      "epoch": 0.17338,
      "grad_norm": 1.2000636357731604,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 17338
    },
    {
      "epoch": 0.17339,
      "grad_norm": 1.425669125341233,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 17339
    },
    {
      "epoch": 0.1734,
      "grad_norm": 1.1067087319978788,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 17340
    },
    {
      "epoch": 0.17341,
      "grad_norm": 1.5781509645823653,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 17341
    },
    {
      "epoch": 0.17342,
      "grad_norm": 1.3895482867320283,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 17342
    },
    {
      "epoch": 0.17343,
      "grad_norm": 1.394289370505111,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 17343
    },
    {
      "epoch": 0.17344,
      "grad_norm": 1.1254299309636389,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17344
    },
    {
      "epoch": 0.17345,
      "grad_norm": 1.3039505706052283,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 17345
    },
    {
      "epoch": 0.17346,
      "grad_norm": 1.0314295078687075,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 17346
    },
    {
      "epoch": 0.17347,
      "grad_norm": 1.428709188191048,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 17347
    },
    {
      "epoch": 0.17348,
      "grad_norm": 1.0377680148142387,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 17348
    },
    {
      "epoch": 0.17349,
      "grad_norm": 1.2438023988122742,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 17349
    },
    {
      "epoch": 0.1735,
      "grad_norm": 1.0543355503086393,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 17350
    },
    {
      "epoch": 0.17351,
      "grad_norm": 1.3034446575461487,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 17351
    },
    {
      "epoch": 0.17352,
      "grad_norm": 1.0396478433823113,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 17352
    },
    {
      "epoch": 0.17353,
      "grad_norm": 1.3464171912001612,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 17353
    },
    {
      "epoch": 0.17354,
      "grad_norm": 1.2469167332453905,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 17354
    },
    {
      "epoch": 0.17355,
      "grad_norm": 1.3267021457687498,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 17355
    },
    {
      "epoch": 0.17356,
      "grad_norm": 1.0710192732798718,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 17356
    },
    {
      "epoch": 0.17357,
      "grad_norm": 1.0544001713144933,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 17357
    },
    {
      "epoch": 0.17358,
      "grad_norm": 1.0685268509528416,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 17358
    },
    {
      "epoch": 0.17359,
      "grad_norm": 1.1413721600516764,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 17359
    },
    {
      "epoch": 0.1736,
      "grad_norm": 1.0506366536428504,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17360
    },
    {
      "epoch": 0.17361,
      "grad_norm": 1.3447342054920186,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 17361
    },
    {
      "epoch": 0.17362,
      "grad_norm": 1.0637095744551777,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 17362
    },
    {
      "epoch": 0.17363,
      "grad_norm": 1.7388362693003458,
      "learning_rate": 0.003,
      "loss": 4.0936,
      "step": 17363
    },
    {
      "epoch": 0.17364,
      "grad_norm": 0.9831432521140705,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 17364
    },
    {
      "epoch": 0.17365,
      "grad_norm": 1.4705586825909038,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 17365
    },
    {
      "epoch": 0.17366,
      "grad_norm": 1.2134443365457936,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 17366
    },
    {
      "epoch": 0.17367,
      "grad_norm": 1.0760335322232322,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 17367
    },
    {
      "epoch": 0.17368,
      "grad_norm": 1.4050896742620886,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 17368
    },
    {
      "epoch": 0.17369,
      "grad_norm": 1.1519306351139893,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 17369
    },
    {
      "epoch": 0.1737,
      "grad_norm": 1.2715432556807118,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 17370
    },
    {
      "epoch": 0.17371,
      "grad_norm": 1.0798038752130645,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 17371
    },
    {
      "epoch": 0.17372,
      "grad_norm": 1.1916009628730764,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17372
    },
    {
      "epoch": 0.17373,
      "grad_norm": 1.34387916507962,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 17373
    },
    {
      "epoch": 0.17374,
      "grad_norm": 1.2660161673516706,
      "learning_rate": 0.003,
      "loss": 4.094,
      "step": 17374
    },
    {
      "epoch": 0.17375,
      "grad_norm": 1.2639639529750466,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 17375
    },
    {
      "epoch": 0.17376,
      "grad_norm": 0.9883293538639081,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 17376
    },
    {
      "epoch": 0.17377,
      "grad_norm": 1.349228294150077,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17377
    },
    {
      "epoch": 0.17378,
      "grad_norm": 1.1976298692014888,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17378
    },
    {
      "epoch": 0.17379,
      "grad_norm": 1.2593558266892997,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 17379
    },
    {
      "epoch": 0.1738,
      "grad_norm": 1.125174432735208,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 17380
    },
    {
      "epoch": 0.17381,
      "grad_norm": 1.134657811220434,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 17381
    },
    {
      "epoch": 0.17382,
      "grad_norm": 1.3860411347672301,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 17382
    },
    {
      "epoch": 0.17383,
      "grad_norm": 1.1571068925438313,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 17383
    },
    {
      "epoch": 0.17384,
      "grad_norm": 1.3121596939110471,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 17384
    },
    {
      "epoch": 0.17385,
      "grad_norm": 1.1660777370818731,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17385
    },
    {
      "epoch": 0.17386,
      "grad_norm": 1.359590834657511,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 17386
    },
    {
      "epoch": 0.17387,
      "grad_norm": 1.2396381385843909,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 17387
    },
    {
      "epoch": 0.17388,
      "grad_norm": 1.3750258841566776,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17388
    },
    {
      "epoch": 0.17389,
      "grad_norm": 1.0431582342894719,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 17389
    },
    {
      "epoch": 0.1739,
      "grad_norm": 1.092776645824461,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 17390
    },
    {
      "epoch": 0.17391,
      "grad_norm": 1.2212321930368153,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 17391
    },
    {
      "epoch": 0.17392,
      "grad_norm": 1.1880519929514128,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 17392
    },
    {
      "epoch": 0.17393,
      "grad_norm": 1.2987233694070297,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 17393
    },
    {
      "epoch": 0.17394,
      "grad_norm": 1.1547617681859796,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 17394
    },
    {
      "epoch": 0.17395,
      "grad_norm": 1.18312823952414,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 17395
    },
    {
      "epoch": 0.17396,
      "grad_norm": 1.2214029007933558,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 17396
    },
    {
      "epoch": 0.17397,
      "grad_norm": 1.1923965263096545,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17397
    },
    {
      "epoch": 0.17398,
      "grad_norm": 1.1939984511215258,
      "learning_rate": 0.003,
      "loss": 4.0862,
      "step": 17398
    },
    {
      "epoch": 0.17399,
      "grad_norm": 1.3508573792006229,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 17399
    },
    {
      "epoch": 0.174,
      "grad_norm": 0.9756792054531213,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 17400
    },
    {
      "epoch": 0.17401,
      "grad_norm": 1.540516399421629,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 17401
    },
    {
      "epoch": 0.17402,
      "grad_norm": 1.1929414416125332,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 17402
    },
    {
      "epoch": 0.17403,
      "grad_norm": 1.456731591899645,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 17403
    },
    {
      "epoch": 0.17404,
      "grad_norm": 1.3061742791024789,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 17404
    },
    {
      "epoch": 0.17405,
      "grad_norm": 1.0258277823038597,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 17405
    },
    {
      "epoch": 0.17406,
      "grad_norm": 1.3363912977895924,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17406
    },
    {
      "epoch": 0.17407,
      "grad_norm": 1.0277188435368505,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 17407
    },
    {
      "epoch": 0.17408,
      "grad_norm": 1.4616916007944838,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 17408
    },
    {
      "epoch": 0.17409,
      "grad_norm": 1.0754104873918373,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 17409
    },
    {
      "epoch": 0.1741,
      "grad_norm": 1.3433569144712922,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 17410
    },
    {
      "epoch": 0.17411,
      "grad_norm": 1.2290776012565996,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 17411
    },
    {
      "epoch": 0.17412,
      "grad_norm": 1.4395562337305694,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 17412
    },
    {
      "epoch": 0.17413,
      "grad_norm": 1.059776161876866,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 17413
    },
    {
      "epoch": 0.17414,
      "grad_norm": 1.1961370968530112,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 17414
    },
    {
      "epoch": 0.17415,
      "grad_norm": 1.1513711456485125,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 17415
    },
    {
      "epoch": 0.17416,
      "grad_norm": 1.1713676939812365,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 17416
    },
    {
      "epoch": 0.17417,
      "grad_norm": 1.21215240137682,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 17417
    },
    {
      "epoch": 0.17418,
      "grad_norm": 1.5039122488959036,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 17418
    },
    {
      "epoch": 0.17419,
      "grad_norm": 1.0521888970913764,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 17419
    },
    {
      "epoch": 0.1742,
      "grad_norm": 1.7427240299517976,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 17420
    },
    {
      "epoch": 0.17421,
      "grad_norm": 1.2071011119302868,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 17421
    },
    {
      "epoch": 0.17422,
      "grad_norm": 1.1554356510982162,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17422
    },
    {
      "epoch": 0.17423,
      "grad_norm": 1.0665614824926966,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 17423
    },
    {
      "epoch": 0.17424,
      "grad_norm": 1.0319775786698153,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 17424
    },
    {
      "epoch": 0.17425,
      "grad_norm": 1.3222593694531697,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 17425
    },
    {
      "epoch": 0.17426,
      "grad_norm": 1.0752339619562283,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 17426
    },
    {
      "epoch": 0.17427,
      "grad_norm": 1.1739220640278247,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 17427
    },
    {
      "epoch": 0.17428,
      "grad_norm": 1.2056576134654182,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 17428
    },
    {
      "epoch": 0.17429,
      "grad_norm": 1.2450995026317917,
      "learning_rate": 0.003,
      "loss": 4.0965,
      "step": 17429
    },
    {
      "epoch": 0.1743,
      "grad_norm": 1.31570224111731,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 17430
    },
    {
      "epoch": 0.17431,
      "grad_norm": 1.0332357869599802,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 17431
    },
    {
      "epoch": 0.17432,
      "grad_norm": 1.5605484066922581,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 17432
    },
    {
      "epoch": 0.17433,
      "grad_norm": 1.3264933956197713,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 17433
    },
    {
      "epoch": 0.17434,
      "grad_norm": 1.2936850640111643,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 17434
    },
    {
      "epoch": 0.17435,
      "grad_norm": 1.2244355500203998,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 17435
    },
    {
      "epoch": 0.17436,
      "grad_norm": 1.2315533707703679,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 17436
    },
    {
      "epoch": 0.17437,
      "grad_norm": 1.2586522237325786,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 17437
    },
    {
      "epoch": 0.17438,
      "grad_norm": 1.1841362837337046,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 17438
    },
    {
      "epoch": 0.17439,
      "grad_norm": 1.2176337631673573,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 17439
    },
    {
      "epoch": 0.1744,
      "grad_norm": 0.8833942887639723,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 17440
    },
    {
      "epoch": 0.17441,
      "grad_norm": 1.0862041245266658,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 17441
    },
    {
      "epoch": 0.17442,
      "grad_norm": 1.5753903842429755,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 17442
    },
    {
      "epoch": 0.17443,
      "grad_norm": 1.24787047520156,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 17443
    },
    {
      "epoch": 0.17444,
      "grad_norm": 1.5739716818429952,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 17444
    },
    {
      "epoch": 0.17445,
      "grad_norm": 0.9822706554680806,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 17445
    },
    {
      "epoch": 0.17446,
      "grad_norm": 1.1543423233155667,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17446
    },
    {
      "epoch": 0.17447,
      "grad_norm": 1.3171341261232,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 17447
    },
    {
      "epoch": 0.17448,
      "grad_norm": 1.0885413195518518,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 17448
    },
    {
      "epoch": 0.17449,
      "grad_norm": 1.5900315349528589,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 17449
    },
    {
      "epoch": 0.1745,
      "grad_norm": 1.0495111283484926,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 17450
    },
    {
      "epoch": 0.17451,
      "grad_norm": 1.2986543620469437,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 17451
    },
    {
      "epoch": 0.17452,
      "grad_norm": 1.1124923261037898,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 17452
    },
    {
      "epoch": 0.17453,
      "grad_norm": 1.1499695987840877,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 17453
    },
    {
      "epoch": 0.17454,
      "grad_norm": 1.3980024711271888,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 17454
    },
    {
      "epoch": 0.17455,
      "grad_norm": 1.1463215124019255,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 17455
    },
    {
      "epoch": 0.17456,
      "grad_norm": 1.416338854831411,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 17456
    },
    {
      "epoch": 0.17457,
      "grad_norm": 1.0268140875842684,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 17457
    },
    {
      "epoch": 0.17458,
      "grad_norm": 1.2748176967428673,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 17458
    },
    {
      "epoch": 0.17459,
      "grad_norm": 1.1358222810883827,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 17459
    },
    {
      "epoch": 0.1746,
      "grad_norm": 1.1405717283917192,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 17460
    },
    {
      "epoch": 0.17461,
      "grad_norm": 1.1963364224680375,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17461
    },
    {
      "epoch": 0.17462,
      "grad_norm": 1.2574941412065117,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 17462
    },
    {
      "epoch": 0.17463,
      "grad_norm": 1.1582105387470332,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 17463
    },
    {
      "epoch": 0.17464,
      "grad_norm": 1.1285020721400938,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 17464
    },
    {
      "epoch": 0.17465,
      "grad_norm": 1.1918468012067522,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 17465
    },
    {
      "epoch": 0.17466,
      "grad_norm": 1.2707539191826716,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 17466
    },
    {
      "epoch": 0.17467,
      "grad_norm": 1.1256103256155763,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 17467
    },
    {
      "epoch": 0.17468,
      "grad_norm": 1.2050832262061841,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 17468
    },
    {
      "epoch": 0.17469,
      "grad_norm": 1.1627586590453378,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 17469
    },
    {
      "epoch": 0.1747,
      "grad_norm": 1.4769393152301924,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 17470
    },
    {
      "epoch": 0.17471,
      "grad_norm": 1.4019072501158703,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 17471
    },
    {
      "epoch": 0.17472,
      "grad_norm": 1.274208524354242,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 17472
    },
    {
      "epoch": 0.17473,
      "grad_norm": 1.0939524218488357,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 17473
    },
    {
      "epoch": 0.17474,
      "grad_norm": 1.034345914998027,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 17474
    },
    {
      "epoch": 0.17475,
      "grad_norm": 1.6101262917447974,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17475
    },
    {
      "epoch": 0.17476,
      "grad_norm": 0.9626673674877094,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 17476
    },
    {
      "epoch": 0.17477,
      "grad_norm": 1.480219461509833,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 17477
    },
    {
      "epoch": 0.17478,
      "grad_norm": 1.016698696410458,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 17478
    },
    {
      "epoch": 0.17479,
      "grad_norm": 1.3362403645797698,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 17479
    },
    {
      "epoch": 0.1748,
      "grad_norm": 1.1132221595496428,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 17480
    },
    {
      "epoch": 0.17481,
      "grad_norm": 1.3258542160701117,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 17481
    },
    {
      "epoch": 0.17482,
      "grad_norm": 1.2929608645634088,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 17482
    },
    {
      "epoch": 0.17483,
      "grad_norm": 1.434721460999265,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 17483
    },
    {
      "epoch": 0.17484,
      "grad_norm": 1.0592250933041438,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 17484
    },
    {
      "epoch": 0.17485,
      "grad_norm": 1.5433120517065146,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 17485
    },
    {
      "epoch": 0.17486,
      "grad_norm": 1.0705572036394353,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 17486
    },
    {
      "epoch": 0.17487,
      "grad_norm": 1.3827707405959053,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 17487
    },
    {
      "epoch": 0.17488,
      "grad_norm": 1.142033885926698,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 17488
    },
    {
      "epoch": 0.17489,
      "grad_norm": 1.2753747853269362,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 17489
    },
    {
      "epoch": 0.1749,
      "grad_norm": 1.2093357794938853,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 17490
    },
    {
      "epoch": 0.17491,
      "grad_norm": 1.1658421812276745,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 17491
    },
    {
      "epoch": 0.17492,
      "grad_norm": 1.2155218871293862,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 17492
    },
    {
      "epoch": 0.17493,
      "grad_norm": 1.1373968714050275,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 17493
    },
    {
      "epoch": 0.17494,
      "grad_norm": 1.448814901417094,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17494
    },
    {
      "epoch": 0.17495,
      "grad_norm": 1.0393948111752698,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 17495
    },
    {
      "epoch": 0.17496,
      "grad_norm": 1.2805671539513424,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 17496
    },
    {
      "epoch": 0.17497,
      "grad_norm": 1.0441830037144317,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 17497
    },
    {
      "epoch": 0.17498,
      "grad_norm": 1.2992945648252001,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 17498
    },
    {
      "epoch": 0.17499,
      "grad_norm": 1.1967694643430729,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 17499
    },
    {
      "epoch": 0.175,
      "grad_norm": 1.4493269513423794,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 17500
    },
    {
      "epoch": 0.17501,
      "grad_norm": 1.1608281281318578,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 17501
    },
    {
      "epoch": 0.17502,
      "grad_norm": 1.3398676043732376,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 17502
    },
    {
      "epoch": 0.17503,
      "grad_norm": 1.256590649603026,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 17503
    },
    {
      "epoch": 0.17504,
      "grad_norm": 1.2671038513783908,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 17504
    },
    {
      "epoch": 0.17505,
      "grad_norm": 1.1787987263178186,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 17505
    },
    {
      "epoch": 0.17506,
      "grad_norm": 1.1479484073222257,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 17506
    },
    {
      "epoch": 0.17507,
      "grad_norm": 1.3052434632621392,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 17507
    },
    {
      "epoch": 0.17508,
      "grad_norm": 1.1784053215658497,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 17508
    },
    {
      "epoch": 0.17509,
      "grad_norm": 1.3909370878692535,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 17509
    },
    {
      "epoch": 0.1751,
      "grad_norm": 1.0533720768723058,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 17510
    },
    {
      "epoch": 0.17511,
      "grad_norm": 1.4538669744073178,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 17511
    },
    {
      "epoch": 0.17512,
      "grad_norm": 0.9633325199231935,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 17512
    },
    {
      "epoch": 0.17513,
      "grad_norm": 1.1919300616810415,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 17513
    },
    {
      "epoch": 0.17514,
      "grad_norm": 1.146252261093832,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 17514
    },
    {
      "epoch": 0.17515,
      "grad_norm": 1.368024356258058,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 17515
    },
    {
      "epoch": 0.17516,
      "grad_norm": 1.2166841015921095,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 17516
    },
    {
      "epoch": 0.17517,
      "grad_norm": 1.3091680163057102,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 17517
    },
    {
      "epoch": 0.17518,
      "grad_norm": 1.2396443494024652,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 17518
    },
    {
      "epoch": 0.17519,
      "grad_norm": 1.1627180611853813,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 17519
    },
    {
      "epoch": 0.1752,
      "grad_norm": 1.6198315221427717,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 17520
    },
    {
      "epoch": 0.17521,
      "grad_norm": 0.840168992554525,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 17521
    },
    {
      "epoch": 0.17522,
      "grad_norm": 0.9590914720353984,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 17522
    },
    {
      "epoch": 0.17523,
      "grad_norm": 1.3983920320796508,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 17523
    },
    {
      "epoch": 0.17524,
      "grad_norm": 1.0315801148551125,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 17524
    },
    {
      "epoch": 0.17525,
      "grad_norm": 1.3434704317693753,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 17525
    },
    {
      "epoch": 0.17526,
      "grad_norm": 1.2293442992262185,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17526
    },
    {
      "epoch": 0.17527,
      "grad_norm": 1.2841900805269686,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 17527
    },
    {
      "epoch": 0.17528,
      "grad_norm": 1.0836105577783346,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 17528
    },
    {
      "epoch": 0.17529,
      "grad_norm": 1.3544433389629666,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 17529
    },
    {
      "epoch": 0.1753,
      "grad_norm": 1.2382887236243227,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 17530
    },
    {
      "epoch": 0.17531,
      "grad_norm": 1.5347719322095512,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 17531
    },
    {
      "epoch": 0.17532,
      "grad_norm": 1.330589852802039,
      "learning_rate": 0.003,
      "loss": 4.0086,
      "step": 17532
    },
    {
      "epoch": 0.17533,
      "grad_norm": 1.1626480484934163,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 17533
    },
    {
      "epoch": 0.17534,
      "grad_norm": 1.335722699882295,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 17534
    },
    {
      "epoch": 0.17535,
      "grad_norm": 1.2927036804776442,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 17535
    },
    {
      "epoch": 0.17536,
      "grad_norm": 1.1470715866277477,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 17536
    },
    {
      "epoch": 0.17537,
      "grad_norm": 1.4091680550006398,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 17537
    },
    {
      "epoch": 0.17538,
      "grad_norm": 1.0395124989106723,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 17538
    },
    {
      "epoch": 0.17539,
      "grad_norm": 1.1193328696694718,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 17539
    },
    {
      "epoch": 0.1754,
      "grad_norm": 1.0572607792899362,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 17540
    },
    {
      "epoch": 0.17541,
      "grad_norm": 1.127607101416577,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 17541
    },
    {
      "epoch": 0.17542,
      "grad_norm": 1.1000821175888242,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 17542
    },
    {
      "epoch": 0.17543,
      "grad_norm": 1.1518160078872373,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 17543
    },
    {
      "epoch": 0.17544,
      "grad_norm": 1.0542688960623041,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 17544
    },
    {
      "epoch": 0.17545,
      "grad_norm": 1.328051050266375,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 17545
    },
    {
      "epoch": 0.17546,
      "grad_norm": 1.2527923949541515,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 17546
    },
    {
      "epoch": 0.17547,
      "grad_norm": 1.4186973412639106,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 17547
    },
    {
      "epoch": 0.17548,
      "grad_norm": 1.1633900552347904,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 17548
    },
    {
      "epoch": 0.17549,
      "grad_norm": 1.4780459216676087,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 17549
    },
    {
      "epoch": 0.1755,
      "grad_norm": 1.0172920362123463,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 17550
    },
    {
      "epoch": 0.17551,
      "grad_norm": 1.3685240676184935,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 17551
    },
    {
      "epoch": 0.17552,
      "grad_norm": 1.0368441281610936,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 17552
    },
    {
      "epoch": 0.17553,
      "grad_norm": 1.4058036979030497,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17553
    },
    {
      "epoch": 0.17554,
      "grad_norm": 1.1194347132437152,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 17554
    },
    {
      "epoch": 0.17555,
      "grad_norm": 1.5655268291642233,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 17555
    },
    {
      "epoch": 0.17556,
      "grad_norm": 1.1736440422000152,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 17556
    },
    {
      "epoch": 0.17557,
      "grad_norm": 1.170099664234999,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 17557
    },
    {
      "epoch": 0.17558,
      "grad_norm": 1.1925769624962441,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 17558
    },
    {
      "epoch": 0.17559,
      "grad_norm": 1.4664331252751481,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 17559
    },
    {
      "epoch": 0.1756,
      "grad_norm": 1.165334631183967,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 17560
    },
    {
      "epoch": 0.17561,
      "grad_norm": 1.2493506552134022,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 17561
    },
    {
      "epoch": 0.17562,
      "grad_norm": 1.1387507616222472,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 17562
    },
    {
      "epoch": 0.17563,
      "grad_norm": 1.230383826478518,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 17563
    },
    {
      "epoch": 0.17564,
      "grad_norm": 1.3822001105484043,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 17564
    },
    {
      "epoch": 0.17565,
      "grad_norm": 1.1913997662718807,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 17565
    },
    {
      "epoch": 0.17566,
      "grad_norm": 1.174785593420601,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 17566
    },
    {
      "epoch": 0.17567,
      "grad_norm": 1.2781363720580885,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 17567
    },
    {
      "epoch": 0.17568,
      "grad_norm": 1.430901217235836,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 17568
    },
    {
      "epoch": 0.17569,
      "grad_norm": 1.4839028594353691,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 17569
    },
    {
      "epoch": 0.1757,
      "grad_norm": 1.0595950525634856,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 17570
    },
    {
      "epoch": 0.17571,
      "grad_norm": 1.397745750793812,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17571
    },
    {
      "epoch": 0.17572,
      "grad_norm": 0.8888418747520094,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 17572
    },
    {
      "epoch": 0.17573,
      "grad_norm": 1.1291369602671173,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 17573
    },
    {
      "epoch": 0.17574,
      "grad_norm": 1.345867183091558,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 17574
    },
    {
      "epoch": 0.17575,
      "grad_norm": 1.2251985628025621,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 17575
    },
    {
      "epoch": 0.17576,
      "grad_norm": 1.1472025451571806,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 17576
    },
    {
      "epoch": 0.17577,
      "grad_norm": 1.310219249105373,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 17577
    },
    {
      "epoch": 0.17578,
      "grad_norm": 1.115732655745892,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 17578
    },
    {
      "epoch": 0.17579,
      "grad_norm": 1.4050434236000149,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 17579
    },
    {
      "epoch": 0.1758,
      "grad_norm": 1.1561917260482364,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 17580
    },
    {
      "epoch": 0.17581,
      "grad_norm": 1.1093752538129786,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 17581
    },
    {
      "epoch": 0.17582,
      "grad_norm": 1.1966739403321605,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 17582
    },
    {
      "epoch": 0.17583,
      "grad_norm": 1.1597078275261965,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 17583
    },
    {
      "epoch": 0.17584,
      "grad_norm": 1.245543602017537,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 17584
    },
    {
      "epoch": 0.17585,
      "grad_norm": 1.2971524283976426,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 17585
    },
    {
      "epoch": 0.17586,
      "grad_norm": 1.279460954671343,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 17586
    },
    {
      "epoch": 0.17587,
      "grad_norm": 0.9850573623267598,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 17587
    },
    {
      "epoch": 0.17588,
      "grad_norm": 1.5538238797994108,
      "learning_rate": 0.003,
      "loss": 4.1036,
      "step": 17588
    },
    {
      "epoch": 0.17589,
      "grad_norm": 1.4729239462062387,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 17589
    },
    {
      "epoch": 0.1759,
      "grad_norm": 1.3982049000883698,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 17590
    },
    {
      "epoch": 0.17591,
      "grad_norm": 1.1514404548821,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 17591
    },
    {
      "epoch": 0.17592,
      "grad_norm": 1.3239635036333988,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17592
    },
    {
      "epoch": 0.17593,
      "grad_norm": 1.1577004099456845,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 17593
    },
    {
      "epoch": 0.17594,
      "grad_norm": 1.48381322508598,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17594
    },
    {
      "epoch": 0.17595,
      "grad_norm": 1.2818816514500897,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 17595
    },
    {
      "epoch": 0.17596,
      "grad_norm": 1.1230113393426457,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 17596
    },
    {
      "epoch": 0.17597,
      "grad_norm": 1.3729943591307712,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 17597
    },
    {
      "epoch": 0.17598,
      "grad_norm": 0.9914762352146483,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 17598
    },
    {
      "epoch": 0.17599,
      "grad_norm": 1.3774066199231678,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 17599
    },
    {
      "epoch": 0.176,
      "grad_norm": 1.0965748648443667,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 17600
    },
    {
      "epoch": 0.17601,
      "grad_norm": 1.3602734721447638,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 17601
    },
    {
      "epoch": 0.17602,
      "grad_norm": 1.1284290528234537,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 17602
    },
    {
      "epoch": 0.17603,
      "grad_norm": 1.4470060608861322,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 17603
    },
    {
      "epoch": 0.17604,
      "grad_norm": 1.3326749037952799,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 17604
    },
    {
      "epoch": 0.17605,
      "grad_norm": 1.1992346349580116,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 17605
    },
    {
      "epoch": 0.17606,
      "grad_norm": 1.204252474404044,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 17606
    },
    {
      "epoch": 0.17607,
      "grad_norm": 1.3357520365039544,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 17607
    },
    {
      "epoch": 0.17608,
      "grad_norm": 1.0331392547237794,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 17608
    },
    {
      "epoch": 0.17609,
      "grad_norm": 1.3546960054869321,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 17609
    },
    {
      "epoch": 0.1761,
      "grad_norm": 1.121500244461343,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 17610
    },
    {
      "epoch": 0.17611,
      "grad_norm": 1.6978483805985116,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 17611
    },
    {
      "epoch": 0.17612,
      "grad_norm": 1.2286940762229328,
      "learning_rate": 0.003,
      "loss": 3.9987,
      "step": 17612
    },
    {
      "epoch": 0.17613,
      "grad_norm": 1.1652848259594104,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 17613
    },
    {
      "epoch": 0.17614,
      "grad_norm": 1.273266706629538,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 17614
    },
    {
      "epoch": 0.17615,
      "grad_norm": 1.3824239141339554,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 17615
    },
    {
      "epoch": 0.17616,
      "grad_norm": 0.9745262884860337,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 17616
    },
    {
      "epoch": 0.17617,
      "grad_norm": 1.4242361731724766,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 17617
    },
    {
      "epoch": 0.17618,
      "grad_norm": 0.8912212954298186,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 17618
    },
    {
      "epoch": 0.17619,
      "grad_norm": 1.1166246583787072,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 17619
    },
    {
      "epoch": 0.1762,
      "grad_norm": 1.4365710019264393,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 17620
    },
    {
      "epoch": 0.17621,
      "grad_norm": 0.9786269404450421,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 17621
    },
    {
      "epoch": 0.17622,
      "grad_norm": 1.2822849544308548,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 17622
    },
    {
      "epoch": 0.17623,
      "grad_norm": 1.2558430140495684,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 17623
    },
    {
      "epoch": 0.17624,
      "grad_norm": 1.0919536116804685,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 17624
    },
    {
      "epoch": 0.17625,
      "grad_norm": 1.234739985489756,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 17625
    },
    {
      "epoch": 0.17626,
      "grad_norm": 1.0727220983044485,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 17626
    },
    {
      "epoch": 0.17627,
      "grad_norm": 1.3477234653911756,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 17627
    },
    {
      "epoch": 0.17628,
      "grad_norm": 1.0251531491230008,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 17628
    },
    {
      "epoch": 0.17629,
      "grad_norm": 1.5822116001438553,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 17629
    },
    {
      "epoch": 0.1763,
      "grad_norm": 1.1129092524525874,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 17630
    },
    {
      "epoch": 0.17631,
      "grad_norm": 1.3659215961959672,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 17631
    },
    {
      "epoch": 0.17632,
      "grad_norm": 1.0435482840136914,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 17632
    },
    {
      "epoch": 0.17633,
      "grad_norm": 1.3355283738661576,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17633
    },
    {
      "epoch": 0.17634,
      "grad_norm": 1.274431936531252,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 17634
    },
    {
      "epoch": 0.17635,
      "grad_norm": 1.2090461791871303,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 17635
    },
    {
      "epoch": 0.17636,
      "grad_norm": 1.3685920892283052,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17636
    },
    {
      "epoch": 0.17637,
      "grad_norm": 1.2009704327649677,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 17637
    },
    {
      "epoch": 0.17638,
      "grad_norm": 1.2632615719779379,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 17638
    },
    {
      "epoch": 0.17639,
      "grad_norm": 1.1397733894136788,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 17639
    },
    {
      "epoch": 0.1764,
      "grad_norm": 1.4079776352686797,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 17640
    },
    {
      "epoch": 0.17641,
      "grad_norm": 1.039036046539604,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 17641
    },
    {
      "epoch": 0.17642,
      "grad_norm": 1.2930599131435265,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 17642
    },
    {
      "epoch": 0.17643,
      "grad_norm": 1.168619643131443,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 17643
    },
    {
      "epoch": 0.17644,
      "grad_norm": 1.66000628596868,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 17644
    },
    {
      "epoch": 0.17645,
      "grad_norm": 1.0035728656425535,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 17645
    },
    {
      "epoch": 0.17646,
      "grad_norm": 1.3657648068501418,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 17646
    },
    {
      "epoch": 0.17647,
      "grad_norm": 1.2531256308702934,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 17647
    },
    {
      "epoch": 0.17648,
      "grad_norm": 1.1741662655175689,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 17648
    },
    {
      "epoch": 0.17649,
      "grad_norm": 1.3786238821849517,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 17649
    },
    {
      "epoch": 0.1765,
      "grad_norm": 1.2318838699530237,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 17650
    },
    {
      "epoch": 0.17651,
      "grad_norm": 1.3729766156990553,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 17651
    },
    {
      "epoch": 0.17652,
      "grad_norm": 1.271866798739319,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 17652
    },
    {
      "epoch": 0.17653,
      "grad_norm": 1.0108262950648896,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 17653
    },
    {
      "epoch": 0.17654,
      "grad_norm": 1.4361150452615088,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 17654
    },
    {
      "epoch": 0.17655,
      "grad_norm": 1.3401112621851474,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 17655
    },
    {
      "epoch": 0.17656,
      "grad_norm": 1.0152148181993021,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 17656
    },
    {
      "epoch": 0.17657,
      "grad_norm": 1.5114430360780986,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 17657
    },
    {
      "epoch": 0.17658,
      "grad_norm": 1.111390450685094,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 17658
    },
    {
      "epoch": 0.17659,
      "grad_norm": 1.338600690986281,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 17659
    },
    {
      "epoch": 0.1766,
      "grad_norm": 1.0702854368560606,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 17660
    },
    {
      "epoch": 0.17661,
      "grad_norm": 1.2839299019526675,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 17661
    },
    {
      "epoch": 0.17662,
      "grad_norm": 1.3771072187069797,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 17662
    },
    {
      "epoch": 0.17663,
      "grad_norm": 1.1019750514542517,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 17663
    },
    {
      "epoch": 0.17664,
      "grad_norm": 1.249229399378219,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 17664
    },
    {
      "epoch": 0.17665,
      "grad_norm": 1.0222927806103053,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 17665
    },
    {
      "epoch": 0.17666,
      "grad_norm": 1.1079364755619592,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 17666
    },
    {
      "epoch": 0.17667,
      "grad_norm": 1.4148312123252131,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 17667
    },
    {
      "epoch": 0.17668,
      "grad_norm": 1.3043529386896926,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 17668
    },
    {
      "epoch": 0.17669,
      "grad_norm": 1.3097333085299816,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 17669
    },
    {
      "epoch": 0.1767,
      "grad_norm": 1.3170883756693887,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 17670
    },
    {
      "epoch": 0.17671,
      "grad_norm": 1.3114065044602923,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 17671
    },
    {
      "epoch": 0.17672,
      "grad_norm": 1.2785908563031507,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 17672
    },
    {
      "epoch": 0.17673,
      "grad_norm": 1.4310817681056016,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 17673
    },
    {
      "epoch": 0.17674,
      "grad_norm": 1.0281852852331894,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 17674
    },
    {
      "epoch": 0.17675,
      "grad_norm": 1.2569150977830006,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 17675
    },
    {
      "epoch": 0.17676,
      "grad_norm": 1.1352305456951268,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 17676
    },
    {
      "epoch": 0.17677,
      "grad_norm": 1.3484709075956498,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 17677
    },
    {
      "epoch": 0.17678,
      "grad_norm": 1.1328023324347336,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 17678
    },
    {
      "epoch": 0.17679,
      "grad_norm": 1.2653804097778356,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 17679
    },
    {
      "epoch": 0.1768,
      "grad_norm": 1.0475305282319034,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 17680
    },
    {
      "epoch": 0.17681,
      "grad_norm": 1.3737179652172784,
      "learning_rate": 0.003,
      "loss": 4.0925,
      "step": 17681
    },
    {
      "epoch": 0.17682,
      "grad_norm": 1.2535426664356284,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 17682
    },
    {
      "epoch": 0.17683,
      "grad_norm": 1.2279889096925745,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 17683
    },
    {
      "epoch": 0.17684,
      "grad_norm": 1.1955588371680803,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 17684
    },
    {
      "epoch": 0.17685,
      "grad_norm": 1.4865881201944695,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 17685
    },
    {
      "epoch": 0.17686,
      "grad_norm": 1.0444206015567041,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 17686
    },
    {
      "epoch": 0.17687,
      "grad_norm": 1.4115457653886876,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17687
    },
    {
      "epoch": 0.17688,
      "grad_norm": 1.0142565536116115,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 17688
    },
    {
      "epoch": 0.17689,
      "grad_norm": 1.2686762194693473,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 17689
    },
    {
      "epoch": 0.1769,
      "grad_norm": 1.1771112520826768,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 17690
    },
    {
      "epoch": 0.17691,
      "grad_norm": 1.1278410458658201,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 17691
    },
    {
      "epoch": 0.17692,
      "grad_norm": 1.6059003840813524,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 17692
    },
    {
      "epoch": 0.17693,
      "grad_norm": 0.9920783057554169,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 17693
    },
    {
      "epoch": 0.17694,
      "grad_norm": 1.4282914897729126,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 17694
    },
    {
      "epoch": 0.17695,
      "grad_norm": 1.1648853447259635,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 17695
    },
    {
      "epoch": 0.17696,
      "grad_norm": 1.580463939936142,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 17696
    },
    {
      "epoch": 0.17697,
      "grad_norm": 1.117583895703195,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 17697
    },
    {
      "epoch": 0.17698,
      "grad_norm": 1.2340128342748578,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 17698
    },
    {
      "epoch": 0.17699,
      "grad_norm": 1.1396362622344216,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 17699
    },
    {
      "epoch": 0.177,
      "grad_norm": 1.214027466886459,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 17700
    },
    {
      "epoch": 0.17701,
      "grad_norm": 1.1328025500727548,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 17701
    },
    {
      "epoch": 0.17702,
      "grad_norm": 1.408872578584067,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 17702
    },
    {
      "epoch": 0.17703,
      "grad_norm": 1.1604123137397944,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 17703
    },
    {
      "epoch": 0.17704,
      "grad_norm": 1.5697108988592918,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 17704
    },
    {
      "epoch": 0.17705,
      "grad_norm": 1.2145111871919732,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 17705
    },
    {
      "epoch": 0.17706,
      "grad_norm": 1.1636825149371066,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 17706
    },
    {
      "epoch": 0.17707,
      "grad_norm": 1.144255277297568,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17707
    },
    {
      "epoch": 0.17708,
      "grad_norm": 1.1035256098491526,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 17708
    },
    {
      "epoch": 0.17709,
      "grad_norm": 1.321224817587734,
      "learning_rate": 0.003,
      "loss": 4.0901,
      "step": 17709
    },
    {
      "epoch": 0.1771,
      "grad_norm": 1.144107581755462,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 17710
    },
    {
      "epoch": 0.17711,
      "grad_norm": 1.182481438002469,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 17711
    },
    {
      "epoch": 0.17712,
      "grad_norm": 1.3135315910739842,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 17712
    },
    {
      "epoch": 0.17713,
      "grad_norm": 1.3106809535007589,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 17713
    },
    {
      "epoch": 0.17714,
      "grad_norm": 1.370874815697478,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 17714
    },
    {
      "epoch": 0.17715,
      "grad_norm": 1.1333885622735582,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 17715
    },
    {
      "epoch": 0.17716,
      "grad_norm": 1.2091495521461906,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 17716
    },
    {
      "epoch": 0.17717,
      "grad_norm": 1.330653285021552,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 17717
    },
    {
      "epoch": 0.17718,
      "grad_norm": 1.3424982564609935,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 17718
    },
    {
      "epoch": 0.17719,
      "grad_norm": 1.0814534319686981,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 17719
    },
    {
      "epoch": 0.1772,
      "grad_norm": 1.2538717772839065,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 17720
    },
    {
      "epoch": 0.17721,
      "grad_norm": 1.109582526488981,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 17721
    },
    {
      "epoch": 0.17722,
      "grad_norm": 1.519905307423152,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 17722
    },
    {
      "epoch": 0.17723,
      "grad_norm": 1.0296511825449017,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 17723
    },
    {
      "epoch": 0.17724,
      "grad_norm": 1.3566963687284888,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 17724
    },
    {
      "epoch": 0.17725,
      "grad_norm": 1.1788101028553641,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 17725
    },
    {
      "epoch": 0.17726,
      "grad_norm": 1.421231601442113,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 17726
    },
    {
      "epoch": 0.17727,
      "grad_norm": 1.2076056839101652,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 17727
    },
    {
      "epoch": 0.17728,
      "grad_norm": 1.520651972884749,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 17728
    },
    {
      "epoch": 0.17729,
      "grad_norm": 1.1488636254034916,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 17729
    },
    {
      "epoch": 0.1773,
      "grad_norm": 1.1346597045234943,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 17730
    },
    {
      "epoch": 0.17731,
      "grad_norm": 1.306299544302792,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 17731
    },
    {
      "epoch": 0.17732,
      "grad_norm": 1.2966838820232638,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 17732
    },
    {
      "epoch": 0.17733,
      "grad_norm": 1.2096264718522176,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17733
    },
    {
      "epoch": 0.17734,
      "grad_norm": 1.4028764472622368,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 17734
    },
    {
      "epoch": 0.17735,
      "grad_norm": 0.9606968967699793,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 17735
    },
    {
      "epoch": 0.17736,
      "grad_norm": 1.2020753944731186,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 17736
    },
    {
      "epoch": 0.17737,
      "grad_norm": 1.092798913708364,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 17737
    },
    {
      "epoch": 0.17738,
      "grad_norm": 1.1665994160643567,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 17738
    },
    {
      "epoch": 0.17739,
      "grad_norm": 1.1739179621104348,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 17739
    },
    {
      "epoch": 0.1774,
      "grad_norm": 1.171815396210287,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 17740
    },
    {
      "epoch": 0.17741,
      "grad_norm": 1.3307553846157683,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 17741
    },
    {
      "epoch": 0.17742,
      "grad_norm": 1.1873860826085796,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 17742
    },
    {
      "epoch": 0.17743,
      "grad_norm": 1.7621063120413198,
      "learning_rate": 0.003,
      "loss": 4.1058,
      "step": 17743
    },
    {
      "epoch": 0.17744,
      "grad_norm": 1.0028118946892863,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 17744
    },
    {
      "epoch": 0.17745,
      "grad_norm": 1.3005509065026561,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 17745
    },
    {
      "epoch": 0.17746,
      "grad_norm": 1.223731103567378,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 17746
    },
    {
      "epoch": 0.17747,
      "grad_norm": 1.1780862268274155,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 17747
    },
    {
      "epoch": 0.17748,
      "grad_norm": 1.3037820890798655,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 17748
    },
    {
      "epoch": 0.17749,
      "grad_norm": 1.2787151216940233,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 17749
    },
    {
      "epoch": 0.1775,
      "grad_norm": 1.258189733789853,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 17750
    },
    {
      "epoch": 0.17751,
      "grad_norm": 1.2173859470415813,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 17751
    },
    {
      "epoch": 0.17752,
      "grad_norm": 1.0831928331721983,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 17752
    },
    {
      "epoch": 0.17753,
      "grad_norm": 1.1724284204882491,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 17753
    },
    {
      "epoch": 0.17754,
      "grad_norm": 1.1634608471119323,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 17754
    },
    {
      "epoch": 0.17755,
      "grad_norm": 1.312550749239694,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 17755
    },
    {
      "epoch": 0.17756,
      "grad_norm": 1.2343635267456727,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 17756
    },
    {
      "epoch": 0.17757,
      "grad_norm": 1.6113516556311576,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 17757
    },
    {
      "epoch": 0.17758,
      "grad_norm": 1.3210645551520415,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 17758
    },
    {
      "epoch": 0.17759,
      "grad_norm": 1.059033798774361,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 17759
    },
    {
      "epoch": 0.1776,
      "grad_norm": 1.5521546207512071,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 17760
    },
    {
      "epoch": 0.17761,
      "grad_norm": 1.1829313243367725,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 17761
    },
    {
      "epoch": 0.17762,
      "grad_norm": 1.680215690648254,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 17762
    },
    {
      "epoch": 0.17763,
      "grad_norm": 0.9782282263638936,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 17763
    },
    {
      "epoch": 0.17764,
      "grad_norm": 1.3291797239860548,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 17764
    },
    {
      "epoch": 0.17765,
      "grad_norm": 1.5131838203870505,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 17765
    },
    {
      "epoch": 0.17766,
      "grad_norm": 1.1618004425482447,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 17766
    },
    {
      "epoch": 0.17767,
      "grad_norm": 1.314309478723575,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 17767
    },
    {
      "epoch": 0.17768,
      "grad_norm": 1.1771406581385115,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17768
    },
    {
      "epoch": 0.17769,
      "grad_norm": 1.081005441102454,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 17769
    },
    {
      "epoch": 0.1777,
      "grad_norm": 1.3551935041036287,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 17770
    },
    {
      "epoch": 0.17771,
      "grad_norm": 1.2143773829771678,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 17771
    },
    {
      "epoch": 0.17772,
      "grad_norm": 1.141660551470356,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 17772
    },
    {
      "epoch": 0.17773,
      "grad_norm": 1.332139537228624,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 17773
    },
    {
      "epoch": 0.17774,
      "grad_norm": 1.157054587182458,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 17774
    },
    {
      "epoch": 0.17775,
      "grad_norm": 1.2408157631694359,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 17775
    },
    {
      "epoch": 0.17776,
      "grad_norm": 1.1591098606009527,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 17776
    },
    {
      "epoch": 0.17777,
      "grad_norm": 1.2447238780446748,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 17777
    },
    {
      "epoch": 0.17778,
      "grad_norm": 1.3893890679657246,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17778
    },
    {
      "epoch": 0.17779,
      "grad_norm": 1.258303519382804,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 17779
    },
    {
      "epoch": 0.1778,
      "grad_norm": 1.159148796376476,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 17780
    },
    {
      "epoch": 0.17781,
      "grad_norm": 1.233847865044257,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 17781
    },
    {
      "epoch": 0.17782,
      "grad_norm": 1.163521453872752,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 17782
    },
    {
      "epoch": 0.17783,
      "grad_norm": 1.2348060635009395,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 17783
    },
    {
      "epoch": 0.17784,
      "grad_norm": 1.1449346808237173,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 17784
    },
    {
      "epoch": 0.17785,
      "grad_norm": 1.2755708596167314,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 17785
    },
    {
      "epoch": 0.17786,
      "grad_norm": 1.0950855712195076,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 17786
    },
    {
      "epoch": 0.17787,
      "grad_norm": 1.449139170957916,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 17787
    },
    {
      "epoch": 0.17788,
      "grad_norm": 1.1825995204290765,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 17788
    },
    {
      "epoch": 0.17789,
      "grad_norm": 1.4782552630292467,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 17789
    },
    {
      "epoch": 0.1779,
      "grad_norm": 1.0884511898359692,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 17790
    },
    {
      "epoch": 0.17791,
      "grad_norm": 1.303993288342492,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 17791
    },
    {
      "epoch": 0.17792,
      "grad_norm": 1.4776915153971397,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 17792
    },
    {
      "epoch": 0.17793,
      "grad_norm": 1.143160794688882,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 17793
    },
    {
      "epoch": 0.17794,
      "grad_norm": 1.337585706731177,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 17794
    },
    {
      "epoch": 0.17795,
      "grad_norm": 1.2882867499124373,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 17795
    },
    {
      "epoch": 0.17796,
      "grad_norm": 1.3099175579503028,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 17796
    },
    {
      "epoch": 0.17797,
      "grad_norm": 1.3772907450722711,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 17797
    },
    {
      "epoch": 0.17798,
      "grad_norm": 1.1439080417379976,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 17798
    },
    {
      "epoch": 0.17799,
      "grad_norm": 1.4427349945651953,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 17799
    },
    {
      "epoch": 0.178,
      "grad_norm": 0.8359998493602386,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 17800
    },
    {
      "epoch": 0.17801,
      "grad_norm": 1.3170836659544407,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 17801
    },
    {
      "epoch": 0.17802,
      "grad_norm": 1.3913854503988206,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 17802
    },
    {
      "epoch": 0.17803,
      "grad_norm": 1.117097811135875,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 17803
    },
    {
      "epoch": 0.17804,
      "grad_norm": 1.44672804441292,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 17804
    },
    {
      "epoch": 0.17805,
      "grad_norm": 1.081482875995203,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 17805
    },
    {
      "epoch": 0.17806,
      "grad_norm": 1.2817128313436066,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 17806
    },
    {
      "epoch": 0.17807,
      "grad_norm": 1.1442125206487275,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 17807
    },
    {
      "epoch": 0.17808,
      "grad_norm": 1.2389433793066236,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 17808
    },
    {
      "epoch": 0.17809,
      "grad_norm": 1.1412907392971439,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 17809
    },
    {
      "epoch": 0.1781,
      "grad_norm": 1.4502744583052565,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 17810
    },
    {
      "epoch": 0.17811,
      "grad_norm": 0.8816630115732121,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17811
    },
    {
      "epoch": 0.17812,
      "grad_norm": 1.243229745885222,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 17812
    },
    {
      "epoch": 0.17813,
      "grad_norm": 1.2535565267594253,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 17813
    },
    {
      "epoch": 0.17814,
      "grad_norm": 1.2126124047186526,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 17814
    },
    {
      "epoch": 0.17815,
      "grad_norm": 1.298381161029849,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 17815
    },
    {
      "epoch": 0.17816,
      "grad_norm": 1.200970343801902,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 17816
    },
    {
      "epoch": 0.17817,
      "grad_norm": 1.4408631650324562,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 17817
    },
    {
      "epoch": 0.17818,
      "grad_norm": 1.3058479181801004,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 17818
    },
    {
      "epoch": 0.17819,
      "grad_norm": 1.2483198465754957,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 17819
    },
    {
      "epoch": 0.1782,
      "grad_norm": 1.2421489369861138,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 17820
    },
    {
      "epoch": 0.17821,
      "grad_norm": 1.2208967380802178,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 17821
    },
    {
      "epoch": 0.17822,
      "grad_norm": 1.2559317071733944,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 17822
    },
    {
      "epoch": 0.17823,
      "grad_norm": 1.2096541669674137,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 17823
    },
    {
      "epoch": 0.17824,
      "grad_norm": 1.2152591273481994,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 17824
    },
    {
      "epoch": 0.17825,
      "grad_norm": 1.2091785587737627,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17825
    },
    {
      "epoch": 0.17826,
      "grad_norm": 1.1591984087625793,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 17826
    },
    {
      "epoch": 0.17827,
      "grad_norm": 1.2241813736032778,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 17827
    },
    {
      "epoch": 0.17828,
      "grad_norm": 1.3272339228045538,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 17828
    },
    {
      "epoch": 0.17829,
      "grad_norm": 1.139992611986561,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 17829
    },
    {
      "epoch": 0.1783,
      "grad_norm": 1.5554212661070375,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 17830
    },
    {
      "epoch": 0.17831,
      "grad_norm": 0.9181997975303219,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 17831
    },
    {
      "epoch": 0.17832,
      "grad_norm": 1.1076323918128805,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 17832
    },
    {
      "epoch": 0.17833,
      "grad_norm": 1.3858914559464381,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 17833
    },
    {
      "epoch": 0.17834,
      "grad_norm": 1.1699376628371638,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 17834
    },
    {
      "epoch": 0.17835,
      "grad_norm": 1.3609949015398057,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 17835
    },
    {
      "epoch": 0.17836,
      "grad_norm": 1.0474430728969828,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 17836
    },
    {
      "epoch": 0.17837,
      "grad_norm": 1.517426880051347,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 17837
    },
    {
      "epoch": 0.17838,
      "grad_norm": 1.1199970353263173,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 17838
    },
    {
      "epoch": 0.17839,
      "grad_norm": 1.5003042927264383,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 17839
    },
    {
      "epoch": 0.1784,
      "grad_norm": 1.1665549956682206,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 17840
    },
    {
      "epoch": 0.17841,
      "grad_norm": 1.3668209475230961,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 17841
    },
    {
      "epoch": 0.17842,
      "grad_norm": 1.3351548331053524,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 17842
    },
    {
      "epoch": 0.17843,
      "grad_norm": 1.2465284262700684,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 17843
    },
    {
      "epoch": 0.17844,
      "grad_norm": 1.4059048434887123,
      "learning_rate": 0.003,
      "loss": 4.0942,
      "step": 17844
    },
    {
      "epoch": 0.17845,
      "grad_norm": 1.1641418528709417,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 17845
    },
    {
      "epoch": 0.17846,
      "grad_norm": 1.2066772712782268,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 17846
    },
    {
      "epoch": 0.17847,
      "grad_norm": 1.1941036074655749,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 17847
    },
    {
      "epoch": 0.17848,
      "grad_norm": 1.2433204115339367,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 17848
    },
    {
      "epoch": 0.17849,
      "grad_norm": 1.3093484254756549,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 17849
    },
    {
      "epoch": 0.1785,
      "grad_norm": 1.1826914800534813,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 17850
    },
    {
      "epoch": 0.17851,
      "grad_norm": 1.2097111418164714,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 17851
    },
    {
      "epoch": 0.17852,
      "grad_norm": 1.2434591953396055,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 17852
    },
    {
      "epoch": 0.17853,
      "grad_norm": 1.2430392272630955,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 17853
    },
    {
      "epoch": 0.17854,
      "grad_norm": 1.1597079985384222,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 17854
    },
    {
      "epoch": 0.17855,
      "grad_norm": 1.1952883665850529,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 17855
    },
    {
      "epoch": 0.17856,
      "grad_norm": 1.2904642448505192,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 17856
    },
    {
      "epoch": 0.17857,
      "grad_norm": 1.2101126895353091,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 17857
    },
    {
      "epoch": 0.17858,
      "grad_norm": 1.2557265330953655,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 17858
    },
    {
      "epoch": 0.17859,
      "grad_norm": 1.3445512913386632,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 17859
    },
    {
      "epoch": 0.1786,
      "grad_norm": 1.3347337979593992,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 17860
    },
    {
      "epoch": 0.17861,
      "grad_norm": 1.1674711177676054,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 17861
    },
    {
      "epoch": 0.17862,
      "grad_norm": 1.179269192911582,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 17862
    },
    {
      "epoch": 0.17863,
      "grad_norm": 1.1267366647848631,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 17863
    },
    {
      "epoch": 0.17864,
      "grad_norm": 1.3658451603592885,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 17864
    },
    {
      "epoch": 0.17865,
      "grad_norm": 1.183338712902706,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 17865
    },
    {
      "epoch": 0.17866,
      "grad_norm": 1.426545758295184,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 17866
    },
    {
      "epoch": 0.17867,
      "grad_norm": 1.0203541867654127,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 17867
    },
    {
      "epoch": 0.17868,
      "grad_norm": 1.349649210302117,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 17868
    },
    {
      "epoch": 0.17869,
      "grad_norm": 1.0430308736743832,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 17869
    },
    {
      "epoch": 0.1787,
      "grad_norm": 1.4004712405709534,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 17870
    },
    {
      "epoch": 0.17871,
      "grad_norm": 1.1322203274855553,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 17871
    },
    {
      "epoch": 0.17872,
      "grad_norm": 1.3211443371500873,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 17872
    },
    {
      "epoch": 0.17873,
      "grad_norm": 1.458967552716695,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 17873
    },
    {
      "epoch": 0.17874,
      "grad_norm": 1.3076101482343323,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 17874
    },
    {
      "epoch": 0.17875,
      "grad_norm": 1.18593223186616,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 17875
    },
    {
      "epoch": 0.17876,
      "grad_norm": 1.1109432963991712,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 17876
    },
    {
      "epoch": 0.17877,
      "grad_norm": 1.4205139155536228,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 17877
    },
    {
      "epoch": 0.17878,
      "grad_norm": 1.0616259733822684,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 17878
    },
    {
      "epoch": 0.17879,
      "grad_norm": 1.33721088716122,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 17879
    },
    {
      "epoch": 0.1788,
      "grad_norm": 1.045484556026667,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 17880
    },
    {
      "epoch": 0.17881,
      "grad_norm": 1.423347840598154,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 17881
    },
    {
      "epoch": 0.17882,
      "grad_norm": 1.0699509572458517,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 17882
    },
    {
      "epoch": 0.17883,
      "grad_norm": 1.4800768903141235,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 17883
    },
    {
      "epoch": 0.17884,
      "grad_norm": 1.2282277128798569,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 17884
    },
    {
      "epoch": 0.17885,
      "grad_norm": 1.458533419609548,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 17885
    },
    {
      "epoch": 0.17886,
      "grad_norm": 1.291266728601586,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 17886
    },
    {
      "epoch": 0.17887,
      "grad_norm": 1.4931675431109201,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 17887
    },
    {
      "epoch": 0.17888,
      "grad_norm": 1.3043677984540665,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 17888
    },
    {
      "epoch": 0.17889,
      "grad_norm": 1.0833629842955117,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 17889
    },
    {
      "epoch": 0.1789,
      "grad_norm": 1.3650054650821548,
      "learning_rate": 0.003,
      "loss": 4.0897,
      "step": 17890
    },
    {
      "epoch": 0.17891,
      "grad_norm": 1.1478210263972732,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 17891
    },
    {
      "epoch": 0.17892,
      "grad_norm": 1.2251283044225163,
      "learning_rate": 0.003,
      "loss": 4.0991,
      "step": 17892
    },
    {
      "epoch": 0.17893,
      "grad_norm": 1.3066400717250508,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 17893
    },
    {
      "epoch": 0.17894,
      "grad_norm": 1.1998657253539133,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 17894
    },
    {
      "epoch": 0.17895,
      "grad_norm": 1.2697499227104956,
      "learning_rate": 0.003,
      "loss": 4.0918,
      "step": 17895
    },
    {
      "epoch": 0.17896,
      "grad_norm": 1.2260165025111005,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 17896
    },
    {
      "epoch": 0.17897,
      "grad_norm": 1.2328994647871134,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 17897
    },
    {
      "epoch": 0.17898,
      "grad_norm": 1.1355480189018692,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 17898
    },
    {
      "epoch": 0.17899,
      "grad_norm": 1.4180974474725112,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 17899
    },
    {
      "epoch": 0.179,
      "grad_norm": 1.299799815272747,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 17900
    },
    {
      "epoch": 0.17901,
      "grad_norm": 1.1427514002836991,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 17901
    },
    {
      "epoch": 0.17902,
      "grad_norm": 1.4421237669924571,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 17902
    },
    {
      "epoch": 0.17903,
      "grad_norm": 1.143470683758552,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 17903
    },
    {
      "epoch": 0.17904,
      "grad_norm": 1.2890418395082373,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 17904
    },
    {
      "epoch": 0.17905,
      "grad_norm": 1.231504588880156,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 17905
    },
    {
      "epoch": 0.17906,
      "grad_norm": 1.1615472894176582,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 17906
    },
    {
      "epoch": 0.17907,
      "grad_norm": 1.1718428948631765,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 17907
    },
    {
      "epoch": 0.17908,
      "grad_norm": 1.2373777391072323,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 17908
    },
    {
      "epoch": 0.17909,
      "grad_norm": 1.4080376200108604,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 17909
    },
    {
      "epoch": 0.1791,
      "grad_norm": 1.0077488587335164,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 17910
    },
    {
      "epoch": 0.17911,
      "grad_norm": 1.5016356252161176,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 17911
    },
    {
      "epoch": 0.17912,
      "grad_norm": 1.1234412426190965,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 17912
    },
    {
      "epoch": 0.17913,
      "grad_norm": 1.3905931376690837,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 17913
    },
    {
      "epoch": 0.17914,
      "grad_norm": 0.9897384707940726,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 17914
    },
    {
      "epoch": 0.17915,
      "grad_norm": 1.6719850108086427,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 17915
    },
    {
      "epoch": 0.17916,
      "grad_norm": 1.1694364300723652,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 17916
    },
    {
      "epoch": 0.17917,
      "grad_norm": 1.3665114206250535,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 17917
    },
    {
      "epoch": 0.17918,
      "grad_norm": 1.4539978671274996,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 17918
    },
    {
      "epoch": 0.17919,
      "grad_norm": 1.0078089353484048,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 17919
    },
    {
      "epoch": 0.1792,
      "grad_norm": 1.2324802885155937,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 17920
    },
    {
      "epoch": 0.17921,
      "grad_norm": 1.2576899133452208,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 17921
    },
    {
      "epoch": 0.17922,
      "grad_norm": 1.2547391534459178,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 17922
    },
    {
      "epoch": 0.17923,
      "grad_norm": 1.1905937404588032,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17923
    },
    {
      "epoch": 0.17924,
      "grad_norm": 1.3320548580499494,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 17924
    },
    {
      "epoch": 0.17925,
      "grad_norm": 1.1673613294904652,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 17925
    },
    {
      "epoch": 0.17926,
      "grad_norm": 1.0821393933109273,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 17926
    },
    {
      "epoch": 0.17927,
      "grad_norm": 1.3049139324124037,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 17927
    },
    {
      "epoch": 0.17928,
      "grad_norm": 1.279645265800928,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 17928
    },
    {
      "epoch": 0.17929,
      "grad_norm": 1.2267947142349527,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 17929
    },
    {
      "epoch": 0.1793,
      "grad_norm": 1.2725727547444488,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 17930
    },
    {
      "epoch": 0.17931,
      "grad_norm": 1.2168153164979243,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 17931
    },
    {
      "epoch": 0.17932,
      "grad_norm": 1.1107128817347829,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 17932
    },
    {
      "epoch": 0.17933,
      "grad_norm": 1.1207404804594867,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 17933
    },
    {
      "epoch": 0.17934,
      "grad_norm": 1.2833669444402074,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 17934
    },
    {
      "epoch": 0.17935,
      "grad_norm": 1.0390032883015055,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 17935
    },
    {
      "epoch": 0.17936,
      "grad_norm": 1.1763755061412124,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 17936
    },
    {
      "epoch": 0.17937,
      "grad_norm": 1.4458590632263786,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 17937
    },
    {
      "epoch": 0.17938,
      "grad_norm": 1.0989058007592827,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 17938
    },
    {
      "epoch": 0.17939,
      "grad_norm": 1.326712895987331,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 17939
    },
    {
      "epoch": 0.1794,
      "grad_norm": 1.0218133507438032,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 17940
    },
    {
      "epoch": 0.17941,
      "grad_norm": 1.3050696388879868,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17941
    },
    {
      "epoch": 0.17942,
      "grad_norm": 1.0433657660442555,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 17942
    },
    {
      "epoch": 0.17943,
      "grad_norm": 1.3186778687120766,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 17943
    },
    {
      "epoch": 0.17944,
      "grad_norm": 1.155891861610521,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 17944
    },
    {
      "epoch": 0.17945,
      "grad_norm": 1.6330229213431369,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 17945
    },
    {
      "epoch": 0.17946,
      "grad_norm": 0.962515382284713,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 17946
    },
    {
      "epoch": 0.17947,
      "grad_norm": 1.2324918576067574,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 17947
    },
    {
      "epoch": 0.17948,
      "grad_norm": 1.3618482939584695,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 17948
    },
    {
      "epoch": 0.17949,
      "grad_norm": 1.1950894831066303,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 17949
    },
    {
      "epoch": 0.1795,
      "grad_norm": 1.2517382845383047,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 17950
    },
    {
      "epoch": 0.17951,
      "grad_norm": 1.277703868914862,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 17951
    },
    {
      "epoch": 0.17952,
      "grad_norm": 1.0964228888869958,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 17952
    },
    {
      "epoch": 0.17953,
      "grad_norm": 1.1939341821407763,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 17953
    },
    {
      "epoch": 0.17954,
      "grad_norm": 1.1651267305984425,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 17954
    },
    {
      "epoch": 0.17955,
      "grad_norm": 1.3850039040149604,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 17955
    },
    {
      "epoch": 0.17956,
      "grad_norm": 1.1177304808547166,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 17956
    },
    {
      "epoch": 0.17957,
      "grad_norm": 1.426185111467826,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 17957
    },
    {
      "epoch": 0.17958,
      "grad_norm": 1.0222384263820816,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 17958
    },
    {
      "epoch": 0.17959,
      "grad_norm": 1.4837637148519538,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 17959
    },
    {
      "epoch": 0.1796,
      "grad_norm": 1.031401407932794,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 17960
    },
    {
      "epoch": 0.17961,
      "grad_norm": 1.4946769120816015,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 17961
    },
    {
      "epoch": 0.17962,
      "grad_norm": 1.1092444101815782,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 17962
    },
    {
      "epoch": 0.17963,
      "grad_norm": 1.364482573566523,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 17963
    },
    {
      "epoch": 0.17964,
      "grad_norm": 1.0107775822137524,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 17964
    },
    {
      "epoch": 0.17965,
      "grad_norm": 1.321693710368358,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 17965
    },
    {
      "epoch": 0.17966,
      "grad_norm": 1.3516587372727524,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 17966
    },
    {
      "epoch": 0.17967,
      "grad_norm": 1.4901704315784046,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 17967
    },
    {
      "epoch": 0.17968,
      "grad_norm": 1.1203814778283905,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 17968
    },
    {
      "epoch": 0.17969,
      "grad_norm": 1.411285830228734,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 17969
    },
    {
      "epoch": 0.1797,
      "grad_norm": 1.26186049354943,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 17970
    },
    {
      "epoch": 0.17971,
      "grad_norm": 1.2830855891996265,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 17971
    },
    {
      "epoch": 0.17972,
      "grad_norm": 1.1050419981201274,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 17972
    },
    {
      "epoch": 0.17973,
      "grad_norm": 1.3510607889599966,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 17973
    },
    {
      "epoch": 0.17974,
      "grad_norm": 1.3193656073693985,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 17974
    },
    {
      "epoch": 0.17975,
      "grad_norm": 1.475721440137748,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 17975
    },
    {
      "epoch": 0.17976,
      "grad_norm": 1.0470951869725496,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 17976
    },
    {
      "epoch": 0.17977,
      "grad_norm": 1.381573519833511,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 17977
    },
    {
      "epoch": 0.17978,
      "grad_norm": 1.2287920507101606,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 17978
    },
    {
      "epoch": 0.17979,
      "grad_norm": 1.049816581057559,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 17979
    },
    {
      "epoch": 0.1798,
      "grad_norm": 1.3932567849099906,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 17980
    },
    {
      "epoch": 0.17981,
      "grad_norm": 0.9571817753127221,
      "learning_rate": 0.003,
      "loss": 3.9816,
      "step": 17981
    },
    {
      "epoch": 0.17982,
      "grad_norm": 1.33453258245156,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 17982
    },
    {
      "epoch": 0.17983,
      "grad_norm": 1.0808435904112632,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 17983
    },
    {
      "epoch": 0.17984,
      "grad_norm": 1.3283266299336365,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 17984
    },
    {
      "epoch": 0.17985,
      "grad_norm": 1.1088742473039481,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 17985
    },
    {
      "epoch": 0.17986,
      "grad_norm": 1.0956769108826203,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 17986
    },
    {
      "epoch": 0.17987,
      "grad_norm": 1.1649275307222553,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 17987
    },
    {
      "epoch": 0.17988,
      "grad_norm": 1.303071401589229,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 17988
    },
    {
      "epoch": 0.17989,
      "grad_norm": 1.240823923601934,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 17989
    },
    {
      "epoch": 0.1799,
      "grad_norm": 1.6323677339061988,
      "learning_rate": 0.003,
      "loss": 4.093,
      "step": 17990
    },
    {
      "epoch": 0.17991,
      "grad_norm": 1.2061400821525412,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 17991
    },
    {
      "epoch": 0.17992,
      "grad_norm": 1.184887958492287,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 17992
    },
    {
      "epoch": 0.17993,
      "grad_norm": 1.5001225441924497,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 17993
    },
    {
      "epoch": 0.17994,
      "grad_norm": 1.1464491904627079,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 17994
    },
    {
      "epoch": 0.17995,
      "grad_norm": 1.3643988758344954,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 17995
    },
    {
      "epoch": 0.17996,
      "grad_norm": 1.2115026512711184,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 17996
    },
    {
      "epoch": 0.17997,
      "grad_norm": 1.3257856655430436,
      "learning_rate": 0.003,
      "loss": 4.0705,
      "step": 17997
    },
    {
      "epoch": 0.17998,
      "grad_norm": 1.2256908288383672,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 17998
    },
    {
      "epoch": 0.17999,
      "grad_norm": 1.363555714171468,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 17999
    },
    {
      "epoch": 0.18,
      "grad_norm": 1.164308022587496,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 18000
    },
    {
      "epoch": 0.18001,
      "grad_norm": 1.3412044136812997,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 18001
    },
    {
      "epoch": 0.18002,
      "grad_norm": 1.0335057488033608,
      "learning_rate": 0.003,
      "loss": 4.0011,
      "step": 18002
    },
    {
      "epoch": 0.18003,
      "grad_norm": 1.4237231980653955,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 18003
    },
    {
      "epoch": 0.18004,
      "grad_norm": 1.2158323562143312,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18004
    },
    {
      "epoch": 0.18005,
      "grad_norm": 1.1791576182188304,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 18005
    },
    {
      "epoch": 0.18006,
      "grad_norm": 1.1951902681655582,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 18006
    },
    {
      "epoch": 0.18007,
      "grad_norm": 1.172380680181894,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 18007
    },
    {
      "epoch": 0.18008,
      "grad_norm": 1.2765435380928838,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 18008
    },
    {
      "epoch": 0.18009,
      "grad_norm": 1.2416008822680729,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 18009
    },
    {
      "epoch": 0.1801,
      "grad_norm": 1.525421875150215,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18010
    },
    {
      "epoch": 0.18011,
      "grad_norm": 1.021853060087861,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 18011
    },
    {
      "epoch": 0.18012,
      "grad_norm": 1.3502946125471011,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 18012
    },
    {
      "epoch": 0.18013,
      "grad_norm": 1.239070355051635,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 18013
    },
    {
      "epoch": 0.18014,
      "grad_norm": 1.3423907417513339,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 18014
    },
    {
      "epoch": 0.18015,
      "grad_norm": 1.2316428670209958,
      "learning_rate": 0.003,
      "loss": 4.1037,
      "step": 18015
    },
    {
      "epoch": 0.18016,
      "grad_norm": 1.2381433964966237,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18016
    },
    {
      "epoch": 0.18017,
      "grad_norm": 1.1709753510909788,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 18017
    },
    {
      "epoch": 0.18018,
      "grad_norm": 1.2412957293555624,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 18018
    },
    {
      "epoch": 0.18019,
      "grad_norm": 1.1756494709968706,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 18019
    },
    {
      "epoch": 0.1802,
      "grad_norm": 1.199461034209744,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 18020
    },
    {
      "epoch": 0.18021,
      "grad_norm": 1.159139826162093,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 18021
    },
    {
      "epoch": 0.18022,
      "grad_norm": 1.2105068534819363,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 18022
    },
    {
      "epoch": 0.18023,
      "grad_norm": 1.2302619521320286,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 18023
    },
    {
      "epoch": 0.18024,
      "grad_norm": 1.1376319466362441,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 18024
    },
    {
      "epoch": 0.18025,
      "grad_norm": 1.3512522055142933,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 18025
    },
    {
      "epoch": 0.18026,
      "grad_norm": 1.3164740883781272,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 18026
    },
    {
      "epoch": 0.18027,
      "grad_norm": 1.2879961756010196,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 18027
    },
    {
      "epoch": 0.18028,
      "grad_norm": 1.1804963011805325,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 18028
    },
    {
      "epoch": 0.18029,
      "grad_norm": 1.4838431177520457,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 18029
    },
    {
      "epoch": 0.1803,
      "grad_norm": 1.0351592429171272,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 18030
    },
    {
      "epoch": 0.18031,
      "grad_norm": 1.2113149016403093,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 18031
    },
    {
      "epoch": 0.18032,
      "grad_norm": 1.148497153568014,
      "learning_rate": 0.003,
      "loss": 4.1188,
      "step": 18032
    },
    {
      "epoch": 0.18033,
      "grad_norm": 1.2622573843454679,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 18033
    },
    {
      "epoch": 0.18034,
      "grad_norm": 1.1921723760443994,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 18034
    },
    {
      "epoch": 0.18035,
      "grad_norm": 1.6950407168602217,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 18035
    },
    {
      "epoch": 0.18036,
      "grad_norm": 1.0196026972145558,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 18036
    },
    {
      "epoch": 0.18037,
      "grad_norm": 1.5396249695351576,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 18037
    },
    {
      "epoch": 0.18038,
      "grad_norm": 0.9897671671610535,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 18038
    },
    {
      "epoch": 0.18039,
      "grad_norm": 1.3512283651462276,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 18039
    },
    {
      "epoch": 0.1804,
      "grad_norm": 1.1484591871116598,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 18040
    },
    {
      "epoch": 0.18041,
      "grad_norm": 1.2147665348576069,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18041
    },
    {
      "epoch": 0.18042,
      "grad_norm": 1.2380319471707066,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 18042
    },
    {
      "epoch": 0.18043,
      "grad_norm": 1.2160999869057305,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 18043
    },
    {
      "epoch": 0.18044,
      "grad_norm": 1.349050585184057,
      "learning_rate": 0.003,
      "loss": 4.1029,
      "step": 18044
    },
    {
      "epoch": 0.18045,
      "grad_norm": 1.3352668030417683,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 18045
    },
    {
      "epoch": 0.18046,
      "grad_norm": 1.1045455583294432,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 18046
    },
    {
      "epoch": 0.18047,
      "grad_norm": 1.338558036270574,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 18047
    },
    {
      "epoch": 0.18048,
      "grad_norm": 1.2568708707224872,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 18048
    },
    {
      "epoch": 0.18049,
      "grad_norm": 1.2333058752609791,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 18049
    },
    {
      "epoch": 0.1805,
      "grad_norm": 1.2138238824506928,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 18050
    },
    {
      "epoch": 0.18051,
      "grad_norm": 1.203369092557718,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 18051
    },
    {
      "epoch": 0.18052,
      "grad_norm": 1.1492783619007025,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 18052
    },
    {
      "epoch": 0.18053,
      "grad_norm": 1.294912972265915,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 18053
    },
    {
      "epoch": 0.18054,
      "grad_norm": 1.1068522222740473,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 18054
    },
    {
      "epoch": 0.18055,
      "grad_norm": 1.2042374149239887,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 18055
    },
    {
      "epoch": 0.18056,
      "grad_norm": 1.1564275150288272,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 18056
    },
    {
      "epoch": 0.18057,
      "grad_norm": 1.0611837217930469,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 18057
    },
    {
      "epoch": 0.18058,
      "grad_norm": 1.4900755342779948,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18058
    },
    {
      "epoch": 0.18059,
      "grad_norm": 1.2733962853773153,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 18059
    },
    {
      "epoch": 0.1806,
      "grad_norm": 1.2119750026885194,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 18060
    },
    {
      "epoch": 0.18061,
      "grad_norm": 1.2263481938044916,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 18061
    },
    {
      "epoch": 0.18062,
      "grad_norm": 1.2387285682421108,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 18062
    },
    {
      "epoch": 0.18063,
      "grad_norm": 1.4175924684006194,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 18063
    },
    {
      "epoch": 0.18064,
      "grad_norm": 1.063975173407913,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 18064
    },
    {
      "epoch": 0.18065,
      "grad_norm": 1.5299678144357638,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 18065
    },
    {
      "epoch": 0.18066,
      "grad_norm": 1.1752826025686958,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 18066
    },
    {
      "epoch": 0.18067,
      "grad_norm": 1.3438190004736612,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18067
    },
    {
      "epoch": 0.18068,
      "grad_norm": 1.2332253208533215,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 18068
    },
    {
      "epoch": 0.18069,
      "grad_norm": 1.107062614861532,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 18069
    },
    {
      "epoch": 0.1807,
      "grad_norm": 1.26988589284218,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 18070
    },
    {
      "epoch": 0.18071,
      "grad_norm": 1.2597509582459114,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 18071
    },
    {
      "epoch": 0.18072,
      "grad_norm": 1.2984968024076675,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 18072
    },
    {
      "epoch": 0.18073,
      "grad_norm": 1.1404329439653857,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 18073
    },
    {
      "epoch": 0.18074,
      "grad_norm": 1.320504001181121,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 18074
    },
    {
      "epoch": 0.18075,
      "grad_norm": 1.1885836109920593,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 18075
    },
    {
      "epoch": 0.18076,
      "grad_norm": 1.2229886387747457,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 18076
    },
    {
      "epoch": 0.18077,
      "grad_norm": 1.0857055300924845,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 18077
    },
    {
      "epoch": 0.18078,
      "grad_norm": 1.3553751900839532,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 18078
    },
    {
      "epoch": 0.18079,
      "grad_norm": 1.2476777391744713,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 18079
    },
    {
      "epoch": 0.1808,
      "grad_norm": 1.381771594339528,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 18080
    },
    {
      "epoch": 0.18081,
      "grad_norm": 1.0374523519668841,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 18081
    },
    {
      "epoch": 0.18082,
      "grad_norm": 1.3655064736167601,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 18082
    },
    {
      "epoch": 0.18083,
      "grad_norm": 1.3394540211727788,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 18083
    },
    {
      "epoch": 0.18084,
      "grad_norm": 1.2450092834886002,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 18084
    },
    {
      "epoch": 0.18085,
      "grad_norm": 1.1342345617675242,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 18085
    },
    {
      "epoch": 0.18086,
      "grad_norm": 1.117787528449565,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 18086
    },
    {
      "epoch": 0.18087,
      "grad_norm": 1.4351247873622142,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 18087
    },
    {
      "epoch": 0.18088,
      "grad_norm": 1.1237382278508348,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 18088
    },
    {
      "epoch": 0.18089,
      "grad_norm": 1.6307934694970347,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 18089
    },
    {
      "epoch": 0.1809,
      "grad_norm": 0.9957714737265753,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 18090
    },
    {
      "epoch": 0.18091,
      "grad_norm": 1.4826744600099264,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 18091
    },
    {
      "epoch": 0.18092,
      "grad_norm": 1.0616821669304313,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 18092
    },
    {
      "epoch": 0.18093,
      "grad_norm": 1.2013092222755266,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 18093
    },
    {
      "epoch": 0.18094,
      "grad_norm": 1.270532891813035,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 18094
    },
    {
      "epoch": 0.18095,
      "grad_norm": 1.146642489000094,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 18095
    },
    {
      "epoch": 0.18096,
      "grad_norm": 1.3894318324246953,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 18096
    },
    {
      "epoch": 0.18097,
      "grad_norm": 1.3435900704186123,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 18097
    },
    {
      "epoch": 0.18098,
      "grad_norm": 1.3240453622481962,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 18098
    },
    {
      "epoch": 0.18099,
      "grad_norm": 1.2954307780137428,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 18099
    },
    {
      "epoch": 0.181,
      "grad_norm": 1.4478304170711598,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 18100
    },
    {
      "epoch": 0.18101,
      "grad_norm": 1.1157493662902294,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 18101
    },
    {
      "epoch": 0.18102,
      "grad_norm": 1.4174307426902089,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 18102
    },
    {
      "epoch": 0.18103,
      "grad_norm": 1.0507780911728946,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 18103
    },
    {
      "epoch": 0.18104,
      "grad_norm": 1.4682717700412773,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 18104
    },
    {
      "epoch": 0.18105,
      "grad_norm": 1.0560006411757137,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 18105
    },
    {
      "epoch": 0.18106,
      "grad_norm": 1.2451914317863495,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 18106
    },
    {
      "epoch": 0.18107,
      "grad_norm": 1.1641326286115536,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 18107
    },
    {
      "epoch": 0.18108,
      "grad_norm": 1.214895668988106,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 18108
    },
    {
      "epoch": 0.18109,
      "grad_norm": 0.9797829546107352,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 18109
    },
    {
      "epoch": 0.1811,
      "grad_norm": 1.3979027989683943,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 18110
    },
    {
      "epoch": 0.18111,
      "grad_norm": 1.1773445699754366,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 18111
    },
    {
      "epoch": 0.18112,
      "grad_norm": 1.491013110380474,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 18112
    },
    {
      "epoch": 0.18113,
      "grad_norm": 1.4326619946319281,
      "learning_rate": 0.003,
      "loss": 4.0954,
      "step": 18113
    },
    {
      "epoch": 0.18114,
      "grad_norm": 1.1473072257356771,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 18114
    },
    {
      "epoch": 0.18115,
      "grad_norm": 1.3187217651431622,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 18115
    },
    {
      "epoch": 0.18116,
      "grad_norm": 1.1026689359745885,
      "learning_rate": 0.003,
      "loss": 4.0929,
      "step": 18116
    },
    {
      "epoch": 0.18117,
      "grad_norm": 1.407834913836384,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 18117
    },
    {
      "epoch": 0.18118,
      "grad_norm": 0.9974611519559162,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 18118
    },
    {
      "epoch": 0.18119,
      "grad_norm": 1.3584033870556507,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 18119
    },
    {
      "epoch": 0.1812,
      "grad_norm": 1.1575947584923405,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 18120
    },
    {
      "epoch": 0.18121,
      "grad_norm": 1.3993242415779288,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 18121
    },
    {
      "epoch": 0.18122,
      "grad_norm": 1.2113842574798337,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 18122
    },
    {
      "epoch": 0.18123,
      "grad_norm": 1.110930997452562,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 18123
    },
    {
      "epoch": 0.18124,
      "grad_norm": 1.267077278650209,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 18124
    },
    {
      "epoch": 0.18125,
      "grad_norm": 1.248509228043099,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 18125
    },
    {
      "epoch": 0.18126,
      "grad_norm": 1.23188913831077,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 18126
    },
    {
      "epoch": 0.18127,
      "grad_norm": 1.259012015805301,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 18127
    },
    {
      "epoch": 0.18128,
      "grad_norm": 1.0563186995646476,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 18128
    },
    {
      "epoch": 0.18129,
      "grad_norm": 1.329551586318536,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 18129
    },
    {
      "epoch": 0.1813,
      "grad_norm": 1.2869493911917629,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 18130
    },
    {
      "epoch": 0.18131,
      "grad_norm": 1.2765876629558837,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18131
    },
    {
      "epoch": 0.18132,
      "grad_norm": 1.1801299514195327,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 18132
    },
    {
      "epoch": 0.18133,
      "grad_norm": 1.424653965297774,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 18133
    },
    {
      "epoch": 0.18134,
      "grad_norm": 1.0888569962987793,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 18134
    },
    {
      "epoch": 0.18135,
      "grad_norm": 1.5304092847839716,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 18135
    },
    {
      "epoch": 0.18136,
      "grad_norm": 1.1698168815889853,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 18136
    },
    {
      "epoch": 0.18137,
      "grad_norm": 1.279134688994487,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 18137
    },
    {
      "epoch": 0.18138,
      "grad_norm": 1.1713600574739995,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 18138
    },
    {
      "epoch": 0.18139,
      "grad_norm": 1.348354173926797,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 18139
    },
    {
      "epoch": 0.1814,
      "grad_norm": 1.1903681611134498,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 18140
    },
    {
      "epoch": 0.18141,
      "grad_norm": 1.396601393949861,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 18141
    },
    {
      "epoch": 0.18142,
      "grad_norm": 1.1114557185986427,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 18142
    },
    {
      "epoch": 0.18143,
      "grad_norm": 1.363436301360319,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 18143
    },
    {
      "epoch": 0.18144,
      "grad_norm": 1.1996189226751284,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 18144
    },
    {
      "epoch": 0.18145,
      "grad_norm": 1.5186331509259658,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 18145
    },
    {
      "epoch": 0.18146,
      "grad_norm": 0.9823695488471641,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 18146
    },
    {
      "epoch": 0.18147,
      "grad_norm": 1.3796416762623962,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 18147
    },
    {
      "epoch": 0.18148,
      "grad_norm": 1.2882762858680572,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 18148
    },
    {
      "epoch": 0.18149,
      "grad_norm": 1.1096876382926448,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 18149
    },
    {
      "epoch": 0.1815,
      "grad_norm": 1.4933113427119287,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 18150
    },
    {
      "epoch": 0.18151,
      "grad_norm": 1.053805286179601,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 18151
    },
    {
      "epoch": 0.18152,
      "grad_norm": 1.3146079074140908,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 18152
    },
    {
      "epoch": 0.18153,
      "grad_norm": 1.0868174410400009,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 18153
    },
    {
      "epoch": 0.18154,
      "grad_norm": 1.264663576873165,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 18154
    },
    {
      "epoch": 0.18155,
      "grad_norm": 1.2457024950840911,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 18155
    },
    {
      "epoch": 0.18156,
      "grad_norm": 1.2908701271650775,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 18156
    },
    {
      "epoch": 0.18157,
      "grad_norm": 1.5574366991134188,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 18157
    },
    {
      "epoch": 0.18158,
      "grad_norm": 1.1082657444457111,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 18158
    },
    {
      "epoch": 0.18159,
      "grad_norm": 1.6012985022268318,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 18159
    },
    {
      "epoch": 0.1816,
      "grad_norm": 1.0897029935236637,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 18160
    },
    {
      "epoch": 0.18161,
      "grad_norm": 1.3739888336510175,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 18161
    },
    {
      "epoch": 0.18162,
      "grad_norm": 1.075768081948606,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 18162
    },
    {
      "epoch": 0.18163,
      "grad_norm": 1.0848080036670058,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 18163
    },
    {
      "epoch": 0.18164,
      "grad_norm": 1.251003885452719,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 18164
    },
    {
      "epoch": 0.18165,
      "grad_norm": 1.2802533359731652,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 18165
    },
    {
      "epoch": 0.18166,
      "grad_norm": 1.1654615542109659,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 18166
    },
    {
      "epoch": 0.18167,
      "grad_norm": 1.3373001401801745,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 18167
    },
    {
      "epoch": 0.18168,
      "grad_norm": 1.1409164703407304,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 18168
    },
    {
      "epoch": 0.18169,
      "grad_norm": 1.468935684660133,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 18169
    },
    {
      "epoch": 0.1817,
      "grad_norm": 1.121908993117358,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 18170
    },
    {
      "epoch": 0.18171,
      "grad_norm": 1.3702058817884184,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 18171
    },
    {
      "epoch": 0.18172,
      "grad_norm": 1.1760654252535787,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 18172
    },
    {
      "epoch": 0.18173,
      "grad_norm": 1.1683759422998519,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 18173
    },
    {
      "epoch": 0.18174,
      "grad_norm": 1.2596310269817508,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 18174
    },
    {
      "epoch": 0.18175,
      "grad_norm": 1.0022911390677356,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 18175
    },
    {
      "epoch": 0.18176,
      "grad_norm": 1.5245780473996084,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 18176
    },
    {
      "epoch": 0.18177,
      "grad_norm": 1.0375269397207594,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 18177
    },
    {
      "epoch": 0.18178,
      "grad_norm": 1.7900866802540607,
      "learning_rate": 0.003,
      "loss": 4.0922,
      "step": 18178
    },
    {
      "epoch": 0.18179,
      "grad_norm": 0.8776712208774177,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 18179
    },
    {
      "epoch": 0.1818,
      "grad_norm": 1.2002975729982084,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 18180
    },
    {
      "epoch": 0.18181,
      "grad_norm": 1.3797133184577732,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 18181
    },
    {
      "epoch": 0.18182,
      "grad_norm": 1.0868714237281438,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 18182
    },
    {
      "epoch": 0.18183,
      "grad_norm": 1.3460658814721587,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 18183
    },
    {
      "epoch": 0.18184,
      "grad_norm": 1.366988467703209,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 18184
    },
    {
      "epoch": 0.18185,
      "grad_norm": 1.4201669622193525,
      "learning_rate": 0.003,
      "loss": 4.086,
      "step": 18185
    },
    {
      "epoch": 0.18186,
      "grad_norm": 1.205017084823888,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 18186
    },
    {
      "epoch": 0.18187,
      "grad_norm": 1.3172298536356926,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 18187
    },
    {
      "epoch": 0.18188,
      "grad_norm": 1.0620127686630847,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 18188
    },
    {
      "epoch": 0.18189,
      "grad_norm": 1.4387558417071202,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 18189
    },
    {
      "epoch": 0.1819,
      "grad_norm": 1.2840994198713898,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18190
    },
    {
      "epoch": 0.18191,
      "grad_norm": 1.2090175105525351,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 18191
    },
    {
      "epoch": 0.18192,
      "grad_norm": 1.1974107121217237,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 18192
    },
    {
      "epoch": 0.18193,
      "grad_norm": 1.308380356147569,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 18193
    },
    {
      "epoch": 0.18194,
      "grad_norm": 1.0468549651928476,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 18194
    },
    {
      "epoch": 0.18195,
      "grad_norm": 1.2222161372847502,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 18195
    },
    {
      "epoch": 0.18196,
      "grad_norm": 1.203429438902244,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 18196
    },
    {
      "epoch": 0.18197,
      "grad_norm": 1.1828547791402275,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 18197
    },
    {
      "epoch": 0.18198,
      "grad_norm": 1.254164207569211,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 18198
    },
    {
      "epoch": 0.18199,
      "grad_norm": 1.2540510591540648,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 18199
    },
    {
      "epoch": 0.182,
      "grad_norm": 1.1693680555263508,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 18200
    },
    {
      "epoch": 0.18201,
      "grad_norm": 1.5424127788744206,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 18201
    },
    {
      "epoch": 0.18202,
      "grad_norm": 1.0666229449001072,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 18202
    },
    {
      "epoch": 0.18203,
      "grad_norm": 1.483172598404617,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 18203
    },
    {
      "epoch": 0.18204,
      "grad_norm": 1.1751492384475146,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 18204
    },
    {
      "epoch": 0.18205,
      "grad_norm": 1.2448157635226145,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 18205
    },
    {
      "epoch": 0.18206,
      "grad_norm": 1.7781994750984322,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 18206
    },
    {
      "epoch": 0.18207,
      "grad_norm": 1.3792367114824855,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 18207
    },
    {
      "epoch": 0.18208,
      "grad_norm": 1.0562640052095869,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 18208
    },
    {
      "epoch": 0.18209,
      "grad_norm": 1.3334679284779756,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 18209
    },
    {
      "epoch": 0.1821,
      "grad_norm": 1.1901289354630578,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 18210
    },
    {
      "epoch": 0.18211,
      "grad_norm": 1.4574478094780343,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 18211
    },
    {
      "epoch": 0.18212,
      "grad_norm": 1.2568982149335541,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 18212
    },
    {
      "epoch": 0.18213,
      "grad_norm": 1.0991050921729633,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 18213
    },
    {
      "epoch": 0.18214,
      "grad_norm": 1.2358437166942335,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 18214
    },
    {
      "epoch": 0.18215,
      "grad_norm": 1.1466128425869118,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 18215
    },
    {
      "epoch": 0.18216,
      "grad_norm": 1.1505190831323802,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 18216
    },
    {
      "epoch": 0.18217,
      "grad_norm": 1.3262682912182426,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 18217
    },
    {
      "epoch": 0.18218,
      "grad_norm": 1.0954661064915585,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 18218
    },
    {
      "epoch": 0.18219,
      "grad_norm": 1.3173900128684979,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 18219
    },
    {
      "epoch": 0.1822,
      "grad_norm": 1.0356806303502464,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 18220
    },
    {
      "epoch": 0.18221,
      "grad_norm": 1.3347460450586923,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 18221
    },
    {
      "epoch": 0.18222,
      "grad_norm": 1.04383992299412,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 18222
    },
    {
      "epoch": 0.18223,
      "grad_norm": 1.269495196604096,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 18223
    },
    {
      "epoch": 0.18224,
      "grad_norm": 1.2044940236297053,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 18224
    },
    {
      "epoch": 0.18225,
      "grad_norm": 1.380718818266551,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 18225
    },
    {
      "epoch": 0.18226,
      "grad_norm": 1.3422067657627699,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 18226
    },
    {
      "epoch": 0.18227,
      "grad_norm": 1.183494487628006,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 18227
    },
    {
      "epoch": 0.18228,
      "grad_norm": 1.1868594272546387,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 18228
    },
    {
      "epoch": 0.18229,
      "grad_norm": 1.0844426746752334,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 18229
    },
    {
      "epoch": 0.1823,
      "grad_norm": 1.3500625981204193,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 18230
    },
    {
      "epoch": 0.18231,
      "grad_norm": 1.1111751784718151,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 18231
    },
    {
      "epoch": 0.18232,
      "grad_norm": 1.2567781682210177,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 18232
    },
    {
      "epoch": 0.18233,
      "grad_norm": 1.068747246417859,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 18233
    },
    {
      "epoch": 0.18234,
      "grad_norm": 1.280503411083223,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 18234
    },
    {
      "epoch": 0.18235,
      "grad_norm": 1.121330812361253,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 18235
    },
    {
      "epoch": 0.18236,
      "grad_norm": 1.1211308075295416,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 18236
    },
    {
      "epoch": 0.18237,
      "grad_norm": 1.4690225127213803,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18237
    },
    {
      "epoch": 0.18238,
      "grad_norm": 1.1570290902385694,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 18238
    },
    {
      "epoch": 0.18239,
      "grad_norm": 1.4223512705051753,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 18239
    },
    {
      "epoch": 0.1824,
      "grad_norm": 1.1734552617111975,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 18240
    },
    {
      "epoch": 0.18241,
      "grad_norm": 1.723514897872368,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 18241
    },
    {
      "epoch": 0.18242,
      "grad_norm": 1.160824652772994,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 18242
    },
    {
      "epoch": 0.18243,
      "grad_norm": 1.7053584970983728,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 18243
    },
    {
      "epoch": 0.18244,
      "grad_norm": 1.2443513513362243,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 18244
    },
    {
      "epoch": 0.18245,
      "grad_norm": 1.1734999692543786,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 18245
    },
    {
      "epoch": 0.18246,
      "grad_norm": 1.3092224123559955,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18246
    },
    {
      "epoch": 0.18247,
      "grad_norm": 1.4217802514625206,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 18247
    },
    {
      "epoch": 0.18248,
      "grad_norm": 1.059889898722704,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 18248
    },
    {
      "epoch": 0.18249,
      "grad_norm": 1.3825667571008604,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 18249
    },
    {
      "epoch": 0.1825,
      "grad_norm": 1.2257225759372845,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 18250
    },
    {
      "epoch": 0.18251,
      "grad_norm": 1.5244673134581954,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 18251
    },
    {
      "epoch": 0.18252,
      "grad_norm": 1.0797095673183785,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 18252
    },
    {
      "epoch": 0.18253,
      "grad_norm": 1.327777557684387,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 18253
    },
    {
      "epoch": 0.18254,
      "grad_norm": 1.0489593877567802,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 18254
    },
    {
      "epoch": 0.18255,
      "grad_norm": 1.5240857198603717,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 18255
    },
    {
      "epoch": 0.18256,
      "grad_norm": 1.136629056612958,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 18256
    },
    {
      "epoch": 0.18257,
      "grad_norm": 0.9934615078496511,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 18257
    },
    {
      "epoch": 0.18258,
      "grad_norm": 1.3380337871466155,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 18258
    },
    {
      "epoch": 0.18259,
      "grad_norm": 1.0183344292377945,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 18259
    },
    {
      "epoch": 0.1826,
      "grad_norm": 1.337678866827291,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 18260
    },
    {
      "epoch": 0.18261,
      "grad_norm": 1.4272146872387483,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 18261
    },
    {
      "epoch": 0.18262,
      "grad_norm": 1.1892082207166188,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18262
    },
    {
      "epoch": 0.18263,
      "grad_norm": 1.130988447114421,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 18263
    },
    {
      "epoch": 0.18264,
      "grad_norm": 1.3096296248501902,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 18264
    },
    {
      "epoch": 0.18265,
      "grad_norm": 1.2888662142148892,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 18265
    },
    {
      "epoch": 0.18266,
      "grad_norm": 1.267280829020684,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 18266
    },
    {
      "epoch": 0.18267,
      "grad_norm": 1.0603504202519463,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 18267
    },
    {
      "epoch": 0.18268,
      "grad_norm": 1.2371568894298932,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 18268
    },
    {
      "epoch": 0.18269,
      "grad_norm": 1.1722973922098998,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 18269
    },
    {
      "epoch": 0.1827,
      "grad_norm": 1.3027346280041303,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 18270
    },
    {
      "epoch": 0.18271,
      "grad_norm": 1.2309646097171965,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 18271
    },
    {
      "epoch": 0.18272,
      "grad_norm": 1.3663051952039242,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 18272
    },
    {
      "epoch": 0.18273,
      "grad_norm": 1.2285679725594594,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 18273
    },
    {
      "epoch": 0.18274,
      "grad_norm": 1.097664724710302,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 18274
    },
    {
      "epoch": 0.18275,
      "grad_norm": 1.3283120714766745,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 18275
    },
    {
      "epoch": 0.18276,
      "grad_norm": 0.9972392283599518,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 18276
    },
    {
      "epoch": 0.18277,
      "grad_norm": 1.243113304546045,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 18277
    },
    {
      "epoch": 0.18278,
      "grad_norm": 1.2489426903513285,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 18278
    },
    {
      "epoch": 0.18279,
      "grad_norm": 1.2452532494113835,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 18279
    },
    {
      "epoch": 0.1828,
      "grad_norm": 1.2500726378055314,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 18280
    },
    {
      "epoch": 0.18281,
      "grad_norm": 1.2552143875100266,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 18281
    },
    {
      "epoch": 0.18282,
      "grad_norm": 1.2464722209299044,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 18282
    },
    {
      "epoch": 0.18283,
      "grad_norm": 1.1204390881075939,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18283
    },
    {
      "epoch": 0.18284,
      "grad_norm": 1.4257660571352764,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 18284
    },
    {
      "epoch": 0.18285,
      "grad_norm": 1.1537045570323736,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 18285
    },
    {
      "epoch": 0.18286,
      "grad_norm": 1.3376390225902903,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 18286
    },
    {
      "epoch": 0.18287,
      "grad_norm": 1.1905243620450867,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 18287
    },
    {
      "epoch": 0.18288,
      "grad_norm": 1.4219983758559431,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 18288
    },
    {
      "epoch": 0.18289,
      "grad_norm": 1.491007229393689,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 18289
    },
    {
      "epoch": 0.1829,
      "grad_norm": 0.9372579397340259,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 18290
    },
    {
      "epoch": 0.18291,
      "grad_norm": 1.2249712767153973,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 18291
    },
    {
      "epoch": 0.18292,
      "grad_norm": 1.1668551361500483,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 18292
    },
    {
      "epoch": 0.18293,
      "grad_norm": 1.2483664929708789,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 18293
    },
    {
      "epoch": 0.18294,
      "grad_norm": 1.3767312298327286,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 18294
    },
    {
      "epoch": 0.18295,
      "grad_norm": 1.1326350231472428,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 18295
    },
    {
      "epoch": 0.18296,
      "grad_norm": 1.10728669872332,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 18296
    },
    {
      "epoch": 0.18297,
      "grad_norm": 1.5379390762954344,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 18297
    },
    {
      "epoch": 0.18298,
      "grad_norm": 1.0414777100691703,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 18298
    },
    {
      "epoch": 0.18299,
      "grad_norm": 1.5429002917083006,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 18299
    },
    {
      "epoch": 0.183,
      "grad_norm": 1.1100658287599119,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 18300
    },
    {
      "epoch": 0.18301,
      "grad_norm": 1.231714394432137,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 18301
    },
    {
      "epoch": 0.18302,
      "grad_norm": 1.2259113285668939,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 18302
    },
    {
      "epoch": 0.18303,
      "grad_norm": 1.1364295934492379,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18303
    },
    {
      "epoch": 0.18304,
      "grad_norm": 1.489554152808706,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 18304
    },
    {
      "epoch": 0.18305,
      "grad_norm": 0.9800170074568549,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 18305
    },
    {
      "epoch": 0.18306,
      "grad_norm": 1.5025478067001217,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 18306
    },
    {
      "epoch": 0.18307,
      "grad_norm": 1.152243239338405,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 18307
    },
    {
      "epoch": 0.18308,
      "grad_norm": 1.3602754061144529,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 18308
    },
    {
      "epoch": 0.18309,
      "grad_norm": 1.2800181471469514,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 18309
    },
    {
      "epoch": 0.1831,
      "grad_norm": 1.2850650195814035,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18310
    },
    {
      "epoch": 0.18311,
      "grad_norm": 1.226801174414026,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 18311
    },
    {
      "epoch": 0.18312,
      "grad_norm": 1.0931000755517517,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 18312
    },
    {
      "epoch": 0.18313,
      "grad_norm": 1.2551201885435146,
      "learning_rate": 0.003,
      "loss": 3.9859,
      "step": 18313
    },
    {
      "epoch": 0.18314,
      "grad_norm": 0.9381602413085871,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 18314
    },
    {
      "epoch": 0.18315,
      "grad_norm": 1.291920088716176,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 18315
    },
    {
      "epoch": 0.18316,
      "grad_norm": 1.2293352344284154,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 18316
    },
    {
      "epoch": 0.18317,
      "grad_norm": 1.2691610893685448,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 18317
    },
    {
      "epoch": 0.18318,
      "grad_norm": 1.2031632877889293,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 18318
    },
    {
      "epoch": 0.18319,
      "grad_norm": 1.4372889545623073,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 18319
    },
    {
      "epoch": 0.1832,
      "grad_norm": 1.1938822809284222,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 18320
    },
    {
      "epoch": 0.18321,
      "grad_norm": 1.7396964838428048,
      "learning_rate": 0.003,
      "loss": 4.1234,
      "step": 18321
    },
    {
      "epoch": 0.18322,
      "grad_norm": 1.0167582841136749,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 18322
    },
    {
      "epoch": 0.18323,
      "grad_norm": 1.2967181482217018,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 18323
    },
    {
      "epoch": 0.18324,
      "grad_norm": 1.40156900342504,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 18324
    },
    {
      "epoch": 0.18325,
      "grad_norm": 1.4238864556716415,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18325
    },
    {
      "epoch": 0.18326,
      "grad_norm": 1.3421617265103192,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 18326
    },
    {
      "epoch": 0.18327,
      "grad_norm": 1.117911370027948,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 18327
    },
    {
      "epoch": 0.18328,
      "grad_norm": 1.3451995827335028,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 18328
    },
    {
      "epoch": 0.18329,
      "grad_norm": 1.1784696999814355,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 18329
    },
    {
      "epoch": 0.1833,
      "grad_norm": 1.390971450708566,
      "learning_rate": 0.003,
      "loss": 4.0941,
      "step": 18330
    },
    {
      "epoch": 0.18331,
      "grad_norm": 1.3913771267533268,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 18331
    },
    {
      "epoch": 0.18332,
      "grad_norm": 1.0795999422508706,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 18332
    },
    {
      "epoch": 0.18333,
      "grad_norm": 1.3322619358634882,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 18333
    },
    {
      "epoch": 0.18334,
      "grad_norm": 1.1542061585922139,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 18334
    },
    {
      "epoch": 0.18335,
      "grad_norm": 1.4284341864109296,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 18335
    },
    {
      "epoch": 0.18336,
      "grad_norm": 1.0439042550047468,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 18336
    },
    {
      "epoch": 0.18337,
      "grad_norm": 1.3615724668800908,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 18337
    },
    {
      "epoch": 0.18338,
      "grad_norm": 1.1382971968373383,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 18338
    },
    {
      "epoch": 0.18339,
      "grad_norm": 1.4439725470651312,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 18339
    },
    {
      "epoch": 0.1834,
      "grad_norm": 1.3157856314429814,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 18340
    },
    {
      "epoch": 0.18341,
      "grad_norm": 0.9721477717900843,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 18341
    },
    {
      "epoch": 0.18342,
      "grad_norm": 1.3463242621740237,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 18342
    },
    {
      "epoch": 0.18343,
      "grad_norm": 1.0596979320175746,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 18343
    },
    {
      "epoch": 0.18344,
      "grad_norm": 1.4004903286620822,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 18344
    },
    {
      "epoch": 0.18345,
      "grad_norm": 1.1448159920144843,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 18345
    },
    {
      "epoch": 0.18346,
      "grad_norm": 1.3386523052482129,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 18346
    },
    {
      "epoch": 0.18347,
      "grad_norm": 1.063078283040304,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 18347
    },
    {
      "epoch": 0.18348,
      "grad_norm": 1.459775990951888,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 18348
    },
    {
      "epoch": 0.18349,
      "grad_norm": 1.2590137029632196,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 18349
    },
    {
      "epoch": 0.1835,
      "grad_norm": 1.394849360214779,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 18350
    },
    {
      "epoch": 0.18351,
      "grad_norm": 1.2674196745764508,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 18351
    },
    {
      "epoch": 0.18352,
      "grad_norm": 1.1597828357200477,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 18352
    },
    {
      "epoch": 0.18353,
      "grad_norm": 1.3406387081704234,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 18353
    },
    {
      "epoch": 0.18354,
      "grad_norm": 1.1076191932627248,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18354
    },
    {
      "epoch": 0.18355,
      "grad_norm": 1.4097792549029675,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 18355
    },
    {
      "epoch": 0.18356,
      "grad_norm": 1.1904630552071125,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 18356
    },
    {
      "epoch": 0.18357,
      "grad_norm": 1.337396066701302,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 18357
    },
    {
      "epoch": 0.18358,
      "grad_norm": 1.2589228951948699,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 18358
    },
    {
      "epoch": 0.18359,
      "grad_norm": 1.108926826809863,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 18359
    },
    {
      "epoch": 0.1836,
      "grad_norm": 1.3603760767508253,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 18360
    },
    {
      "epoch": 0.18361,
      "grad_norm": 1.1226725968804887,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 18361
    },
    {
      "epoch": 0.18362,
      "grad_norm": 1.2720267602985238,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 18362
    },
    {
      "epoch": 0.18363,
      "grad_norm": 1.1342031278953895,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 18363
    },
    {
      "epoch": 0.18364,
      "grad_norm": 1.295159021261614,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 18364
    },
    {
      "epoch": 0.18365,
      "grad_norm": 1.1735434226929458,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 18365
    },
    {
      "epoch": 0.18366,
      "grad_norm": 1.2954442953501812,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 18366
    },
    {
      "epoch": 0.18367,
      "grad_norm": 1.2740847313393036,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 18367
    },
    {
      "epoch": 0.18368,
      "grad_norm": 1.2140895280074016,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 18368
    },
    {
      "epoch": 0.18369,
      "grad_norm": 1.3933726149998125,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 18369
    },
    {
      "epoch": 0.1837,
      "grad_norm": 1.1258452789286704,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 18370
    },
    {
      "epoch": 0.18371,
      "grad_norm": 1.3659543472847637,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 18371
    },
    {
      "epoch": 0.18372,
      "grad_norm": 1.222245331243453,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 18372
    },
    {
      "epoch": 0.18373,
      "grad_norm": 1.7141417251122142,
      "learning_rate": 0.003,
      "loss": 4.0829,
      "step": 18373
    },
    {
      "epoch": 0.18374,
      "grad_norm": 0.9671025556018095,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 18374
    },
    {
      "epoch": 0.18375,
      "grad_norm": 1.1872740727230664,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 18375
    },
    {
      "epoch": 0.18376,
      "grad_norm": 1.3334669979762708,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 18376
    },
    {
      "epoch": 0.18377,
      "grad_norm": 1.1194892071827662,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 18377
    },
    {
      "epoch": 0.18378,
      "grad_norm": 1.5590549469648822,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 18378
    },
    {
      "epoch": 0.18379,
      "grad_norm": 1.1596940620535392,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 18379
    },
    {
      "epoch": 0.1838,
      "grad_norm": 1.29515950308488,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 18380
    },
    {
      "epoch": 0.18381,
      "grad_norm": 0.9829611319668723,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 18381
    },
    {
      "epoch": 0.18382,
      "grad_norm": 1.5561889709198071,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 18382
    },
    {
      "epoch": 0.18383,
      "grad_norm": 1.1900066307598778,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 18383
    },
    {
      "epoch": 0.18384,
      "grad_norm": 1.3426263061815311,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 18384
    },
    {
      "epoch": 0.18385,
      "grad_norm": 1.3912776433001495,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 18385
    },
    {
      "epoch": 0.18386,
      "grad_norm": 1.2472173440990098,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 18386
    },
    {
      "epoch": 0.18387,
      "grad_norm": 1.0997827846903279,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 18387
    },
    {
      "epoch": 0.18388,
      "grad_norm": 1.5915345944293096,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 18388
    },
    {
      "epoch": 0.18389,
      "grad_norm": 1.0540192254937788,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 18389
    },
    {
      "epoch": 0.1839,
      "grad_norm": 1.349088059231571,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 18390
    },
    {
      "epoch": 0.18391,
      "grad_norm": 1.0832034858737989,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 18391
    },
    {
      "epoch": 0.18392,
      "grad_norm": 1.3619004635892382,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 18392
    },
    {
      "epoch": 0.18393,
      "grad_norm": 1.0458881019397792,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18393
    },
    {
      "epoch": 0.18394,
      "grad_norm": 1.3173913973229336,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 18394
    },
    {
      "epoch": 0.18395,
      "grad_norm": 1.2713040555271644,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 18395
    },
    {
      "epoch": 0.18396,
      "grad_norm": 1.4819357793803813,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 18396
    },
    {
      "epoch": 0.18397,
      "grad_norm": 1.091929480347712,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 18397
    },
    {
      "epoch": 0.18398,
      "grad_norm": 1.5549150779413397,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 18398
    },
    {
      "epoch": 0.18399,
      "grad_norm": 1.2504656996458987,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 18399
    },
    {
      "epoch": 0.184,
      "grad_norm": 1.2328847983295108,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 18400
    },
    {
      "epoch": 0.18401,
      "grad_norm": 1.1143207423651258,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 18401
    },
    {
      "epoch": 0.18402,
      "grad_norm": 1.1704892853025841,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 18402
    },
    {
      "epoch": 0.18403,
      "grad_norm": 1.2814790421906408,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 18403
    },
    {
      "epoch": 0.18404,
      "grad_norm": 1.0653977131709254,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 18404
    },
    {
      "epoch": 0.18405,
      "grad_norm": 1.4286666051912853,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 18405
    },
    {
      "epoch": 0.18406,
      "grad_norm": 1.2242947705176372,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 18406
    },
    {
      "epoch": 0.18407,
      "grad_norm": 1.2965782130321206,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 18407
    },
    {
      "epoch": 0.18408,
      "grad_norm": 1.364762697056736,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 18408
    },
    {
      "epoch": 0.18409,
      "grad_norm": 1.307486344537389,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 18409
    },
    {
      "epoch": 0.1841,
      "grad_norm": 1.1484488387476597,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 18410
    },
    {
      "epoch": 0.18411,
      "grad_norm": 1.4953546714049815,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 18411
    },
    {
      "epoch": 0.18412,
      "grad_norm": 1.2321071738116949,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 18412
    },
    {
      "epoch": 0.18413,
      "grad_norm": 1.145249847322974,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 18413
    },
    {
      "epoch": 0.18414,
      "grad_norm": 1.2487494626464846,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 18414
    },
    {
      "epoch": 0.18415,
      "grad_norm": 1.0883548355445094,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 18415
    },
    {
      "epoch": 0.18416,
      "grad_norm": 1.2299959920294103,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 18416
    },
    {
      "epoch": 0.18417,
      "grad_norm": 1.2072193836859442,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 18417
    },
    {
      "epoch": 0.18418,
      "grad_norm": 1.1419089881924183,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 18418
    },
    {
      "epoch": 0.18419,
      "grad_norm": 1.291328812747233,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 18419
    },
    {
      "epoch": 0.1842,
      "grad_norm": 1.0516099710007751,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 18420
    },
    {
      "epoch": 0.18421,
      "grad_norm": 1.6372782377502058,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 18421
    },
    {
      "epoch": 0.18422,
      "grad_norm": 1.1970379761625947,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18422
    },
    {
      "epoch": 0.18423,
      "grad_norm": 1.3665717406752702,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 18423
    },
    {
      "epoch": 0.18424,
      "grad_norm": 1.050578301481084,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 18424
    },
    {
      "epoch": 0.18425,
      "grad_norm": 1.3996093984009,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 18425
    },
    {
      "epoch": 0.18426,
      "grad_norm": 1.1612361973455871,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 18426
    },
    {
      "epoch": 0.18427,
      "grad_norm": 1.088657031452232,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 18427
    },
    {
      "epoch": 0.18428,
      "grad_norm": 1.4412719606226663,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 18428
    },
    {
      "epoch": 0.18429,
      "grad_norm": 1.1368657759095244,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 18429
    },
    {
      "epoch": 0.1843,
      "grad_norm": 1.1075714101497987,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 18430
    },
    {
      "epoch": 0.18431,
      "grad_norm": 1.2733237994192719,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 18431
    },
    {
      "epoch": 0.18432,
      "grad_norm": 1.1232848243686768,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 18432
    },
    {
      "epoch": 0.18433,
      "grad_norm": 1.4071867153137911,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 18433
    },
    {
      "epoch": 0.18434,
      "grad_norm": 1.2290071096289996,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 18434
    },
    {
      "epoch": 0.18435,
      "grad_norm": 1.032723486247934,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 18435
    },
    {
      "epoch": 0.18436,
      "grad_norm": 1.2846682751341674,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 18436
    },
    {
      "epoch": 0.18437,
      "grad_norm": 1.0787708489962597,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 18437
    },
    {
      "epoch": 0.18438,
      "grad_norm": 1.464669613675582,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 18438
    },
    {
      "epoch": 0.18439,
      "grad_norm": 1.2595339936840588,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 18439
    },
    {
      "epoch": 0.1844,
      "grad_norm": 1.1910280353388745,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 18440
    },
    {
      "epoch": 0.18441,
      "grad_norm": 1.401596779857551,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 18441
    },
    {
      "epoch": 0.18442,
      "grad_norm": 1.1663080858959465,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 18442
    },
    {
      "epoch": 0.18443,
      "grad_norm": 1.4213127571437272,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 18443
    },
    {
      "epoch": 0.18444,
      "grad_norm": 1.2784713167394093,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 18444
    },
    {
      "epoch": 0.18445,
      "grad_norm": 1.2699967101061032,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 18445
    },
    {
      "epoch": 0.18446,
      "grad_norm": 1.4811331385648756,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 18446
    },
    {
      "epoch": 0.18447,
      "grad_norm": 1.2417564796737708,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 18447
    },
    {
      "epoch": 0.18448,
      "grad_norm": 1.43635534847114,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 18448
    },
    {
      "epoch": 0.18449,
      "grad_norm": 1.215379106673042,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 18449
    },
    {
      "epoch": 0.1845,
      "grad_norm": 1.2157904836130864,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 18450
    },
    {
      "epoch": 0.18451,
      "grad_norm": 1.329311485228502,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 18451
    },
    {
      "epoch": 0.18452,
      "grad_norm": 1.0561661848277206,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 18452
    },
    {
      "epoch": 0.18453,
      "grad_norm": 1.2937290983083072,
      "learning_rate": 0.003,
      "loss": 4.095,
      "step": 18453
    },
    {
      "epoch": 0.18454,
      "grad_norm": 1.0068479492925069,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 18454
    },
    {
      "epoch": 0.18455,
      "grad_norm": 1.2853261379702334,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 18455
    },
    {
      "epoch": 0.18456,
      "grad_norm": 1.1472678057552477,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 18456
    },
    {
      "epoch": 0.18457,
      "grad_norm": 1.518079009396172,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 18457
    },
    {
      "epoch": 0.18458,
      "grad_norm": 1.0893014638648486,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 18458
    },
    {
      "epoch": 0.18459,
      "grad_norm": 1.395160488488644,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 18459
    },
    {
      "epoch": 0.1846,
      "grad_norm": 1.279374257931241,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 18460
    },
    {
      "epoch": 0.18461,
      "grad_norm": 1.268182481391018,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 18461
    },
    {
      "epoch": 0.18462,
      "grad_norm": 1.340110662770171,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 18462
    },
    {
      "epoch": 0.18463,
      "grad_norm": 1.0870891437013017,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 18463
    },
    {
      "epoch": 0.18464,
      "grad_norm": 1.3560092280503788,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 18464
    },
    {
      "epoch": 0.18465,
      "grad_norm": 1.146359035106978,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 18465
    },
    {
      "epoch": 0.18466,
      "grad_norm": 1.3008419389754802,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 18466
    },
    {
      "epoch": 0.18467,
      "grad_norm": 1.2050250922697034,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 18467
    },
    {
      "epoch": 0.18468,
      "grad_norm": 1.3916879294092328,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 18468
    },
    {
      "epoch": 0.18469,
      "grad_norm": 1.6267142323330905,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 18469
    },
    {
      "epoch": 0.1847,
      "grad_norm": 1.1109695202765093,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 18470
    },
    {
      "epoch": 0.18471,
      "grad_norm": 1.2428610086781493,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 18471
    },
    {
      "epoch": 0.18472,
      "grad_norm": 1.4164641965220681,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 18472
    },
    {
      "epoch": 0.18473,
      "grad_norm": 1.2862536050892301,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 18473
    },
    {
      "epoch": 0.18474,
      "grad_norm": 1.372252339990451,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 18474
    },
    {
      "epoch": 0.18475,
      "grad_norm": 1.2771637418850568,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 18475
    },
    {
      "epoch": 0.18476,
      "grad_norm": 0.9798307090773521,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 18476
    },
    {
      "epoch": 0.18477,
      "grad_norm": 1.370160757472685,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 18477
    },
    {
      "epoch": 0.18478,
      "grad_norm": 1.0822830726748474,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 18478
    },
    {
      "epoch": 0.18479,
      "grad_norm": 1.3446721670686375,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 18479
    },
    {
      "epoch": 0.1848,
      "grad_norm": 1.2880062992123016,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 18480
    },
    {
      "epoch": 0.18481,
      "grad_norm": 1.1764977833596155,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 18481
    },
    {
      "epoch": 0.18482,
      "grad_norm": 1.2670672336263902,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 18482
    },
    {
      "epoch": 0.18483,
      "grad_norm": 1.3699901726556798,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 18483
    },
    {
      "epoch": 0.18484,
      "grad_norm": 1.1748915505926445,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 18484
    },
    {
      "epoch": 0.18485,
      "grad_norm": 1.162132788634817,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 18485
    },
    {
      "epoch": 0.18486,
      "grad_norm": 1.1761824497637563,
      "learning_rate": 0.003,
      "loss": 4.0121,
      "step": 18486
    },
    {
      "epoch": 0.18487,
      "grad_norm": 1.1980158306891817,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 18487
    },
    {
      "epoch": 0.18488,
      "grad_norm": 1.2258039379616448,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 18488
    },
    {
      "epoch": 0.18489,
      "grad_norm": 1.4208962807298569,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18489
    },
    {
      "epoch": 0.1849,
      "grad_norm": 1.0200113458498734,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18490
    },
    {
      "epoch": 0.18491,
      "grad_norm": 1.4595091737327908,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 18491
    },
    {
      "epoch": 0.18492,
      "grad_norm": 1.3168285517146034,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 18492
    },
    {
      "epoch": 0.18493,
      "grad_norm": 1.4532712875747906,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 18493
    },
    {
      "epoch": 0.18494,
      "grad_norm": 1.0459214766605567,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 18494
    },
    {
      "epoch": 0.18495,
      "grad_norm": 1.1381218722401705,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 18495
    },
    {
      "epoch": 0.18496,
      "grad_norm": 1.5386479772919055,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 18496
    },
    {
      "epoch": 0.18497,
      "grad_norm": 1.2299892544229238,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 18497
    },
    {
      "epoch": 0.18498,
      "grad_norm": 1.2335831638757717,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 18498
    },
    {
      "epoch": 0.18499,
      "grad_norm": 1.2669312066929301,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 18499
    },
    {
      "epoch": 0.185,
      "grad_norm": 1.087188823010408,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 18500
    },
    {
      "epoch": 0.18501,
      "grad_norm": 1.1783337651243124,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 18501
    },
    {
      "epoch": 0.18502,
      "grad_norm": 1.0736642518059751,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 18502
    },
    {
      "epoch": 0.18503,
      "grad_norm": 1.4393107138897194,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18503
    },
    {
      "epoch": 0.18504,
      "grad_norm": 1.1090883746507503,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 18504
    },
    {
      "epoch": 0.18505,
      "grad_norm": 1.4161606762569607,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 18505
    },
    {
      "epoch": 0.18506,
      "grad_norm": 1.2049783538307672,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 18506
    },
    {
      "epoch": 0.18507,
      "grad_norm": 1.2081699483481163,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 18507
    },
    {
      "epoch": 0.18508,
      "grad_norm": 1.2776863645276184,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 18508
    },
    {
      "epoch": 0.18509,
      "grad_norm": 1.421387032589169,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 18509
    },
    {
      "epoch": 0.1851,
      "grad_norm": 1.1131341934615544,
      "learning_rate": 0.003,
      "loss": 4.0808,
      "step": 18510
    },
    {
      "epoch": 0.18511,
      "grad_norm": 1.38719870177101,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 18511
    },
    {
      "epoch": 0.18512,
      "grad_norm": 1.3032146929287052,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 18512
    },
    {
      "epoch": 0.18513,
      "grad_norm": 1.2852777986544128,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 18513
    },
    {
      "epoch": 0.18514,
      "grad_norm": 1.0331253160795906,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 18514
    },
    {
      "epoch": 0.18515,
      "grad_norm": 1.5607563072743642,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 18515
    },
    {
      "epoch": 0.18516,
      "grad_norm": 1.311212303194748,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 18516
    },
    {
      "epoch": 0.18517,
      "grad_norm": 1.1417369515028657,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 18517
    },
    {
      "epoch": 0.18518,
      "grad_norm": 1.4972229558162757,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 18518
    },
    {
      "epoch": 0.18519,
      "grad_norm": 1.1670225381994737,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 18519
    },
    {
      "epoch": 0.1852,
      "grad_norm": 1.6450574769424848,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 18520
    },
    {
      "epoch": 0.18521,
      "grad_norm": 1.047966343768503,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 18521
    },
    {
      "epoch": 0.18522,
      "grad_norm": 1.26719944704278,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 18522
    },
    {
      "epoch": 0.18523,
      "grad_norm": 1.1959792480142655,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 18523
    },
    {
      "epoch": 0.18524,
      "grad_norm": 1.3138523037586216,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 18524
    },
    {
      "epoch": 0.18525,
      "grad_norm": 1.0237531776331572,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 18525
    },
    {
      "epoch": 0.18526,
      "grad_norm": 1.286349474483322,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 18526
    },
    {
      "epoch": 0.18527,
      "grad_norm": 1.2670213943868798,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 18527
    },
    {
      "epoch": 0.18528,
      "grad_norm": 1.1188335528948643,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 18528
    },
    {
      "epoch": 0.18529,
      "grad_norm": 1.4593728460814455,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 18529
    },
    {
      "epoch": 0.1853,
      "grad_norm": 1.3874596643152133,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 18530
    },
    {
      "epoch": 0.18531,
      "grad_norm": 1.5584581857765323,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 18531
    },
    {
      "epoch": 0.18532,
      "grad_norm": 1.0277005199484917,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 18532
    },
    {
      "epoch": 0.18533,
      "grad_norm": 1.381896949372854,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 18533
    },
    {
      "epoch": 0.18534,
      "grad_norm": 1.2713297855170362,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 18534
    },
    {
      "epoch": 0.18535,
      "grad_norm": 1.1657663540513583,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 18535
    },
    {
      "epoch": 0.18536,
      "grad_norm": 1.3492976594869548,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 18536
    },
    {
      "epoch": 0.18537,
      "grad_norm": 1.0301590695489486,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 18537
    },
    {
      "epoch": 0.18538,
      "grad_norm": 1.2991531867485961,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 18538
    },
    {
      "epoch": 0.18539,
      "grad_norm": 1.2309409745880733,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 18539
    },
    {
      "epoch": 0.1854,
      "grad_norm": 1.6172072791048684,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 18540
    },
    {
      "epoch": 0.18541,
      "grad_norm": 1.038151461419373,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 18541
    },
    {
      "epoch": 0.18542,
      "grad_norm": 1.6681079800201724,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 18542
    },
    {
      "epoch": 0.18543,
      "grad_norm": 0.9142406340517287,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 18543
    },
    {
      "epoch": 0.18544,
      "grad_norm": 1.1290954549194858,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18544
    },
    {
      "epoch": 0.18545,
      "grad_norm": 1.2390811169384381,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 18545
    },
    {
      "epoch": 0.18546,
      "grad_norm": 1.3684541946848185,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 18546
    },
    {
      "epoch": 0.18547,
      "grad_norm": 0.9376309640085142,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 18547
    },
    {
      "epoch": 0.18548,
      "grad_norm": 1.169731857420868,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 18548
    },
    {
      "epoch": 0.18549,
      "grad_norm": 1.2633582357108424,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 18549
    },
    {
      "epoch": 0.1855,
      "grad_norm": 1.2898919233025385,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 18550
    },
    {
      "epoch": 0.18551,
      "grad_norm": 1.2996495701078847,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 18551
    },
    {
      "epoch": 0.18552,
      "grad_norm": 1.645188039124316,
      "learning_rate": 0.003,
      "loss": 4.0945,
      "step": 18552
    },
    {
      "epoch": 0.18553,
      "grad_norm": 0.9700590043202127,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 18553
    },
    {
      "epoch": 0.18554,
      "grad_norm": 1.3383196501748178,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 18554
    },
    {
      "epoch": 0.18555,
      "grad_norm": 1.4447324218137332,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 18555
    },
    {
      "epoch": 0.18556,
      "grad_norm": 1.2238794234024961,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 18556
    },
    {
      "epoch": 0.18557,
      "grad_norm": 1.228237793895757,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 18557
    },
    {
      "epoch": 0.18558,
      "grad_norm": 1.2377248740570805,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 18558
    },
    {
      "epoch": 0.18559,
      "grad_norm": 1.1775863527255352,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 18559
    },
    {
      "epoch": 0.1856,
      "grad_norm": 1.2862630217843478,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 18560
    },
    {
      "epoch": 0.18561,
      "grad_norm": 1.2725546242503096,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 18561
    },
    {
      "epoch": 0.18562,
      "grad_norm": 1.0084923210327843,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 18562
    },
    {
      "epoch": 0.18563,
      "grad_norm": 1.4549473612904706,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 18563
    },
    {
      "epoch": 0.18564,
      "grad_norm": 1.0898082300447982,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 18564
    },
    {
      "epoch": 0.18565,
      "grad_norm": 1.4204817236107417,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 18565
    },
    {
      "epoch": 0.18566,
      "grad_norm": 1.1306017397286994,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 18566
    },
    {
      "epoch": 0.18567,
      "grad_norm": 1.3545883527371785,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 18567
    },
    {
      "epoch": 0.18568,
      "grad_norm": 1.0700768308961939,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 18568
    },
    {
      "epoch": 0.18569,
      "grad_norm": 1.253490566049985,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 18569
    },
    {
      "epoch": 0.1857,
      "grad_norm": 1.1080382610625297,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 18570
    },
    {
      "epoch": 0.18571,
      "grad_norm": 1.4826227076272758,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 18571
    },
    {
      "epoch": 0.18572,
      "grad_norm": 1.0275343283728213,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 18572
    },
    {
      "epoch": 0.18573,
      "grad_norm": 1.5618169718041988,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 18573
    },
    {
      "epoch": 0.18574,
      "grad_norm": 1.2097067630440304,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 18574
    },
    {
      "epoch": 0.18575,
      "grad_norm": 1.2138181501319916,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 18575
    },
    {
      "epoch": 0.18576,
      "grad_norm": 1.2300603103536538,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 18576
    },
    {
      "epoch": 0.18577,
      "grad_norm": 1.1092966404884428,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 18577
    },
    {
      "epoch": 0.18578,
      "grad_norm": 1.4220184020364897,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 18578
    },
    {
      "epoch": 0.18579,
      "grad_norm": 1.508446898391007,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 18579
    },
    {
      "epoch": 0.1858,
      "grad_norm": 1.4423422577068636,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 18580
    },
    {
      "epoch": 0.18581,
      "grad_norm": 1.0395036848085926,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 18581
    },
    {
      "epoch": 0.18582,
      "grad_norm": 1.4666658372450543,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 18582
    },
    {
      "epoch": 0.18583,
      "grad_norm": 1.2325909275221758,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 18583
    },
    {
      "epoch": 0.18584,
      "grad_norm": 1.3082951147599138,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 18584
    },
    {
      "epoch": 0.18585,
      "grad_norm": 1.1768231850320106,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 18585
    },
    {
      "epoch": 0.18586,
      "grad_norm": 1.3390239461857918,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 18586
    },
    {
      "epoch": 0.18587,
      "grad_norm": 1.299623772556699,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 18587
    },
    {
      "epoch": 0.18588,
      "grad_norm": 1.255595666077245,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 18588
    },
    {
      "epoch": 0.18589,
      "grad_norm": 1.4111729802737794,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 18589
    },
    {
      "epoch": 0.1859,
      "grad_norm": 1.0180266304943313,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 18590
    },
    {
      "epoch": 0.18591,
      "grad_norm": 1.3972753800116477,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 18591
    },
    {
      "epoch": 0.18592,
      "grad_norm": 1.1621225220737084,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 18592
    },
    {
      "epoch": 0.18593,
      "grad_norm": 1.3732411657292347,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 18593
    },
    {
      "epoch": 0.18594,
      "grad_norm": 1.209977099499059,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 18594
    },
    {
      "epoch": 0.18595,
      "grad_norm": 1.0367443408157266,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 18595
    },
    {
      "epoch": 0.18596,
      "grad_norm": 1.180761823307239,
      "learning_rate": 0.003,
      "loss": 4.0892,
      "step": 18596
    },
    {
      "epoch": 0.18597,
      "grad_norm": 1.2569061231225465,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 18597
    },
    {
      "epoch": 0.18598,
      "grad_norm": 1.2470900105281038,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 18598
    },
    {
      "epoch": 0.18599,
      "grad_norm": 1.1336668930254277,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 18599
    },
    {
      "epoch": 0.186,
      "grad_norm": 1.409078569317884,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 18600
    },
    {
      "epoch": 0.18601,
      "grad_norm": 0.997962942227669,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 18601
    },
    {
      "epoch": 0.18602,
      "grad_norm": 1.460422818822215,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 18602
    },
    {
      "epoch": 0.18603,
      "grad_norm": 1.0546011794234764,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 18603
    },
    {
      "epoch": 0.18604,
      "grad_norm": 1.5160687672117177,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 18604
    },
    {
      "epoch": 0.18605,
      "grad_norm": 1.2771163674586319,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 18605
    },
    {
      "epoch": 0.18606,
      "grad_norm": 1.3208345170662767,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 18606
    },
    {
      "epoch": 0.18607,
      "grad_norm": 1.1516522908857025,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 18607
    },
    {
      "epoch": 0.18608,
      "grad_norm": 1.2046464737978206,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 18608
    },
    {
      "epoch": 0.18609,
      "grad_norm": 1.5262495264400762,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 18609
    },
    {
      "epoch": 0.1861,
      "grad_norm": 1.0641298642658337,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 18610
    },
    {
      "epoch": 0.18611,
      "grad_norm": 1.5932848573565546,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 18611
    },
    {
      "epoch": 0.18612,
      "grad_norm": 1.002904295640288,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 18612
    },
    {
      "epoch": 0.18613,
      "grad_norm": 1.4969747288804227,
      "learning_rate": 0.003,
      "loss": 4.0971,
      "step": 18613
    },
    {
      "epoch": 0.18614,
      "grad_norm": 1.1470765262778144,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 18614
    },
    {
      "epoch": 0.18615,
      "grad_norm": 1.1680529552859376,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 18615
    },
    {
      "epoch": 0.18616,
      "grad_norm": 1.1606542101751762,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 18616
    },
    {
      "epoch": 0.18617,
      "grad_norm": 1.1986020675670659,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 18617
    },
    {
      "epoch": 0.18618,
      "grad_norm": 1.4659025348896637,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 18618
    },
    {
      "epoch": 0.18619,
      "grad_norm": 1.1797749440013026,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 18619
    },
    {
      "epoch": 0.1862,
      "grad_norm": 1.3628153636650342,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 18620
    },
    {
      "epoch": 0.18621,
      "grad_norm": 1.2827901915534006,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 18621
    },
    {
      "epoch": 0.18622,
      "grad_norm": 1.388778940140245,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 18622
    },
    {
      "epoch": 0.18623,
      "grad_norm": 1.1919220173396643,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 18623
    },
    {
      "epoch": 0.18624,
      "grad_norm": 1.336295009143112,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 18624
    },
    {
      "epoch": 0.18625,
      "grad_norm": 1.2187632702612323,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 18625
    },
    {
      "epoch": 0.18626,
      "grad_norm": 1.3712027530620414,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 18626
    },
    {
      "epoch": 0.18627,
      "grad_norm": 1.2382458749170933,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 18627
    },
    {
      "epoch": 0.18628,
      "grad_norm": 1.1962023404453752,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 18628
    },
    {
      "epoch": 0.18629,
      "grad_norm": 1.283440987751412,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 18629
    },
    {
      "epoch": 0.1863,
      "grad_norm": 1.0614466943285759,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 18630
    },
    {
      "epoch": 0.18631,
      "grad_norm": 1.3246924380965295,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 18631
    },
    {
      "epoch": 0.18632,
      "grad_norm": 1.295372257801994,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 18632
    },
    {
      "epoch": 0.18633,
      "grad_norm": 1.2585393271056065,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 18633
    },
    {
      "epoch": 0.18634,
      "grad_norm": 1.4536916864523137,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 18634
    },
    {
      "epoch": 0.18635,
      "grad_norm": 1.322218953388777,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 18635
    },
    {
      "epoch": 0.18636,
      "grad_norm": 1.3423310818076708,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 18636
    },
    {
      "epoch": 0.18637,
      "grad_norm": 1.1432741798710784,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 18637
    },
    {
      "epoch": 0.18638,
      "grad_norm": 1.4349547761611217,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18638
    },
    {
      "epoch": 0.18639,
      "grad_norm": 1.0468928856209745,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 18639
    },
    {
      "epoch": 0.1864,
      "grad_norm": 1.2412971534074615,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18640
    },
    {
      "epoch": 0.18641,
      "grad_norm": 1.2683840160903244,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 18641
    },
    {
      "epoch": 0.18642,
      "grad_norm": 1.2784065112434306,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 18642
    },
    {
      "epoch": 0.18643,
      "grad_norm": 1.0127344608529372,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 18643
    },
    {
      "epoch": 0.18644,
      "grad_norm": 1.3747573847249897,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 18644
    },
    {
      "epoch": 0.18645,
      "grad_norm": 1.0808831372998855,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 18645
    },
    {
      "epoch": 0.18646,
      "grad_norm": 1.420689805250137,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 18646
    },
    {
      "epoch": 0.18647,
      "grad_norm": 1.345166995863017,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 18647
    },
    {
      "epoch": 0.18648,
      "grad_norm": 1.2971319501526732,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 18648
    },
    {
      "epoch": 0.18649,
      "grad_norm": 1.2443932292658246,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 18649
    },
    {
      "epoch": 0.1865,
      "grad_norm": 1.2414565982922923,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 18650
    },
    {
      "epoch": 0.18651,
      "grad_norm": 1.34014236981556,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 18651
    },
    {
      "epoch": 0.18652,
      "grad_norm": 1.0791936562175262,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 18652
    },
    {
      "epoch": 0.18653,
      "grad_norm": 1.2294366527608758,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 18653
    },
    {
      "epoch": 0.18654,
      "grad_norm": 1.2654637882095023,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 18654
    },
    {
      "epoch": 0.18655,
      "grad_norm": 1.3101259069674958,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18655
    },
    {
      "epoch": 0.18656,
      "grad_norm": 1.2869356795084852,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 18656
    },
    {
      "epoch": 0.18657,
      "grad_norm": 1.2162196991906062,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 18657
    },
    {
      "epoch": 0.18658,
      "grad_norm": 1.2460610463275514,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18658
    },
    {
      "epoch": 0.18659,
      "grad_norm": 1.1774219485727546,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 18659
    },
    {
      "epoch": 0.1866,
      "grad_norm": 1.4596058612447504,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 18660
    },
    {
      "epoch": 0.18661,
      "grad_norm": 0.9977181994708096,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 18661
    },
    {
      "epoch": 0.18662,
      "grad_norm": 1.5588672641369112,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 18662
    },
    {
      "epoch": 0.18663,
      "grad_norm": 1.0160923890565297,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 18663
    },
    {
      "epoch": 0.18664,
      "grad_norm": 1.3804578276728894,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 18664
    },
    {
      "epoch": 0.18665,
      "grad_norm": 1.5290662221388045,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18665
    },
    {
      "epoch": 0.18666,
      "grad_norm": 1.1849130713284057,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 18666
    },
    {
      "epoch": 0.18667,
      "grad_norm": 1.32853994281615,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 18667
    },
    {
      "epoch": 0.18668,
      "grad_norm": 1.1182458060812699,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 18668
    },
    {
      "epoch": 0.18669,
      "grad_norm": 1.5520326454039035,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 18669
    },
    {
      "epoch": 0.1867,
      "grad_norm": 1.019058986085553,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 18670
    },
    {
      "epoch": 0.18671,
      "grad_norm": 1.4160518584743231,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 18671
    },
    {
      "epoch": 0.18672,
      "grad_norm": 1.0368478405689707,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 18672
    },
    {
      "epoch": 0.18673,
      "grad_norm": 1.3670502550654924,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 18673
    },
    {
      "epoch": 0.18674,
      "grad_norm": 1.126542538632425,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 18674
    },
    {
      "epoch": 0.18675,
      "grad_norm": 1.2764599594968868,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 18675
    },
    {
      "epoch": 0.18676,
      "grad_norm": 1.3132500344481821,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 18676
    },
    {
      "epoch": 0.18677,
      "grad_norm": 1.201393565565788,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 18677
    },
    {
      "epoch": 0.18678,
      "grad_norm": 1.1121205736381374,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 18678
    },
    {
      "epoch": 0.18679,
      "grad_norm": 1.292409613879842,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 18679
    },
    {
      "epoch": 0.1868,
      "grad_norm": 1.1330555155779942,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 18680
    },
    {
      "epoch": 0.18681,
      "grad_norm": 1.5906447922596547,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 18681
    },
    {
      "epoch": 0.18682,
      "grad_norm": 1.1764539128829046,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 18682
    },
    {
      "epoch": 0.18683,
      "grad_norm": 1.474986996228695,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 18683
    },
    {
      "epoch": 0.18684,
      "grad_norm": 1.2686172753821547,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 18684
    },
    {
      "epoch": 0.18685,
      "grad_norm": 1.0932632235147117,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 18685
    },
    {
      "epoch": 0.18686,
      "grad_norm": 1.3377980575479242,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 18686
    },
    {
      "epoch": 0.18687,
      "grad_norm": 1.1281891101466395,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 18687
    },
    {
      "epoch": 0.18688,
      "grad_norm": 1.2612420416188836,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 18688
    },
    {
      "epoch": 0.18689,
      "grad_norm": 1.3658504975467765,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 18689
    },
    {
      "epoch": 0.1869,
      "grad_norm": 1.0824198490941663,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 18690
    },
    {
      "epoch": 0.18691,
      "grad_norm": 1.5137347285081706,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 18691
    },
    {
      "epoch": 0.18692,
      "grad_norm": 1.199086493296236,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 18692
    },
    {
      "epoch": 0.18693,
      "grad_norm": 1.221970076674539,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 18693
    },
    {
      "epoch": 0.18694,
      "grad_norm": 1.4117607798223235,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 18694
    },
    {
      "epoch": 0.18695,
      "grad_norm": 1.3890068014115582,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 18695
    },
    {
      "epoch": 0.18696,
      "grad_norm": 1.3429995283722804,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 18696
    },
    {
      "epoch": 0.18697,
      "grad_norm": 1.1531051577775693,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 18697
    },
    {
      "epoch": 0.18698,
      "grad_norm": 1.3306981034164977,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 18698
    },
    {
      "epoch": 0.18699,
      "grad_norm": 1.343078886524723,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 18699
    },
    {
      "epoch": 0.187,
      "grad_norm": 1.0108194495872993,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 18700
    },
    {
      "epoch": 0.18701,
      "grad_norm": 1.3489124319503016,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 18701
    },
    {
      "epoch": 0.18702,
      "grad_norm": 1.0487537741166186,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 18702
    },
    {
      "epoch": 0.18703,
      "grad_norm": 1.1623226772074409,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 18703
    },
    {
      "epoch": 0.18704,
      "grad_norm": 1.3563915104551696,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 18704
    },
    {
      "epoch": 0.18705,
      "grad_norm": 1.4090253126103949,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 18705
    },
    {
      "epoch": 0.18706,
      "grad_norm": 1.0781544362157394,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 18706
    },
    {
      "epoch": 0.18707,
      "grad_norm": 1.4090107424738676,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 18707
    },
    {
      "epoch": 0.18708,
      "grad_norm": 1.1798980777848695,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 18708
    },
    {
      "epoch": 0.18709,
      "grad_norm": 1.2834313985151309,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 18709
    },
    {
      "epoch": 0.1871,
      "grad_norm": 1.2516894700577164,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 18710
    },
    {
      "epoch": 0.18711,
      "grad_norm": 1.265294176703849,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 18711
    },
    {
      "epoch": 0.18712,
      "grad_norm": 1.183201878916828,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 18712
    },
    {
      "epoch": 0.18713,
      "grad_norm": 1.4076155148906417,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 18713
    },
    {
      "epoch": 0.18714,
      "grad_norm": 1.1285627861130147,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 18714
    },
    {
      "epoch": 0.18715,
      "grad_norm": 1.1434289243119355,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 18715
    },
    {
      "epoch": 0.18716,
      "grad_norm": 1.1301370706376264,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 18716
    },
    {
      "epoch": 0.18717,
      "grad_norm": 1.379301811151608,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 18717
    },
    {
      "epoch": 0.18718,
      "grad_norm": 1.111389298349418,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 18718
    },
    {
      "epoch": 0.18719,
      "grad_norm": 1.4741722058225644,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 18719
    },
    {
      "epoch": 0.1872,
      "grad_norm": 1.1450588816578233,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 18720
    },
    {
      "epoch": 0.18721,
      "grad_norm": 1.418618013788135,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 18721
    },
    {
      "epoch": 0.18722,
      "grad_norm": 1.097306672519934,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 18722
    },
    {
      "epoch": 0.18723,
      "grad_norm": 1.1003636866584108,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 18723
    },
    {
      "epoch": 0.18724,
      "grad_norm": 1.328837159162091,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 18724
    },
    {
      "epoch": 0.18725,
      "grad_norm": 1.0331274012525158,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 18725
    },
    {
      "epoch": 0.18726,
      "grad_norm": 1.2773483347630517,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 18726
    },
    {
      "epoch": 0.18727,
      "grad_norm": 1.14599339799992,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 18727
    },
    {
      "epoch": 0.18728,
      "grad_norm": 1.4346416890229334,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 18728
    },
    {
      "epoch": 0.18729,
      "grad_norm": 1.407287027352288,
      "learning_rate": 0.003,
      "loss": 4.0964,
      "step": 18729
    },
    {
      "epoch": 0.1873,
      "grad_norm": 1.2160554604571705,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 18730
    },
    {
      "epoch": 0.18731,
      "grad_norm": 1.7322312946905274,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 18731
    },
    {
      "epoch": 0.18732,
      "grad_norm": 0.956403957669077,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 18732
    },
    {
      "epoch": 0.18733,
      "grad_norm": 1.341215213111897,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 18733
    },
    {
      "epoch": 0.18734,
      "grad_norm": 1.3155922164273968,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 18734
    },
    {
      "epoch": 0.18735,
      "grad_norm": 1.0627323976815257,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 18735
    },
    {
      "epoch": 0.18736,
      "grad_norm": 1.5730089758668246,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 18736
    },
    {
      "epoch": 0.18737,
      "grad_norm": 0.9824040045509904,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 18737
    },
    {
      "epoch": 0.18738,
      "grad_norm": 1.6083545327331183,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 18738
    },
    {
      "epoch": 0.18739,
      "grad_norm": 1.1029472411719976,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 18739
    },
    {
      "epoch": 0.1874,
      "grad_norm": 1.3746299168459029,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 18740
    },
    {
      "epoch": 0.18741,
      "grad_norm": 1.195778597348714,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 18741
    },
    {
      "epoch": 0.18742,
      "grad_norm": 1.2358515578569471,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 18742
    },
    {
      "epoch": 0.18743,
      "grad_norm": 1.3240559875389892,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 18743
    },
    {
      "epoch": 0.18744,
      "grad_norm": 1.1268808956342007,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 18744
    },
    {
      "epoch": 0.18745,
      "grad_norm": 1.4062233643366688,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 18745
    },
    {
      "epoch": 0.18746,
      "grad_norm": 1.021994536965376,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 18746
    },
    {
      "epoch": 0.18747,
      "grad_norm": 1.281103835925785,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 18747
    },
    {
      "epoch": 0.18748,
      "grad_norm": 1.1180020105028619,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 18748
    },
    {
      "epoch": 0.18749,
      "grad_norm": 1.4942830191803198,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 18749
    },
    {
      "epoch": 0.1875,
      "grad_norm": 1.1061109217475096,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 18750
    },
    {
      "epoch": 0.18751,
      "grad_norm": 1.641043630101888,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 18751
    },
    {
      "epoch": 0.18752,
      "grad_norm": 1.1827677693278353,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 18752
    },
    {
      "epoch": 0.18753,
      "grad_norm": 1.6576891155457214,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 18753
    },
    {
      "epoch": 0.18754,
      "grad_norm": 0.9166261845829198,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 18754
    },
    {
      "epoch": 0.18755,
      "grad_norm": 1.1448762486717943,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 18755
    },
    {
      "epoch": 0.18756,
      "grad_norm": 1.4223527798625488,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 18756
    },
    {
      "epoch": 0.18757,
      "grad_norm": 1.392805424459787,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 18757
    },
    {
      "epoch": 0.18758,
      "grad_norm": 1.0679035726418673,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 18758
    },
    {
      "epoch": 0.18759,
      "grad_norm": 1.3142145374211716,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 18759
    },
    {
      "epoch": 0.1876,
      "grad_norm": 1.1928279399139405,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 18760
    },
    {
      "epoch": 0.18761,
      "grad_norm": 1.1299659658986525,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 18761
    },
    {
      "epoch": 0.18762,
      "grad_norm": 1.1132795872327033,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 18762
    },
    {
      "epoch": 0.18763,
      "grad_norm": 1.5519728527597751,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 18763
    },
    {
      "epoch": 0.18764,
      "grad_norm": 1.068029752545123,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 18764
    },
    {
      "epoch": 0.18765,
      "grad_norm": 1.4216790590547768,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 18765
    },
    {
      "epoch": 0.18766,
      "grad_norm": 1.2092731561974441,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 18766
    },
    {
      "epoch": 0.18767,
      "grad_norm": 1.1278818613591106,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 18767
    },
    {
      "epoch": 0.18768,
      "grad_norm": 1.2642280565529451,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 18768
    },
    {
      "epoch": 0.18769,
      "grad_norm": 1.0210999890823318,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 18769
    },
    {
      "epoch": 0.1877,
      "grad_norm": 1.559365397727483,
      "learning_rate": 0.003,
      "loss": 4.0893,
      "step": 18770
    },
    {
      "epoch": 0.18771,
      "grad_norm": 0.9757599576723669,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 18771
    },
    {
      "epoch": 0.18772,
      "grad_norm": 1.57422857102648,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 18772
    },
    {
      "epoch": 0.18773,
      "grad_norm": 1.004663288361435,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 18773
    },
    {
      "epoch": 0.18774,
      "grad_norm": 1.6048384508059539,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 18774
    },
    {
      "epoch": 0.18775,
      "grad_norm": 1.4036700003248082,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 18775
    },
    {
      "epoch": 0.18776,
      "grad_norm": 1.1564717371974478,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 18776
    },
    {
      "epoch": 0.18777,
      "grad_norm": 1.423201262806476,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 18777
    },
    {
      "epoch": 0.18778,
      "grad_norm": 1.2151965986827458,
      "learning_rate": 0.003,
      "loss": 4.0831,
      "step": 18778
    },
    {
      "epoch": 0.18779,
      "grad_norm": 1.2812432269648473,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 18779
    },
    {
      "epoch": 0.1878,
      "grad_norm": 1.247415236644388,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 18780
    },
    {
      "epoch": 0.18781,
      "grad_norm": 1.3298421943655812,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 18781
    },
    {
      "epoch": 0.18782,
      "grad_norm": 1.291192897252953,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 18782
    },
    {
      "epoch": 0.18783,
      "grad_norm": 1.1717124515165933,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 18783
    },
    {
      "epoch": 0.18784,
      "grad_norm": 1.169639893608424,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 18784
    },
    {
      "epoch": 0.18785,
      "grad_norm": 1.0843001053025956,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 18785
    },
    {
      "epoch": 0.18786,
      "grad_norm": 1.2616116562972313,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 18786
    },
    {
      "epoch": 0.18787,
      "grad_norm": 1.1914570782081966,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 18787
    },
    {
      "epoch": 0.18788,
      "grad_norm": 1.457602004564518,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 18788
    },
    {
      "epoch": 0.18789,
      "grad_norm": 1.0264531082510175,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 18789
    },
    {
      "epoch": 0.1879,
      "grad_norm": 1.3495833279426392,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 18790
    },
    {
      "epoch": 0.18791,
      "grad_norm": 1.22976012385877,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 18791
    },
    {
      "epoch": 0.18792,
      "grad_norm": 1.1837343742085296,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 18792
    },
    {
      "epoch": 0.18793,
      "grad_norm": 1.2220676815952656,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 18793
    },
    {
      "epoch": 0.18794,
      "grad_norm": 1.3643574760850015,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 18794
    },
    {
      "epoch": 0.18795,
      "grad_norm": 1.1508421010536982,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 18795
    },
    {
      "epoch": 0.18796,
      "grad_norm": 1.297945274980459,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 18796
    },
    {
      "epoch": 0.18797,
      "grad_norm": 1.134148435951273,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 18797
    },
    {
      "epoch": 0.18798,
      "grad_norm": 1.2610109456665957,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 18798
    },
    {
      "epoch": 0.18799,
      "grad_norm": 1.2548997534372939,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 18799
    },
    {
      "epoch": 0.188,
      "grad_norm": 1.384054339922946,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 18800
    },
    {
      "epoch": 0.18801,
      "grad_norm": 1.0618489518064131,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 18801
    },
    {
      "epoch": 0.18802,
      "grad_norm": 1.2730200819602449,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 18802
    },
    {
      "epoch": 0.18803,
      "grad_norm": 1.2770460140006974,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 18803
    },
    {
      "epoch": 0.18804,
      "grad_norm": 0.9867148284171002,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 18804
    },
    {
      "epoch": 0.18805,
      "grad_norm": 1.4204970430346897,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 18805
    },
    {
      "epoch": 0.18806,
      "grad_norm": 1.0189924063256883,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 18806
    },
    {
      "epoch": 0.18807,
      "grad_norm": 1.2809889790216966,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 18807
    },
    {
      "epoch": 0.18808,
      "grad_norm": 1.0096811490728688,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 18808
    },
    {
      "epoch": 0.18809,
      "grad_norm": 1.39817487077181,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 18809
    },
    {
      "epoch": 0.1881,
      "grad_norm": 1.414326729617747,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 18810
    },
    {
      "epoch": 0.18811,
      "grad_norm": 1.5941626255541983,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 18811
    },
    {
      "epoch": 0.18812,
      "grad_norm": 1.0536188164043787,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 18812
    },
    {
      "epoch": 0.18813,
      "grad_norm": 1.3324731310272937,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 18813
    },
    {
      "epoch": 0.18814,
      "grad_norm": 1.1925920227492695,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 18814
    },
    {
      "epoch": 0.18815,
      "grad_norm": 1.3120883229036413,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 18815
    },
    {
      "epoch": 0.18816,
      "grad_norm": 1.0930340123629445,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 18816
    },
    {
      "epoch": 0.18817,
      "grad_norm": 1.219252902262019,
      "learning_rate": 0.003,
      "loss": 4.0955,
      "step": 18817
    },
    {
      "epoch": 0.18818,
      "grad_norm": 1.5524946483590683,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 18818
    },
    {
      "epoch": 0.18819,
      "grad_norm": 0.9897498018958503,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 18819
    },
    {
      "epoch": 0.1882,
      "grad_norm": 1.5411202902175514,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 18820
    },
    {
      "epoch": 0.18821,
      "grad_norm": 0.9875784929877088,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 18821
    },
    {
      "epoch": 0.18822,
      "grad_norm": 1.201853807644429,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 18822
    },
    {
      "epoch": 0.18823,
      "grad_norm": 1.419840414175821,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 18823
    },
    {
      "epoch": 0.18824,
      "grad_norm": 1.4980577234133672,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 18824
    },
    {
      "epoch": 0.18825,
      "grad_norm": 1.3236118047628036,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 18825
    },
    {
      "epoch": 0.18826,
      "grad_norm": 1.2847401073788443,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 18826
    },
    {
      "epoch": 0.18827,
      "grad_norm": 1.3538219367720525,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 18827
    },
    {
      "epoch": 0.18828,
      "grad_norm": 1.1769730216139096,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 18828
    },
    {
      "epoch": 0.18829,
      "grad_norm": 0.9967557172414647,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 18829
    },
    {
      "epoch": 0.1883,
      "grad_norm": 1.5240229295468803,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 18830
    },
    {
      "epoch": 0.18831,
      "grad_norm": 0.9906777777489462,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 18831
    },
    {
      "epoch": 0.18832,
      "grad_norm": 1.5082207878120748,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 18832
    },
    {
      "epoch": 0.18833,
      "grad_norm": 0.9477446461010499,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 18833
    },
    {
      "epoch": 0.18834,
      "grad_norm": 1.3717536216498554,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 18834
    },
    {
      "epoch": 0.18835,
      "grad_norm": 1.0891391557208812,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 18835
    },
    {
      "epoch": 0.18836,
      "grad_norm": 1.5045117096900225,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 18836
    },
    {
      "epoch": 0.18837,
      "grad_norm": 1.4637726562877373,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 18837
    },
    {
      "epoch": 0.18838,
      "grad_norm": 1.2250820711558688,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 18838
    },
    {
      "epoch": 0.18839,
      "grad_norm": 1.1610335799901077,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 18839
    },
    {
      "epoch": 0.1884,
      "grad_norm": 1.3300297533103165,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 18840
    },
    {
      "epoch": 0.18841,
      "grad_norm": 1.3617342778490928,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 18841
    },
    {
      "epoch": 0.18842,
      "grad_norm": 1.3820649819828323,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 18842
    },
    {
      "epoch": 0.18843,
      "grad_norm": 1.16220397310607,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 18843
    },
    {
      "epoch": 0.18844,
      "grad_norm": 1.1410995537572468,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 18844
    },
    {
      "epoch": 0.18845,
      "grad_norm": 1.363632859651913,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 18845
    },
    {
      "epoch": 0.18846,
      "grad_norm": 1.4013719562590952,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 18846
    },
    {
      "epoch": 0.18847,
      "grad_norm": 1.0040823809056576,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 18847
    },
    {
      "epoch": 0.18848,
      "grad_norm": 1.7138847323654889,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 18848
    },
    {
      "epoch": 0.18849,
      "grad_norm": 1.1974881816633904,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 18849
    },
    {
      "epoch": 0.1885,
      "grad_norm": 1.4071990004696164,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 18850
    },
    {
      "epoch": 0.18851,
      "grad_norm": 1.2945320517992853,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 18851
    },
    {
      "epoch": 0.18852,
      "grad_norm": 1.0493911494113357,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 18852
    },
    {
      "epoch": 0.18853,
      "grad_norm": 1.3522301267229937,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 18853
    },
    {
      "epoch": 0.18854,
      "grad_norm": 0.9479585527222019,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 18854
    },
    {
      "epoch": 0.18855,
      "grad_norm": 1.3052992350117785,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 18855
    },
    {
      "epoch": 0.18856,
      "grad_norm": 1.1088881925585756,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 18856
    },
    {
      "epoch": 0.18857,
      "grad_norm": 1.1951391458949168,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 18857
    },
    {
      "epoch": 0.18858,
      "grad_norm": 1.2872035984060555,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 18858
    },
    {
      "epoch": 0.18859,
      "grad_norm": 1.0738444028740626,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 18859
    },
    {
      "epoch": 0.1886,
      "grad_norm": 1.423952097069054,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 18860
    },
    {
      "epoch": 0.18861,
      "grad_norm": 1.1861884938319123,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 18861
    },
    {
      "epoch": 0.18862,
      "grad_norm": 1.268656998919176,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 18862
    },
    {
      "epoch": 0.18863,
      "grad_norm": 1.5208838043266313,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 18863
    },
    {
      "epoch": 0.18864,
      "grad_norm": 1.0769050208064912,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 18864
    },
    {
      "epoch": 0.18865,
      "grad_norm": 1.4268680552190511,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 18865
    },
    {
      "epoch": 0.18866,
      "grad_norm": 1.0360871095610265,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 18866
    },
    {
      "epoch": 0.18867,
      "grad_norm": 1.4852075792511028,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 18867
    },
    {
      "epoch": 0.18868,
      "grad_norm": 1.0618590015090037,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 18868
    },
    {
      "epoch": 0.18869,
      "grad_norm": 1.5082378931522273,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 18869
    },
    {
      "epoch": 0.1887,
      "grad_norm": 1.1699590064692302,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 18870
    },
    {
      "epoch": 0.18871,
      "grad_norm": 1.5945378827543812,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 18871
    },
    {
      "epoch": 0.18872,
      "grad_norm": 1.1131528037116456,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 18872
    },
    {
      "epoch": 0.18873,
      "grad_norm": 1.1786422363921647,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 18873
    },
    {
      "epoch": 0.18874,
      "grad_norm": 1.2345214530184712,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 18874
    },
    {
      "epoch": 0.18875,
      "grad_norm": 1.290648570863387,
      "learning_rate": 0.003,
      "loss": 4.0934,
      "step": 18875
    },
    {
      "epoch": 0.18876,
      "grad_norm": 1.2236955589023566,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 18876
    },
    {
      "epoch": 0.18877,
      "grad_norm": 1.1482019620159905,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 18877
    },
    {
      "epoch": 0.18878,
      "grad_norm": 1.312168673740298,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 18878
    },
    {
      "epoch": 0.18879,
      "grad_norm": 1.3563338453714813,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 18879
    },
    {
      "epoch": 0.1888,
      "grad_norm": 1.0704749748380464,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 18880
    },
    {
      "epoch": 0.18881,
      "grad_norm": 1.2655347387144644,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 18881
    },
    {
      "epoch": 0.18882,
      "grad_norm": 1.2141701531674876,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 18882
    },
    {
      "epoch": 0.18883,
      "grad_norm": 1.2624455567851907,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 18883
    },
    {
      "epoch": 0.18884,
      "grad_norm": 1.406371652440986,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 18884
    },
    {
      "epoch": 0.18885,
      "grad_norm": 0.9983916971461474,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 18885
    },
    {
      "epoch": 0.18886,
      "grad_norm": 1.4333871584870326,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 18886
    },
    {
      "epoch": 0.18887,
      "grad_norm": 1.1255837722806947,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 18887
    },
    {
      "epoch": 0.18888,
      "grad_norm": 1.409572666244752,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 18888
    },
    {
      "epoch": 0.18889,
      "grad_norm": 1.1808092271215411,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 18889
    },
    {
      "epoch": 0.1889,
      "grad_norm": 1.294565705597222,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 18890
    },
    {
      "epoch": 0.18891,
      "grad_norm": 1.2245215397964557,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 18891
    },
    {
      "epoch": 0.18892,
      "grad_norm": 1.057239401631099,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 18892
    },
    {
      "epoch": 0.18893,
      "grad_norm": 1.479448868735589,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 18893
    },
    {
      "epoch": 0.18894,
      "grad_norm": 1.0000969539239213,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 18894
    },
    {
      "epoch": 0.18895,
      "grad_norm": 1.5510575764076522,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 18895
    },
    {
      "epoch": 0.18896,
      "grad_norm": 1.013820609190286,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 18896
    },
    {
      "epoch": 0.18897,
      "grad_norm": 1.339914850079941,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 18897
    },
    {
      "epoch": 0.18898,
      "grad_norm": 1.0905513146887738,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 18898
    },
    {
      "epoch": 0.18899,
      "grad_norm": 1.4980920304789715,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 18899
    },
    {
      "epoch": 0.189,
      "grad_norm": 1.220822162538095,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 18900
    },
    {
      "epoch": 0.18901,
      "grad_norm": 1.2984675363326095,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 18901
    },
    {
      "epoch": 0.18902,
      "grad_norm": 1.1393971083261454,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 18902
    },
    {
      "epoch": 0.18903,
      "grad_norm": 1.2798411582421874,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 18903
    },
    {
      "epoch": 0.18904,
      "grad_norm": 1.29715988715658,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 18904
    },
    {
      "epoch": 0.18905,
      "grad_norm": 1.1510916069001649,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 18905
    },
    {
      "epoch": 0.18906,
      "grad_norm": 1.2876896440316996,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 18906
    },
    {
      "epoch": 0.18907,
      "grad_norm": 1.0620968999573859,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 18907
    },
    {
      "epoch": 0.18908,
      "grad_norm": 1.2586156432936058,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 18908
    },
    {
      "epoch": 0.18909,
      "grad_norm": 1.1054595703171823,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 18909
    },
    {
      "epoch": 0.1891,
      "grad_norm": 1.6090156306881913,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 18910
    },
    {
      "epoch": 0.18911,
      "grad_norm": 1.0370237279217571,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 18911
    },
    {
      "epoch": 0.18912,
      "grad_norm": 1.3108642891143027,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 18912
    },
    {
      "epoch": 0.18913,
      "grad_norm": 1.166700841813181,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 18913
    },
    {
      "epoch": 0.18914,
      "grad_norm": 1.3118894510467247,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 18914
    },
    {
      "epoch": 0.18915,
      "grad_norm": 1.3672575167176313,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 18915
    },
    {
      "epoch": 0.18916,
      "grad_norm": 1.1851340868517821,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 18916
    },
    {
      "epoch": 0.18917,
      "grad_norm": 1.394840501538389,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 18917
    },
    {
      "epoch": 0.18918,
      "grad_norm": 1.172796389585038,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 18918
    },
    {
      "epoch": 0.18919,
      "grad_norm": 1.4820948479111926,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 18919
    },
    {
      "epoch": 0.1892,
      "grad_norm": 1.1087980018754855,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 18920
    },
    {
      "epoch": 0.18921,
      "grad_norm": 1.6150820672105983,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 18921
    },
    {
      "epoch": 0.18922,
      "grad_norm": 1.1334047373684482,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 18922
    },
    {
      "epoch": 0.18923,
      "grad_norm": 1.2049292682676265,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 18923
    },
    {
      "epoch": 0.18924,
      "grad_norm": 1.3865621325880826,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 18924
    },
    {
      "epoch": 0.18925,
      "grad_norm": 1.1170851788113854,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 18925
    },
    {
      "epoch": 0.18926,
      "grad_norm": 1.6191176988281037,
      "learning_rate": 0.003,
      "loss": 4.0943,
      "step": 18926
    },
    {
      "epoch": 0.18927,
      "grad_norm": 0.9493332963228506,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 18927
    },
    {
      "epoch": 0.18928,
      "grad_norm": 1.144685556081795,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 18928
    },
    {
      "epoch": 0.18929,
      "grad_norm": 1.3412926968213719,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 18929
    },
    {
      "epoch": 0.1893,
      "grad_norm": 1.2014837923534043,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 18930
    },
    {
      "epoch": 0.18931,
      "grad_norm": 1.247210746382838,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 18931
    },
    {
      "epoch": 0.18932,
      "grad_norm": 1.0874898122123102,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 18932
    },
    {
      "epoch": 0.18933,
      "grad_norm": 1.1680747946964996,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 18933
    },
    {
      "epoch": 0.18934,
      "grad_norm": 1.3061225339713496,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 18934
    },
    {
      "epoch": 0.18935,
      "grad_norm": 1.0774394467048682,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 18935
    },
    {
      "epoch": 0.18936,
      "grad_norm": 1.2662585394487187,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 18936
    },
    {
      "epoch": 0.18937,
      "grad_norm": 1.1485257564065396,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 18937
    },
    {
      "epoch": 0.18938,
      "grad_norm": 1.3073638993766212,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 18938
    },
    {
      "epoch": 0.18939,
      "grad_norm": 1.1820949992693355,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 18939
    },
    {
      "epoch": 0.1894,
      "grad_norm": 1.2770017063627936,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 18940
    },
    {
      "epoch": 0.18941,
      "grad_norm": 1.3485118903633273,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 18941
    },
    {
      "epoch": 0.18942,
      "grad_norm": 1.299939845997475,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 18942
    },
    {
      "epoch": 0.18943,
      "grad_norm": 1.101719247255625,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 18943
    },
    {
      "epoch": 0.18944,
      "grad_norm": 1.4394683795789305,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 18944
    },
    {
      "epoch": 0.18945,
      "grad_norm": 0.9588952070702902,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 18945
    },
    {
      "epoch": 0.18946,
      "grad_norm": 1.2430544770767809,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 18946
    },
    {
      "epoch": 0.18947,
      "grad_norm": 1.3361267616006969,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 18947
    },
    {
      "epoch": 0.18948,
      "grad_norm": 1.3336918407281249,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 18948
    },
    {
      "epoch": 0.18949,
      "grad_norm": 1.1394460864552343,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 18949
    },
    {
      "epoch": 0.1895,
      "grad_norm": 1.324063928905688,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 18950
    },
    {
      "epoch": 0.18951,
      "grad_norm": 1.3409462464835011,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 18951
    },
    {
      "epoch": 0.18952,
      "grad_norm": 1.2524298601876103,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 18952
    },
    {
      "epoch": 0.18953,
      "grad_norm": 1.162892549071531,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 18953
    },
    {
      "epoch": 0.18954,
      "grad_norm": 1.4673924290785485,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 18954
    },
    {
      "epoch": 0.18955,
      "grad_norm": 1.1855174825877477,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 18955
    },
    {
      "epoch": 0.18956,
      "grad_norm": 1.3151638032539035,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 18956
    },
    {
      "epoch": 0.18957,
      "grad_norm": 1.0229964035825971,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 18957
    },
    {
      "epoch": 0.18958,
      "grad_norm": 1.305533065097813,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 18958
    },
    {
      "epoch": 0.18959,
      "grad_norm": 1.203761119232079,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 18959
    },
    {
      "epoch": 0.1896,
      "grad_norm": 1.2164254639553058,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 18960
    },
    {
      "epoch": 0.18961,
      "grad_norm": 1.2777264410334181,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 18961
    },
    {
      "epoch": 0.18962,
      "grad_norm": 1.399564003481491,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 18962
    },
    {
      "epoch": 0.18963,
      "grad_norm": 1.2219866179380663,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 18963
    },
    {
      "epoch": 0.18964,
      "grad_norm": 1.2931070300778063,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 18964
    },
    {
      "epoch": 0.18965,
      "grad_norm": 1.2408788270459041,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 18965
    },
    {
      "epoch": 0.18966,
      "grad_norm": 1.3852451172811868,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 18966
    },
    {
      "epoch": 0.18967,
      "grad_norm": 1.181955340350945,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 18967
    },
    {
      "epoch": 0.18968,
      "grad_norm": 1.388009976154795,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 18968
    },
    {
      "epoch": 0.18969,
      "grad_norm": 1.452508863615844,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 18969
    },
    {
      "epoch": 0.1897,
      "grad_norm": 1.0539613115032418,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 18970
    },
    {
      "epoch": 0.18971,
      "grad_norm": 1.5283629289875311,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 18971
    },
    {
      "epoch": 0.18972,
      "grad_norm": 1.0020586289626954,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 18972
    },
    {
      "epoch": 0.18973,
      "grad_norm": 1.4643755552488762,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 18973
    },
    {
      "epoch": 0.18974,
      "grad_norm": 1.082547674257327,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 18974
    },
    {
      "epoch": 0.18975,
      "grad_norm": 1.4776300371215036,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 18975
    },
    {
      "epoch": 0.18976,
      "grad_norm": 1.3135592405403211,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 18976
    },
    {
      "epoch": 0.18977,
      "grad_norm": 1.0745658921355028,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 18977
    },
    {
      "epoch": 0.18978,
      "grad_norm": 1.2457878005667065,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 18978
    },
    {
      "epoch": 0.18979,
      "grad_norm": 1.0044668622723179,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 18979
    },
    {
      "epoch": 0.1898,
      "grad_norm": 1.4943251717141453,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 18980
    },
    {
      "epoch": 0.18981,
      "grad_norm": 1.1274084795099037,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 18981
    },
    {
      "epoch": 0.18982,
      "grad_norm": 1.306218374717761,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 18982
    },
    {
      "epoch": 0.18983,
      "grad_norm": 1.195507438217964,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 18983
    },
    {
      "epoch": 0.18984,
      "grad_norm": 1.3489812646578905,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 18984
    },
    {
      "epoch": 0.18985,
      "grad_norm": 1.079978511793761,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 18985
    },
    {
      "epoch": 0.18986,
      "grad_norm": 1.8198031846834766,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 18986
    },
    {
      "epoch": 0.18987,
      "grad_norm": 0.9543303051945693,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 18987
    },
    {
      "epoch": 0.18988,
      "grad_norm": 1.2918817749052591,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 18988
    },
    {
      "epoch": 0.18989,
      "grad_norm": 1.2298887897140032,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 18989
    },
    {
      "epoch": 0.1899,
      "grad_norm": 1.4214273857280832,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 18990
    },
    {
      "epoch": 0.18991,
      "grad_norm": 0.9867915594580235,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 18991
    },
    {
      "epoch": 0.18992,
      "grad_norm": 1.1982962929557852,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 18992
    },
    {
      "epoch": 0.18993,
      "grad_norm": 1.2484423183571205,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 18993
    },
    {
      "epoch": 0.18994,
      "grad_norm": 1.1323142694208788,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 18994
    },
    {
      "epoch": 0.18995,
      "grad_norm": 1.1463045485943708,
      "learning_rate": 0.003,
      "loss": 4.0,
      "step": 18995
    },
    {
      "epoch": 0.18996,
      "grad_norm": 1.0377533028547177,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 18996
    },
    {
      "epoch": 0.18997,
      "grad_norm": 1.3611288030082336,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 18997
    },
    {
      "epoch": 0.18998,
      "grad_norm": 1.1713704658513688,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 18998
    },
    {
      "epoch": 0.18999,
      "grad_norm": 1.4184447797254487,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 18999
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.9938167299526427,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 19000
    },
    {
      "epoch": 0.19001,
      "grad_norm": 1.3314032409593017,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 19001
    },
    {
      "epoch": 0.19002,
      "grad_norm": 1.1479969333151756,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 19002
    },
    {
      "epoch": 0.19003,
      "grad_norm": 1.2375409505772659,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 19003
    },
    {
      "epoch": 0.19004,
      "grad_norm": 1.3176797601185788,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 19004
    },
    {
      "epoch": 0.19005,
      "grad_norm": 1.6342456579206885,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 19005
    },
    {
      "epoch": 0.19006,
      "grad_norm": 1.1753775466652139,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 19006
    },
    {
      "epoch": 0.19007,
      "grad_norm": 1.2277475711315688,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 19007
    },
    {
      "epoch": 0.19008,
      "grad_norm": 1.4782021717418683,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 19008
    },
    {
      "epoch": 0.19009,
      "grad_norm": 1.0861563056624512,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 19009
    },
    {
      "epoch": 0.1901,
      "grad_norm": 1.2797915735224965,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 19010
    },
    {
      "epoch": 0.19011,
      "grad_norm": 1.3737834290748083,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 19011
    },
    {
      "epoch": 0.19012,
      "grad_norm": 1.3596128313334717,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 19012
    },
    {
      "epoch": 0.19013,
      "grad_norm": 1.1798694422318736,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 19013
    },
    {
      "epoch": 0.19014,
      "grad_norm": 1.3600351848591157,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 19014
    },
    {
      "epoch": 0.19015,
      "grad_norm": 1.141414457718081,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 19015
    },
    {
      "epoch": 0.19016,
      "grad_norm": 1.262402004064144,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 19016
    },
    {
      "epoch": 0.19017,
      "grad_norm": 1.2130498633124398,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 19017
    },
    {
      "epoch": 0.19018,
      "grad_norm": 1.271567940825599,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 19018
    },
    {
      "epoch": 0.19019,
      "grad_norm": 1.1325178362829071,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 19019
    },
    {
      "epoch": 0.1902,
      "grad_norm": 1.2074662414846844,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 19020
    },
    {
      "epoch": 0.19021,
      "grad_norm": 1.1450482817426657,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 19021
    },
    {
      "epoch": 0.19022,
      "grad_norm": 1.312247265532254,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 19022
    },
    {
      "epoch": 0.19023,
      "grad_norm": 1.3091949037421626,
      "learning_rate": 0.003,
      "loss": 4.1027,
      "step": 19023
    },
    {
      "epoch": 0.19024,
      "grad_norm": 1.1872720134607715,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 19024
    },
    {
      "epoch": 0.19025,
      "grad_norm": 1.2670890663034708,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 19025
    },
    {
      "epoch": 0.19026,
      "grad_norm": 1.3355338650971906,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 19026
    },
    {
      "epoch": 0.19027,
      "grad_norm": 1.1281748348642044,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 19027
    },
    {
      "epoch": 0.19028,
      "grad_norm": 1.5137878227413488,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 19028
    },
    {
      "epoch": 0.19029,
      "grad_norm": 1.0493984454266818,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 19029
    },
    {
      "epoch": 0.1903,
      "grad_norm": 1.5058467634901882,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 19030
    },
    {
      "epoch": 0.19031,
      "grad_norm": 1.45001970840602,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 19031
    },
    {
      "epoch": 0.19032,
      "grad_norm": 1.4059804607395356,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 19032
    },
    {
      "epoch": 0.19033,
      "grad_norm": 1.4382315050248882,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19033
    },
    {
      "epoch": 0.19034,
      "grad_norm": 1.2144508213612755,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 19034
    },
    {
      "epoch": 0.19035,
      "grad_norm": 1.2834551998532726,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 19035
    },
    {
      "epoch": 0.19036,
      "grad_norm": 1.1145058006863604,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 19036
    },
    {
      "epoch": 0.19037,
      "grad_norm": 1.2924159032683007,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 19037
    },
    {
      "epoch": 0.19038,
      "grad_norm": 1.1242842475684267,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 19038
    },
    {
      "epoch": 0.19039,
      "grad_norm": 1.320438816371859,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 19039
    },
    {
      "epoch": 0.1904,
      "grad_norm": 1.0389084205361234,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 19040
    },
    {
      "epoch": 0.19041,
      "grad_norm": 1.4772991214516211,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 19041
    },
    {
      "epoch": 0.19042,
      "grad_norm": 1.0942789445482135,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 19042
    },
    {
      "epoch": 0.19043,
      "grad_norm": 1.4041406453379557,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 19043
    },
    {
      "epoch": 0.19044,
      "grad_norm": 1.3233649763436737,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 19044
    },
    {
      "epoch": 0.19045,
      "grad_norm": 1.3186020535932237,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 19045
    },
    {
      "epoch": 0.19046,
      "grad_norm": 1.139676960059712,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 19046
    },
    {
      "epoch": 0.19047,
      "grad_norm": 1.30211529961854,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 19047
    },
    {
      "epoch": 0.19048,
      "grad_norm": 1.2421629614746557,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 19048
    },
    {
      "epoch": 0.19049,
      "grad_norm": 1.453451977114375,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 19049
    },
    {
      "epoch": 0.1905,
      "grad_norm": 0.9663541331808644,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 19050
    },
    {
      "epoch": 0.19051,
      "grad_norm": 1.2498652674040842,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 19051
    },
    {
      "epoch": 0.19052,
      "grad_norm": 1.3002205859726348,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 19052
    },
    {
      "epoch": 0.19053,
      "grad_norm": 1.4834338460002425,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 19053
    },
    {
      "epoch": 0.19054,
      "grad_norm": 1.2333198115803878,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 19054
    },
    {
      "epoch": 0.19055,
      "grad_norm": 1.271025096267169,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 19055
    },
    {
      "epoch": 0.19056,
      "grad_norm": 1.3713791298987748,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 19056
    },
    {
      "epoch": 0.19057,
      "grad_norm": 1.2974018074502591,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 19057
    },
    {
      "epoch": 0.19058,
      "grad_norm": 1.3168209422171067,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 19058
    },
    {
      "epoch": 0.19059,
      "grad_norm": 1.1841553199839385,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 19059
    },
    {
      "epoch": 0.1906,
      "grad_norm": 1.2722570368049224,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 19060
    },
    {
      "epoch": 0.19061,
      "grad_norm": 1.3041753642371723,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 19061
    },
    {
      "epoch": 0.19062,
      "grad_norm": 1.1612477238430865,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 19062
    },
    {
      "epoch": 0.19063,
      "grad_norm": 1.2736861822164462,
      "learning_rate": 0.003,
      "loss": 4.0154,
      "step": 19063
    },
    {
      "epoch": 0.19064,
      "grad_norm": 1.0833764979127176,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 19064
    },
    {
      "epoch": 0.19065,
      "grad_norm": 1.3819328838021085,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 19065
    },
    {
      "epoch": 0.19066,
      "grad_norm": 1.2713583019321415,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 19066
    },
    {
      "epoch": 0.19067,
      "grad_norm": 1.4667943448149092,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 19067
    },
    {
      "epoch": 0.19068,
      "grad_norm": 1.1869598216617847,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 19068
    },
    {
      "epoch": 0.19069,
      "grad_norm": 1.1738634779373909,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 19069
    },
    {
      "epoch": 0.1907,
      "grad_norm": 1.2267084541346251,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 19070
    },
    {
      "epoch": 0.19071,
      "grad_norm": 1.4894837196590038,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 19071
    },
    {
      "epoch": 0.19072,
      "grad_norm": 1.452084826976423,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 19072
    },
    {
      "epoch": 0.19073,
      "grad_norm": 1.1464432083818423,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 19073
    },
    {
      "epoch": 0.19074,
      "grad_norm": 1.3021489620983207,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 19074
    },
    {
      "epoch": 0.19075,
      "grad_norm": 1.0786772835814227,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 19075
    },
    {
      "epoch": 0.19076,
      "grad_norm": 1.5499784953997167,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 19076
    },
    {
      "epoch": 0.19077,
      "grad_norm": 1.2342223626061197,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 19077
    },
    {
      "epoch": 0.19078,
      "grad_norm": 1.1209715371426334,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 19078
    },
    {
      "epoch": 0.19079,
      "grad_norm": 1.2676347243302521,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 19079
    },
    {
      "epoch": 0.1908,
      "grad_norm": 1.1221281508566292,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 19080
    },
    {
      "epoch": 0.19081,
      "grad_norm": 1.260891655079296,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 19081
    },
    {
      "epoch": 0.19082,
      "grad_norm": 1.1017830975718048,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 19082
    },
    {
      "epoch": 0.19083,
      "grad_norm": 1.388700561330615,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 19083
    },
    {
      "epoch": 0.19084,
      "grad_norm": 1.34690955993557,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 19084
    },
    {
      "epoch": 0.19085,
      "grad_norm": 1.145241746931564,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 19085
    },
    {
      "epoch": 0.19086,
      "grad_norm": 1.270284682902497,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19086
    },
    {
      "epoch": 0.19087,
      "grad_norm": 1.1500679421006432,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 19087
    },
    {
      "epoch": 0.19088,
      "grad_norm": 1.5543662258719404,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 19088
    },
    {
      "epoch": 0.19089,
      "grad_norm": 0.8755120821039072,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 19089
    },
    {
      "epoch": 0.1909,
      "grad_norm": 1.4335401457780075,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 19090
    },
    {
      "epoch": 0.19091,
      "grad_norm": 1.385490472183994,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 19091
    },
    {
      "epoch": 0.19092,
      "grad_norm": 1.6226202331853985,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 19092
    },
    {
      "epoch": 0.19093,
      "grad_norm": 1.1890689330629287,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 19093
    },
    {
      "epoch": 0.19094,
      "grad_norm": 1.1981371155081517,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 19094
    },
    {
      "epoch": 0.19095,
      "grad_norm": 1.2893668593081675,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 19095
    },
    {
      "epoch": 0.19096,
      "grad_norm": 1.4960662294561393,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 19096
    },
    {
      "epoch": 0.19097,
      "grad_norm": 0.9725147576688425,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 19097
    },
    {
      "epoch": 0.19098,
      "grad_norm": 1.3685030885529894,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 19098
    },
    {
      "epoch": 0.19099,
      "grad_norm": 1.0622915877891217,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 19099
    },
    {
      "epoch": 0.191,
      "grad_norm": 1.464669870198639,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 19100
    },
    {
      "epoch": 0.19101,
      "grad_norm": 1.118779923692166,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 19101
    },
    {
      "epoch": 0.19102,
      "grad_norm": 1.2976349635461937,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 19102
    },
    {
      "epoch": 0.19103,
      "grad_norm": 1.1300313503024064,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 19103
    },
    {
      "epoch": 0.19104,
      "grad_norm": 1.2789988461407795,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 19104
    },
    {
      "epoch": 0.19105,
      "grad_norm": 1.2132765671340873,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 19105
    },
    {
      "epoch": 0.19106,
      "grad_norm": 1.2121676434130617,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 19106
    },
    {
      "epoch": 0.19107,
      "grad_norm": 1.265660415292461,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 19107
    },
    {
      "epoch": 0.19108,
      "grad_norm": 1.4014520774130477,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 19108
    },
    {
      "epoch": 0.19109,
      "grad_norm": 1.1756389944148185,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 19109
    },
    {
      "epoch": 0.1911,
      "grad_norm": 1.5469899981517996,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 19110
    },
    {
      "epoch": 0.19111,
      "grad_norm": 1.0752369359541158,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 19111
    },
    {
      "epoch": 0.19112,
      "grad_norm": 1.2913248218854734,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 19112
    },
    {
      "epoch": 0.19113,
      "grad_norm": 1.3731946415703815,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 19113
    },
    {
      "epoch": 0.19114,
      "grad_norm": 1.1777399714521335,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 19114
    },
    {
      "epoch": 0.19115,
      "grad_norm": 1.3621232493478581,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 19115
    },
    {
      "epoch": 0.19116,
      "grad_norm": 1.2428027654546105,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 19116
    },
    {
      "epoch": 0.19117,
      "grad_norm": 1.3920508306613928,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 19117
    },
    {
      "epoch": 0.19118,
      "grad_norm": 1.1046257840388285,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 19118
    },
    {
      "epoch": 0.19119,
      "grad_norm": 1.3008624308439076,
      "learning_rate": 0.003,
      "loss": 4.0986,
      "step": 19119
    },
    {
      "epoch": 0.1912,
      "grad_norm": 1.0358300100952855,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 19120
    },
    {
      "epoch": 0.19121,
      "grad_norm": 1.2014291619295017,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 19121
    },
    {
      "epoch": 0.19122,
      "grad_norm": 1.3298764689448466,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 19122
    },
    {
      "epoch": 0.19123,
      "grad_norm": 1.2413092514637005,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19123
    },
    {
      "epoch": 0.19124,
      "grad_norm": 1.4638470284624077,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 19124
    },
    {
      "epoch": 0.19125,
      "grad_norm": 1.2100119342079405,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 19125
    },
    {
      "epoch": 0.19126,
      "grad_norm": 1.327011420791088,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 19126
    },
    {
      "epoch": 0.19127,
      "grad_norm": 1.18672195015303,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 19127
    },
    {
      "epoch": 0.19128,
      "grad_norm": 1.2471773665078512,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 19128
    },
    {
      "epoch": 0.19129,
      "grad_norm": 1.1345648663615966,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 19129
    },
    {
      "epoch": 0.1913,
      "grad_norm": 1.249516390824633,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 19130
    },
    {
      "epoch": 0.19131,
      "grad_norm": 1.5043471304993827,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 19131
    },
    {
      "epoch": 0.19132,
      "grad_norm": 1.0754592345618441,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 19132
    },
    {
      "epoch": 0.19133,
      "grad_norm": 1.4711542619175533,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 19133
    },
    {
      "epoch": 0.19134,
      "grad_norm": 1.2202325514640542,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 19134
    },
    {
      "epoch": 0.19135,
      "grad_norm": 1.3349597063538905,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 19135
    },
    {
      "epoch": 0.19136,
      "grad_norm": 1.4219213256390757,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 19136
    },
    {
      "epoch": 0.19137,
      "grad_norm": 1.244969908564544,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 19137
    },
    {
      "epoch": 0.19138,
      "grad_norm": 1.5286432652037785,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 19138
    },
    {
      "epoch": 0.19139,
      "grad_norm": 0.9895873720911743,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 19139
    },
    {
      "epoch": 0.1914,
      "grad_norm": 1.3173501475465692,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 19140
    },
    {
      "epoch": 0.19141,
      "grad_norm": 1.2158916545602216,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 19141
    },
    {
      "epoch": 0.19142,
      "grad_norm": 1.1792868374867513,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 19142
    },
    {
      "epoch": 0.19143,
      "grad_norm": 1.2818615849270238,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 19143
    },
    {
      "epoch": 0.19144,
      "grad_norm": 1.5747934446702487,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 19144
    },
    {
      "epoch": 0.19145,
      "grad_norm": 1.1546309654274602,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 19145
    },
    {
      "epoch": 0.19146,
      "grad_norm": 1.2410780386836373,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 19146
    },
    {
      "epoch": 0.19147,
      "grad_norm": 1.1409885878401673,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 19147
    },
    {
      "epoch": 0.19148,
      "grad_norm": 1.3436084461851314,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 19148
    },
    {
      "epoch": 0.19149,
      "grad_norm": 1.1699891038377133,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 19149
    },
    {
      "epoch": 0.1915,
      "grad_norm": 1.4725718423048586,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 19150
    },
    {
      "epoch": 0.19151,
      "grad_norm": 1.1693307627587524,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 19151
    },
    {
      "epoch": 0.19152,
      "grad_norm": 1.3657279001419607,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 19152
    },
    {
      "epoch": 0.19153,
      "grad_norm": 0.905534039598116,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 19153
    },
    {
      "epoch": 0.19154,
      "grad_norm": 1.2419837627588104,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 19154
    },
    {
      "epoch": 0.19155,
      "grad_norm": 1.4400848465098008,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 19155
    },
    {
      "epoch": 0.19156,
      "grad_norm": 1.2188726153413414,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 19156
    },
    {
      "epoch": 0.19157,
      "grad_norm": 1.3653131530262828,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 19157
    },
    {
      "epoch": 0.19158,
      "grad_norm": 1.3064082256285416,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 19158
    },
    {
      "epoch": 0.19159,
      "grad_norm": 1.090331273149747,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 19159
    },
    {
      "epoch": 0.1916,
      "grad_norm": 1.6293185159130206,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 19160
    },
    {
      "epoch": 0.19161,
      "grad_norm": 0.8287655198162486,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 19161
    },
    {
      "epoch": 0.19162,
      "grad_norm": 1.158652812687935,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 19162
    },
    {
      "epoch": 0.19163,
      "grad_norm": 1.248177992876627,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 19163
    },
    {
      "epoch": 0.19164,
      "grad_norm": 1.5455163093491473,
      "learning_rate": 0.003,
      "loss": 4.1117,
      "step": 19164
    },
    {
      "epoch": 0.19165,
      "grad_norm": 1.1528992311916821,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 19165
    },
    {
      "epoch": 0.19166,
      "grad_norm": 1.3662058618697008,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 19166
    },
    {
      "epoch": 0.19167,
      "grad_norm": 1.3966410134540599,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 19167
    },
    {
      "epoch": 0.19168,
      "grad_norm": 1.0744464750352165,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 19168
    },
    {
      "epoch": 0.19169,
      "grad_norm": 1.3750293897026336,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 19169
    },
    {
      "epoch": 0.1917,
      "grad_norm": 1.1321040873817443,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 19170
    },
    {
      "epoch": 0.19171,
      "grad_norm": 1.4050662826257483,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 19171
    },
    {
      "epoch": 0.19172,
      "grad_norm": 1.2842379826432408,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 19172
    },
    {
      "epoch": 0.19173,
      "grad_norm": 1.4611207157151929,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19173
    },
    {
      "epoch": 0.19174,
      "grad_norm": 0.9763677160665539,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 19174
    },
    {
      "epoch": 0.19175,
      "grad_norm": 1.190942187768536,
      "learning_rate": 0.003,
      "loss": 4.0933,
      "step": 19175
    },
    {
      "epoch": 0.19176,
      "grad_norm": 1.2381022913461883,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 19176
    },
    {
      "epoch": 0.19177,
      "grad_norm": 1.22330454111681,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 19177
    },
    {
      "epoch": 0.19178,
      "grad_norm": 1.2691358808693634,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19178
    },
    {
      "epoch": 0.19179,
      "grad_norm": 1.2382625954537447,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 19179
    },
    {
      "epoch": 0.1918,
      "grad_norm": 1.4497885036032783,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 19180
    },
    {
      "epoch": 0.19181,
      "grad_norm": 1.134016258479399,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 19181
    },
    {
      "epoch": 0.19182,
      "grad_norm": 1.35760379413228,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 19182
    },
    {
      "epoch": 0.19183,
      "grad_norm": 1.2957311215263954,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 19183
    },
    {
      "epoch": 0.19184,
      "grad_norm": 1.310887624893315,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 19184
    },
    {
      "epoch": 0.19185,
      "grad_norm": 1.4510683143720915,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 19185
    },
    {
      "epoch": 0.19186,
      "grad_norm": 1.2682977145649312,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 19186
    },
    {
      "epoch": 0.19187,
      "grad_norm": 1.575695837497368,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 19187
    },
    {
      "epoch": 0.19188,
      "grad_norm": 1.1916183188246712,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 19188
    },
    {
      "epoch": 0.19189,
      "grad_norm": 1.2328848455590875,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 19189
    },
    {
      "epoch": 0.1919,
      "grad_norm": 1.2209844780962866,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 19190
    },
    {
      "epoch": 0.19191,
      "grad_norm": 1.2732974615205064,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 19191
    },
    {
      "epoch": 0.19192,
      "grad_norm": 1.1609500022343568,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 19192
    },
    {
      "epoch": 0.19193,
      "grad_norm": 1.2852569005801213,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 19193
    },
    {
      "epoch": 0.19194,
      "grad_norm": 1.1691452818388632,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 19194
    },
    {
      "epoch": 0.19195,
      "grad_norm": 1.3702680762038288,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 19195
    },
    {
      "epoch": 0.19196,
      "grad_norm": 1.1694488089282924,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 19196
    },
    {
      "epoch": 0.19197,
      "grad_norm": 1.2473752069715442,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 19197
    },
    {
      "epoch": 0.19198,
      "grad_norm": 1.3688613100206077,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 19198
    },
    {
      "epoch": 0.19199,
      "grad_norm": 1.0233257482532478,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 19199
    },
    {
      "epoch": 0.192,
      "grad_norm": 1.561881594874595,
      "learning_rate": 0.003,
      "loss": 4.097,
      "step": 19200
    },
    {
      "epoch": 0.19201,
      "grad_norm": 1.0375767253882549,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 19201
    },
    {
      "epoch": 0.19202,
      "grad_norm": 1.4407943662007237,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 19202
    },
    {
      "epoch": 0.19203,
      "grad_norm": 1.2342685010454892,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 19203
    },
    {
      "epoch": 0.19204,
      "grad_norm": 1.4208038113256212,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 19204
    },
    {
      "epoch": 0.19205,
      "grad_norm": 1.3487435507325976,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 19205
    },
    {
      "epoch": 0.19206,
      "grad_norm": 1.1336355167725256,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 19206
    },
    {
      "epoch": 0.19207,
      "grad_norm": 1.3828811875751037,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 19207
    },
    {
      "epoch": 0.19208,
      "grad_norm": 0.9345781028127893,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 19208
    },
    {
      "epoch": 0.19209,
      "grad_norm": 1.333644111030958,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 19209
    },
    {
      "epoch": 0.1921,
      "grad_norm": 1.0937521969016353,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 19210
    },
    {
      "epoch": 0.19211,
      "grad_norm": 1.3620631814238218,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 19211
    },
    {
      "epoch": 0.19212,
      "grad_norm": 1.0229306203607218,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 19212
    },
    {
      "epoch": 0.19213,
      "grad_norm": 1.606807081278299,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 19213
    },
    {
      "epoch": 0.19214,
      "grad_norm": 1.0327706718080056,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 19214
    },
    {
      "epoch": 0.19215,
      "grad_norm": 1.631649256616336,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 19215
    },
    {
      "epoch": 0.19216,
      "grad_norm": 1.207976476510446,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 19216
    },
    {
      "epoch": 0.19217,
      "grad_norm": 1.3048150254088753,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19217
    },
    {
      "epoch": 0.19218,
      "grad_norm": 1.3380053116505832,
      "learning_rate": 0.003,
      "loss": 4.1072,
      "step": 19218
    },
    {
      "epoch": 0.19219,
      "grad_norm": 1.0679714142709387,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 19219
    },
    {
      "epoch": 0.1922,
      "grad_norm": 1.4958575564073844,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 19220
    },
    {
      "epoch": 0.19221,
      "grad_norm": 1.117812451045177,
      "learning_rate": 0.003,
      "loss": 4.0906,
      "step": 19221
    },
    {
      "epoch": 0.19222,
      "grad_norm": 1.2419866483185766,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 19222
    },
    {
      "epoch": 0.19223,
      "grad_norm": 1.3768988883459534,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 19223
    },
    {
      "epoch": 0.19224,
      "grad_norm": 1.069206529611852,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 19224
    },
    {
      "epoch": 0.19225,
      "grad_norm": 1.3544075558994118,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 19225
    },
    {
      "epoch": 0.19226,
      "grad_norm": 1.130254621717398,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 19226
    },
    {
      "epoch": 0.19227,
      "grad_norm": 1.2701516631007297,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 19227
    },
    {
      "epoch": 0.19228,
      "grad_norm": 1.141542494705014,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 19228
    },
    {
      "epoch": 0.19229,
      "grad_norm": 1.7278661090592242,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 19229
    },
    {
      "epoch": 0.1923,
      "grad_norm": 0.8602523351380327,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 19230
    },
    {
      "epoch": 0.19231,
      "grad_norm": 1.0008049284928373,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 19231
    },
    {
      "epoch": 0.19232,
      "grad_norm": 1.6441796376548221,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 19232
    },
    {
      "epoch": 0.19233,
      "grad_norm": 1.06348186022257,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 19233
    },
    {
      "epoch": 0.19234,
      "grad_norm": 1.5056261816852763,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 19234
    },
    {
      "epoch": 0.19235,
      "grad_norm": 1.059974211670759,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 19235
    },
    {
      "epoch": 0.19236,
      "grad_norm": 1.4293178717403514,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 19236
    },
    {
      "epoch": 0.19237,
      "grad_norm": 1.3128062170109287,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 19237
    },
    {
      "epoch": 0.19238,
      "grad_norm": 1.2427623419926968,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 19238
    },
    {
      "epoch": 0.19239,
      "grad_norm": 1.3914305120241952,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 19239
    },
    {
      "epoch": 0.1924,
      "grad_norm": 1.1844701559061308,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 19240
    },
    {
      "epoch": 0.19241,
      "grad_norm": 1.3358176920204796,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 19241
    },
    {
      "epoch": 0.19242,
      "grad_norm": 1.2900014037234846,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 19242
    },
    {
      "epoch": 0.19243,
      "grad_norm": 1.303591170485071,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 19243
    },
    {
      "epoch": 0.19244,
      "grad_norm": 1.259426315541422,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 19244
    },
    {
      "epoch": 0.19245,
      "grad_norm": 1.09817722925374,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 19245
    },
    {
      "epoch": 0.19246,
      "grad_norm": 1.4539360635886593,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 19246
    },
    {
      "epoch": 0.19247,
      "grad_norm": 1.3018116569648512,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 19247
    },
    {
      "epoch": 0.19248,
      "grad_norm": 1.1432748974280003,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 19248
    },
    {
      "epoch": 0.19249,
      "grad_norm": 1.4700394550063518,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 19249
    },
    {
      "epoch": 0.1925,
      "grad_norm": 0.9896072843511874,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 19250
    },
    {
      "epoch": 0.19251,
      "grad_norm": 1.3138089628103964,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 19251
    },
    {
      "epoch": 0.19252,
      "grad_norm": 1.0817784890206963,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 19252
    },
    {
      "epoch": 0.19253,
      "grad_norm": 1.3153319585561318,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 19253
    },
    {
      "epoch": 0.19254,
      "grad_norm": 1.2170125022548548,
      "learning_rate": 0.003,
      "loss": 4.1,
      "step": 19254
    },
    {
      "epoch": 0.19255,
      "grad_norm": 1.2694224062537878,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 19255
    },
    {
      "epoch": 0.19256,
      "grad_norm": 1.2453691515563632,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 19256
    },
    {
      "epoch": 0.19257,
      "grad_norm": 1.2349272540389504,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 19257
    },
    {
      "epoch": 0.19258,
      "grad_norm": 1.2688291821025708,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 19258
    },
    {
      "epoch": 0.19259,
      "grad_norm": 1.2700821480023927,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 19259
    },
    {
      "epoch": 0.1926,
      "grad_norm": 1.3579286957148133,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 19260
    },
    {
      "epoch": 0.19261,
      "grad_norm": 1.4739489420098513,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 19261
    },
    {
      "epoch": 0.19262,
      "grad_norm": 0.8635668416528791,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 19262
    },
    {
      "epoch": 0.19263,
      "grad_norm": 1.327948158133326,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 19263
    },
    {
      "epoch": 0.19264,
      "grad_norm": 1.3390826730360608,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 19264
    },
    {
      "epoch": 0.19265,
      "grad_norm": 1.217363022053805,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 19265
    },
    {
      "epoch": 0.19266,
      "grad_norm": 1.182243398179855,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 19266
    },
    {
      "epoch": 0.19267,
      "grad_norm": 1.4727125262172842,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 19267
    },
    {
      "epoch": 0.19268,
      "grad_norm": 1.0565346665110982,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 19268
    },
    {
      "epoch": 0.19269,
      "grad_norm": 1.318652043931812,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 19269
    },
    {
      "epoch": 0.1927,
      "grad_norm": 1.1072118070146826,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 19270
    },
    {
      "epoch": 0.19271,
      "grad_norm": 1.362157172652143,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 19271
    },
    {
      "epoch": 0.19272,
      "grad_norm": 1.1281091123143556,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 19272
    },
    {
      "epoch": 0.19273,
      "grad_norm": 1.292803290024137,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19273
    },
    {
      "epoch": 0.19274,
      "grad_norm": 1.2353272280198817,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 19274
    },
    {
      "epoch": 0.19275,
      "grad_norm": 1.1438153015330619,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 19275
    },
    {
      "epoch": 0.19276,
      "grad_norm": 1.4514607843346614,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 19276
    },
    {
      "epoch": 0.19277,
      "grad_norm": 1.2813060281552016,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 19277
    },
    {
      "epoch": 0.19278,
      "grad_norm": 1.6930936661015001,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 19278
    },
    {
      "epoch": 0.19279,
      "grad_norm": 1.1007912171244085,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 19279
    },
    {
      "epoch": 0.1928,
      "grad_norm": 1.1826340704770322,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 19280
    },
    {
      "epoch": 0.19281,
      "grad_norm": 1.2769700503974448,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 19281
    },
    {
      "epoch": 0.19282,
      "grad_norm": 1.2924742929851223,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 19282
    },
    {
      "epoch": 0.19283,
      "grad_norm": 1.234831979304908,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 19283
    },
    {
      "epoch": 0.19284,
      "grad_norm": 1.2207811629021978,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 19284
    },
    {
      "epoch": 0.19285,
      "grad_norm": 1.2816252739865883,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 19285
    },
    {
      "epoch": 0.19286,
      "grad_norm": 1.209800978176332,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 19286
    },
    {
      "epoch": 0.19287,
      "grad_norm": 1.2372222252209188,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 19287
    },
    {
      "epoch": 0.19288,
      "grad_norm": 1.158777792355288,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 19288
    },
    {
      "epoch": 0.19289,
      "grad_norm": 1.359679154466316,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 19289
    },
    {
      "epoch": 0.1929,
      "grad_norm": 0.960495143521634,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 19290
    },
    {
      "epoch": 0.19291,
      "grad_norm": 1.4301214559126807,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 19291
    },
    {
      "epoch": 0.19292,
      "grad_norm": 1.217408751778196,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 19292
    },
    {
      "epoch": 0.19293,
      "grad_norm": 1.3874768024193218,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 19293
    },
    {
      "epoch": 0.19294,
      "grad_norm": 1.332721404459955,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 19294
    },
    {
      "epoch": 0.19295,
      "grad_norm": 1.2173838719965513,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 19295
    },
    {
      "epoch": 0.19296,
      "grad_norm": 1.2916412791849425,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 19296
    },
    {
      "epoch": 0.19297,
      "grad_norm": 1.296134930093045,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 19297
    },
    {
      "epoch": 0.19298,
      "grad_norm": 1.1697870555772907,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 19298
    },
    {
      "epoch": 0.19299,
      "grad_norm": 1.1415884021127054,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 19299
    },
    {
      "epoch": 0.193,
      "grad_norm": 1.3121021912821669,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 19300
    },
    {
      "epoch": 0.19301,
      "grad_norm": 1.2277104069030647,
      "learning_rate": 0.003,
      "loss": 4.0807,
      "step": 19301
    },
    {
      "epoch": 0.19302,
      "grad_norm": 1.168389647950443,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 19302
    },
    {
      "epoch": 0.19303,
      "grad_norm": 1.199071860372418,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 19303
    },
    {
      "epoch": 0.19304,
      "grad_norm": 1.6197362151787464,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 19304
    },
    {
      "epoch": 0.19305,
      "grad_norm": 1.3872492287681017,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 19305
    },
    {
      "epoch": 0.19306,
      "grad_norm": 1.1968252615562611,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 19306
    },
    {
      "epoch": 0.19307,
      "grad_norm": 1.4302807502281258,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 19307
    },
    {
      "epoch": 0.19308,
      "grad_norm": 1.130676365717826,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 19308
    },
    {
      "epoch": 0.19309,
      "grad_norm": 1.4403273412737394,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 19309
    },
    {
      "epoch": 0.1931,
      "grad_norm": 1.0710180781601069,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 19310
    },
    {
      "epoch": 0.19311,
      "grad_norm": 1.3604383674442513,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 19311
    },
    {
      "epoch": 0.19312,
      "grad_norm": 1.093479747707285,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 19312
    },
    {
      "epoch": 0.19313,
      "grad_norm": 1.3405809261925952,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 19313
    },
    {
      "epoch": 0.19314,
      "grad_norm": 1.0784131713588263,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 19314
    },
    {
      "epoch": 0.19315,
      "grad_norm": 1.1059936570643827,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 19315
    },
    {
      "epoch": 0.19316,
      "grad_norm": 1.2920533396885687,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19316
    },
    {
      "epoch": 0.19317,
      "grad_norm": 1.119322072880016,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19317
    },
    {
      "epoch": 0.19318,
      "grad_norm": 1.2110968387282244,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 19318
    },
    {
      "epoch": 0.19319,
      "grad_norm": 1.2244841200983902,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 19319
    },
    {
      "epoch": 0.1932,
      "grad_norm": 0.9959893152714806,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 19320
    },
    {
      "epoch": 0.19321,
      "grad_norm": 1.7272148138632006,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 19321
    },
    {
      "epoch": 0.19322,
      "grad_norm": 1.266154287794963,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 19322
    },
    {
      "epoch": 0.19323,
      "grad_norm": 1.5853357914133883,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 19323
    },
    {
      "epoch": 0.19324,
      "grad_norm": 1.1858059760863204,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 19324
    },
    {
      "epoch": 0.19325,
      "grad_norm": 1.3031647803358464,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 19325
    },
    {
      "epoch": 0.19326,
      "grad_norm": 1.4328634660837625,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 19326
    },
    {
      "epoch": 0.19327,
      "grad_norm": 1.2369860960951975,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 19327
    },
    {
      "epoch": 0.19328,
      "grad_norm": 1.2614163252370505,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 19328
    },
    {
      "epoch": 0.19329,
      "grad_norm": 1.162931386262037,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 19329
    },
    {
      "epoch": 0.1933,
      "grad_norm": 1.330709905124812,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 19330
    },
    {
      "epoch": 0.19331,
      "grad_norm": 1.158193294800109,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 19331
    },
    {
      "epoch": 0.19332,
      "grad_norm": 1.3349312226282135,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 19332
    },
    {
      "epoch": 0.19333,
      "grad_norm": 1.3713505684636866,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 19333
    },
    {
      "epoch": 0.19334,
      "grad_norm": 1.4973548136509773,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 19334
    },
    {
      "epoch": 0.19335,
      "grad_norm": 1.2475000046473035,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 19335
    },
    {
      "epoch": 0.19336,
      "grad_norm": 1.4186242337546051,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 19336
    },
    {
      "epoch": 0.19337,
      "grad_norm": 1.0438410940110374,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 19337
    },
    {
      "epoch": 0.19338,
      "grad_norm": 1.281900229011639,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 19338
    },
    {
      "epoch": 0.19339,
      "grad_norm": 1.179938674265604,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 19339
    },
    {
      "epoch": 0.1934,
      "grad_norm": 1.3683485649923222,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 19340
    },
    {
      "epoch": 0.19341,
      "grad_norm": 1.093382737839294,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 19341
    },
    {
      "epoch": 0.19342,
      "grad_norm": 1.5809442527601474,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 19342
    },
    {
      "epoch": 0.19343,
      "grad_norm": 0.9839961061261212,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 19343
    },
    {
      "epoch": 0.19344,
      "grad_norm": 1.513270827948988,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 19344
    },
    {
      "epoch": 0.19345,
      "grad_norm": 1.1007128440719929,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 19345
    },
    {
      "epoch": 0.19346,
      "grad_norm": 1.2374639283484485,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 19346
    },
    {
      "epoch": 0.19347,
      "grad_norm": 1.3191176372983453,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 19347
    },
    {
      "epoch": 0.19348,
      "grad_norm": 1.430232717602349,
      "learning_rate": 0.003,
      "loss": 4.1079,
      "step": 19348
    },
    {
      "epoch": 0.19349,
      "grad_norm": 1.3784803258619929,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19349
    },
    {
      "epoch": 0.1935,
      "grad_norm": 1.1225933161395825,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 19350
    },
    {
      "epoch": 0.19351,
      "grad_norm": 1.340969744963439,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 19351
    },
    {
      "epoch": 0.19352,
      "grad_norm": 1.4495783809268945,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 19352
    },
    {
      "epoch": 0.19353,
      "grad_norm": 1.7421593307521097,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 19353
    },
    {
      "epoch": 0.19354,
      "grad_norm": 0.9658945600154332,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 19354
    },
    {
      "epoch": 0.19355,
      "grad_norm": 1.205921178736843,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 19355
    },
    {
      "epoch": 0.19356,
      "grad_norm": 1.2757237919629527,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 19356
    },
    {
      "epoch": 0.19357,
      "grad_norm": 1.3411673563909947,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 19357
    },
    {
      "epoch": 0.19358,
      "grad_norm": 1.2848950228013367,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 19358
    },
    {
      "epoch": 0.19359,
      "grad_norm": 1.3013320004570212,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 19359
    },
    {
      "epoch": 0.1936,
      "grad_norm": 1.0435278072734142,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 19360
    },
    {
      "epoch": 0.19361,
      "grad_norm": 1.287288538323467,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 19361
    },
    {
      "epoch": 0.19362,
      "grad_norm": 1.110274706607819,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 19362
    },
    {
      "epoch": 0.19363,
      "grad_norm": 1.3060179286263724,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 19363
    },
    {
      "epoch": 0.19364,
      "grad_norm": 1.0599462956406744,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 19364
    },
    {
      "epoch": 0.19365,
      "grad_norm": 1.4188548021615779,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 19365
    },
    {
      "epoch": 0.19366,
      "grad_norm": 0.9977169189433731,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 19366
    },
    {
      "epoch": 0.19367,
      "grad_norm": 1.6418750673442695,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 19367
    },
    {
      "epoch": 0.19368,
      "grad_norm": 0.915761451677153,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 19368
    },
    {
      "epoch": 0.19369,
      "grad_norm": 1.36982814408899,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 19369
    },
    {
      "epoch": 0.1937,
      "grad_norm": 1.4218831422973166,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19370
    },
    {
      "epoch": 0.19371,
      "grad_norm": 1.1392214472527857,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 19371
    },
    {
      "epoch": 0.19372,
      "grad_norm": 1.2130464242599384,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 19372
    },
    {
      "epoch": 0.19373,
      "grad_norm": 1.4550828458250995,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 19373
    },
    {
      "epoch": 0.19374,
      "grad_norm": 1.21073894464927,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 19374
    },
    {
      "epoch": 0.19375,
      "grad_norm": 1.4476681629819754,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 19375
    },
    {
      "epoch": 0.19376,
      "grad_norm": 1.4949961065263637,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 19376
    },
    {
      "epoch": 0.19377,
      "grad_norm": 1.1271841630809518,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 19377
    },
    {
      "epoch": 0.19378,
      "grad_norm": 1.3807963456598449,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 19378
    },
    {
      "epoch": 0.19379,
      "grad_norm": 1.0596898741955287,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 19379
    },
    {
      "epoch": 0.1938,
      "grad_norm": 1.4072597453787985,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 19380
    },
    {
      "epoch": 0.19381,
      "grad_norm": 0.9550860981623658,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 19381
    },
    {
      "epoch": 0.19382,
      "grad_norm": 1.4785497463244524,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 19382
    },
    {
      "epoch": 0.19383,
      "grad_norm": 1.0397212461472258,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 19383
    },
    {
      "epoch": 0.19384,
      "grad_norm": 1.412362570967319,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 19384
    },
    {
      "epoch": 0.19385,
      "grad_norm": 1.2552527247210523,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 19385
    },
    {
      "epoch": 0.19386,
      "grad_norm": 0.9757782727735469,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 19386
    },
    {
      "epoch": 0.19387,
      "grad_norm": 1.5585007953127197,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 19387
    },
    {
      "epoch": 0.19388,
      "grad_norm": 1.2592213733025597,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 19388
    },
    {
      "epoch": 0.19389,
      "grad_norm": 1.0797725144848942,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 19389
    },
    {
      "epoch": 0.1939,
      "grad_norm": 1.1894014821171297,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 19390
    },
    {
      "epoch": 0.19391,
      "grad_norm": 1.3322313361724551,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 19391
    },
    {
      "epoch": 0.19392,
      "grad_norm": 1.3131930257013624,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 19392
    },
    {
      "epoch": 0.19393,
      "grad_norm": 1.2090617157853354,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 19393
    },
    {
      "epoch": 0.19394,
      "grad_norm": 1.2370155160264173,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 19394
    },
    {
      "epoch": 0.19395,
      "grad_norm": 1.1644921328866482,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 19395
    },
    {
      "epoch": 0.19396,
      "grad_norm": 1.2434079928932653,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 19396
    },
    {
      "epoch": 0.19397,
      "grad_norm": 1.0364099563185734,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 19397
    },
    {
      "epoch": 0.19398,
      "grad_norm": 1.836051257592041,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 19398
    },
    {
      "epoch": 0.19399,
      "grad_norm": 1.005551153961616,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 19399
    },
    {
      "epoch": 0.194,
      "grad_norm": 1.2868116846193804,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 19400
    },
    {
      "epoch": 0.19401,
      "grad_norm": 1.1521465841753658,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 19401
    },
    {
      "epoch": 0.19402,
      "grad_norm": 1.300040789113978,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 19402
    },
    {
      "epoch": 0.19403,
      "grad_norm": 1.1206071310857861,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 19403
    },
    {
      "epoch": 0.19404,
      "grad_norm": 1.2770638746309435,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 19404
    },
    {
      "epoch": 0.19405,
      "grad_norm": 1.1000698654299443,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 19405
    },
    {
      "epoch": 0.19406,
      "grad_norm": 1.3706451663448482,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 19406
    },
    {
      "epoch": 0.19407,
      "grad_norm": 1.322881375041662,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 19407
    },
    {
      "epoch": 0.19408,
      "grad_norm": 1.256616975675363,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 19408
    },
    {
      "epoch": 0.19409,
      "grad_norm": 1.157319297162313,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 19409
    },
    {
      "epoch": 0.1941,
      "grad_norm": 1.2604883125122113,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 19410
    },
    {
      "epoch": 0.19411,
      "grad_norm": 1.2465910754730276,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 19411
    },
    {
      "epoch": 0.19412,
      "grad_norm": 1.2902604302149527,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 19412
    },
    {
      "epoch": 0.19413,
      "grad_norm": 1.2428339542153704,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 19413
    },
    {
      "epoch": 0.19414,
      "grad_norm": 1.641619198006355,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 19414
    },
    {
      "epoch": 0.19415,
      "grad_norm": 1.1884015575175786,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 19415
    },
    {
      "epoch": 0.19416,
      "grad_norm": 1.4006931979815456,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 19416
    },
    {
      "epoch": 0.19417,
      "grad_norm": 1.1684221143708646,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 19417
    },
    {
      "epoch": 0.19418,
      "grad_norm": 1.4080870540690456,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 19418
    },
    {
      "epoch": 0.19419,
      "grad_norm": 1.2924752860876128,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 19419
    },
    {
      "epoch": 0.1942,
      "grad_norm": 1.1857590064382597,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 19420
    },
    {
      "epoch": 0.19421,
      "grad_norm": 1.2224389378484004,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 19421
    },
    {
      "epoch": 0.19422,
      "grad_norm": 1.5321972622089721,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 19422
    },
    {
      "epoch": 0.19423,
      "grad_norm": 0.9439131538154166,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 19423
    },
    {
      "epoch": 0.19424,
      "grad_norm": 1.0779829074698497,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 19424
    },
    {
      "epoch": 0.19425,
      "grad_norm": 1.4187367087645348,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 19425
    },
    {
      "epoch": 0.19426,
      "grad_norm": 1.2518657015206214,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 19426
    },
    {
      "epoch": 0.19427,
      "grad_norm": 1.2975650890704618,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19427
    },
    {
      "epoch": 0.19428,
      "grad_norm": 1.2504288611968093,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 19428
    },
    {
      "epoch": 0.19429,
      "grad_norm": 1.0626854032608262,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 19429
    },
    {
      "epoch": 0.1943,
      "grad_norm": 1.453944232695203,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 19430
    },
    {
      "epoch": 0.19431,
      "grad_norm": 1.1132415563210114,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 19431
    },
    {
      "epoch": 0.19432,
      "grad_norm": 1.330780831477712,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 19432
    },
    {
      "epoch": 0.19433,
      "grad_norm": 1.2264764129711287,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 19433
    },
    {
      "epoch": 0.19434,
      "grad_norm": 1.119671719753815,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 19434
    },
    {
      "epoch": 0.19435,
      "grad_norm": 1.4390097295078403,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 19435
    },
    {
      "epoch": 0.19436,
      "grad_norm": 1.0497880754472968,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 19436
    },
    {
      "epoch": 0.19437,
      "grad_norm": 1.3619036851797004,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 19437
    },
    {
      "epoch": 0.19438,
      "grad_norm": 1.1321332926155272,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 19438
    },
    {
      "epoch": 0.19439,
      "grad_norm": 1.3594115203490629,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 19439
    },
    {
      "epoch": 0.1944,
      "grad_norm": 1.1652985939647977,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 19440
    },
    {
      "epoch": 0.19441,
      "grad_norm": 1.3598903677041336,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 19441
    },
    {
      "epoch": 0.19442,
      "grad_norm": 1.2438339406836934,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 19442
    },
    {
      "epoch": 0.19443,
      "grad_norm": 1.2762886643194773,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 19443
    },
    {
      "epoch": 0.19444,
      "grad_norm": 1.1623881767163036,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 19444
    },
    {
      "epoch": 0.19445,
      "grad_norm": 1.5096350045671685,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 19445
    },
    {
      "epoch": 0.19446,
      "grad_norm": 1.1648388825625222,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 19446
    },
    {
      "epoch": 0.19447,
      "grad_norm": 1.4692385629412785,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 19447
    },
    {
      "epoch": 0.19448,
      "grad_norm": 1.3199067754255398,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 19448
    },
    {
      "epoch": 0.19449,
      "grad_norm": 1.1597482093135931,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 19449
    },
    {
      "epoch": 0.1945,
      "grad_norm": 1.2452753354825359,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 19450
    },
    {
      "epoch": 0.19451,
      "grad_norm": 1.1868649662414918,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 19451
    },
    {
      "epoch": 0.19452,
      "grad_norm": 1.245608401244629,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 19452
    },
    {
      "epoch": 0.19453,
      "grad_norm": 1.2706894870815892,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 19453
    },
    {
      "epoch": 0.19454,
      "grad_norm": 1.1640633667913651,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 19454
    },
    {
      "epoch": 0.19455,
      "grad_norm": 1.3139941101204728,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 19455
    },
    {
      "epoch": 0.19456,
      "grad_norm": 1.2765744305834983,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 19456
    },
    {
      "epoch": 0.19457,
      "grad_norm": 1.0679899936936712,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 19457
    },
    {
      "epoch": 0.19458,
      "grad_norm": 1.156260712455033,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 19458
    },
    {
      "epoch": 0.19459,
      "grad_norm": 1.043446962809172,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 19459
    },
    {
      "epoch": 0.1946,
      "grad_norm": 1.7189150230765415,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 19460
    },
    {
      "epoch": 0.19461,
      "grad_norm": 1.0548421420952567,
      "learning_rate": 0.003,
      "loss": 4.1019,
      "step": 19461
    },
    {
      "epoch": 0.19462,
      "grad_norm": 1.5763941439210714,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 19462
    },
    {
      "epoch": 0.19463,
      "grad_norm": 0.9633473320637942,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 19463
    },
    {
      "epoch": 0.19464,
      "grad_norm": 1.3369137592285703,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 19464
    },
    {
      "epoch": 0.19465,
      "grad_norm": 1.5895673104733836,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 19465
    },
    {
      "epoch": 0.19466,
      "grad_norm": 1.3282123036176765,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 19466
    },
    {
      "epoch": 0.19467,
      "grad_norm": 1.3285557790501594,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 19467
    },
    {
      "epoch": 0.19468,
      "grad_norm": 1.6404474223744476,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 19468
    },
    {
      "epoch": 0.19469,
      "grad_norm": 1.0272368445693343,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 19469
    },
    {
      "epoch": 0.1947,
      "grad_norm": 1.3470462955974183,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 19470
    },
    {
      "epoch": 0.19471,
      "grad_norm": 1.1352798820585221,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 19471
    },
    {
      "epoch": 0.19472,
      "grad_norm": 1.2741577834484807,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 19472
    },
    {
      "epoch": 0.19473,
      "grad_norm": 1.0338226235999233,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 19473
    },
    {
      "epoch": 0.19474,
      "grad_norm": 1.4778316675976706,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 19474
    },
    {
      "epoch": 0.19475,
      "grad_norm": 1.001802548083004,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 19475
    },
    {
      "epoch": 0.19476,
      "grad_norm": 1.2776564898157308,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 19476
    },
    {
      "epoch": 0.19477,
      "grad_norm": 0.9566690015761019,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 19477
    },
    {
      "epoch": 0.19478,
      "grad_norm": 1.7124043767568116,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 19478
    },
    {
      "epoch": 0.19479,
      "grad_norm": 1.3346500199645615,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 19479
    },
    {
      "epoch": 0.1948,
      "grad_norm": 1.2822789974464504,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 19480
    },
    {
      "epoch": 0.19481,
      "grad_norm": 1.2003608141339481,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 19481
    },
    {
      "epoch": 0.19482,
      "grad_norm": 1.275137873917073,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 19482
    },
    {
      "epoch": 0.19483,
      "grad_norm": 1.2061150927595439,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 19483
    },
    {
      "epoch": 0.19484,
      "grad_norm": 1.3083050736246036,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 19484
    },
    {
      "epoch": 0.19485,
      "grad_norm": 1.4213799670596485,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 19485
    },
    {
      "epoch": 0.19486,
      "grad_norm": 1.1501040588090954,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 19486
    },
    {
      "epoch": 0.19487,
      "grad_norm": 1.1549563528980238,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 19487
    },
    {
      "epoch": 0.19488,
      "grad_norm": 1.4431058402236616,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 19488
    },
    {
      "epoch": 0.19489,
      "grad_norm": 1.2175355473059875,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 19489
    },
    {
      "epoch": 0.1949,
      "grad_norm": 1.3138477593838491,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 19490
    },
    {
      "epoch": 0.19491,
      "grad_norm": 1.026785003255267,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 19491
    },
    {
      "epoch": 0.19492,
      "grad_norm": 1.443968589105436,
      "learning_rate": 0.003,
      "loss": 4.091,
      "step": 19492
    },
    {
      "epoch": 0.19493,
      "grad_norm": 1.2559761385162542,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 19493
    },
    {
      "epoch": 0.19494,
      "grad_norm": 1.2258978094864679,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 19494
    },
    {
      "epoch": 0.19495,
      "grad_norm": 1.3268032918441472,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 19495
    },
    {
      "epoch": 0.19496,
      "grad_norm": 1.172929669839517,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 19496
    },
    {
      "epoch": 0.19497,
      "grad_norm": 1.8611157687150652,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 19497
    },
    {
      "epoch": 0.19498,
      "grad_norm": 0.8175248819424565,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 19498
    },
    {
      "epoch": 0.19499,
      "grad_norm": 1.148929410552156,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 19499
    },
    {
      "epoch": 0.195,
      "grad_norm": 1.4298450383026386,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 19500
    },
    {
      "epoch": 0.19501,
      "grad_norm": 1.352448920737841,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 19501
    },
    {
      "epoch": 0.19502,
      "grad_norm": 1.0639703947120964,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 19502
    },
    {
      "epoch": 0.19503,
      "grad_norm": 1.3500421848525783,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 19503
    },
    {
      "epoch": 0.19504,
      "grad_norm": 1.2014125271111171,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 19504
    },
    {
      "epoch": 0.19505,
      "grad_norm": 1.116248970796125,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 19505
    },
    {
      "epoch": 0.19506,
      "grad_norm": 1.3352551390859568,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 19506
    },
    {
      "epoch": 0.19507,
      "grad_norm": 1.2268556128174655,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 19507
    },
    {
      "epoch": 0.19508,
      "grad_norm": 1.52270928368884,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 19508
    },
    {
      "epoch": 0.19509,
      "grad_norm": 0.9903829036789055,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 19509
    },
    {
      "epoch": 0.1951,
      "grad_norm": 1.6486221557732899,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 19510
    },
    {
      "epoch": 0.19511,
      "grad_norm": 1.1343000920670525,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 19511
    },
    {
      "epoch": 0.19512,
      "grad_norm": 1.3747103241346732,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 19512
    },
    {
      "epoch": 0.19513,
      "grad_norm": 1.3077240373564807,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 19513
    },
    {
      "epoch": 0.19514,
      "grad_norm": 1.3304880069667193,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 19514
    },
    {
      "epoch": 0.19515,
      "grad_norm": 1.3365639153795414,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 19515
    },
    {
      "epoch": 0.19516,
      "grad_norm": 1.2805735532922602,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 19516
    },
    {
      "epoch": 0.19517,
      "grad_norm": 1.176542850338708,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 19517
    },
    {
      "epoch": 0.19518,
      "grad_norm": 1.373636340662171,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 19518
    },
    {
      "epoch": 0.19519,
      "grad_norm": 1.2370671038084415,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 19519
    },
    {
      "epoch": 0.1952,
      "grad_norm": 1.1049544219671612,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 19520
    },
    {
      "epoch": 0.19521,
      "grad_norm": 1.3532990901663196,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 19521
    },
    {
      "epoch": 0.19522,
      "grad_norm": 1.3889945459144393,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 19522
    },
    {
      "epoch": 0.19523,
      "grad_norm": 1.1596723900487986,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 19523
    },
    {
      "epoch": 0.19524,
      "grad_norm": 1.1650541156874012,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 19524
    },
    {
      "epoch": 0.19525,
      "grad_norm": 1.257471344778045,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 19525
    },
    {
      "epoch": 0.19526,
      "grad_norm": 1.2079351500457816,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 19526
    },
    {
      "epoch": 0.19527,
      "grad_norm": 1.2256036351388304,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 19527
    },
    {
      "epoch": 0.19528,
      "grad_norm": 1.246772224839127,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 19528
    },
    {
      "epoch": 0.19529,
      "grad_norm": 1.2693597734212752,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 19529
    },
    {
      "epoch": 0.1953,
      "grad_norm": 0.9784867447087106,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 19530
    },
    {
      "epoch": 0.19531,
      "grad_norm": 1.1659601986877428,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 19531
    },
    {
      "epoch": 0.19532,
      "grad_norm": 1.3708558472221481,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 19532
    },
    {
      "epoch": 0.19533,
      "grad_norm": 1.2520196238389958,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 19533
    },
    {
      "epoch": 0.19534,
      "grad_norm": 1.1855313119014532,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 19534
    },
    {
      "epoch": 0.19535,
      "grad_norm": 1.3884757533251968,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 19535
    },
    {
      "epoch": 0.19536,
      "grad_norm": 1.1952833344636515,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 19536
    },
    {
      "epoch": 0.19537,
      "grad_norm": 1.2574406153793336,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 19537
    },
    {
      "epoch": 0.19538,
      "grad_norm": 1.1060869917154688,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 19538
    },
    {
      "epoch": 0.19539,
      "grad_norm": 1.4285291372266062,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 19539
    },
    {
      "epoch": 0.1954,
      "grad_norm": 1.4360456654895173,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 19540
    },
    {
      "epoch": 0.19541,
      "grad_norm": 1.518265568289758,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 19541
    },
    {
      "epoch": 0.19542,
      "grad_norm": 1.2775972527094612,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 19542
    },
    {
      "epoch": 0.19543,
      "grad_norm": 1.218283698591187,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 19543
    },
    {
      "epoch": 0.19544,
      "grad_norm": 1.3374266934193224,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 19544
    },
    {
      "epoch": 0.19545,
      "grad_norm": 1.2582913334864856,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 19545
    },
    {
      "epoch": 0.19546,
      "grad_norm": 1.4337878676516518,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 19546
    },
    {
      "epoch": 0.19547,
      "grad_norm": 1.1073651769316895,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 19547
    },
    {
      "epoch": 0.19548,
      "grad_norm": 1.410688848816578,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 19548
    },
    {
      "epoch": 0.19549,
      "grad_norm": 1.1377670523166983,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 19549
    },
    {
      "epoch": 0.1955,
      "grad_norm": 1.2137808592369639,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 19550
    },
    {
      "epoch": 0.19551,
      "grad_norm": 1.3996923080260375,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 19551
    },
    {
      "epoch": 0.19552,
      "grad_norm": 1.4013579938780023,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 19552
    },
    {
      "epoch": 0.19553,
      "grad_norm": 1.1686342591779189,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 19553
    },
    {
      "epoch": 0.19554,
      "grad_norm": 1.3472445469317165,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 19554
    },
    {
      "epoch": 0.19555,
      "grad_norm": 1.2043089863323246,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 19555
    },
    {
      "epoch": 0.19556,
      "grad_norm": 1.0969614136516466,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 19556
    },
    {
      "epoch": 0.19557,
      "grad_norm": 1.5492311727960493,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 19557
    },
    {
      "epoch": 0.19558,
      "grad_norm": 0.9987137999123095,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 19558
    },
    {
      "epoch": 0.19559,
      "grad_norm": 1.4186798103122438,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 19559
    },
    {
      "epoch": 0.1956,
      "grad_norm": 1.2227299263360691,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 19560
    },
    {
      "epoch": 0.19561,
      "grad_norm": 1.2878151342007549,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 19561
    },
    {
      "epoch": 0.19562,
      "grad_norm": 1.2513526923807567,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 19562
    },
    {
      "epoch": 0.19563,
      "grad_norm": 1.1845288870786257,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 19563
    },
    {
      "epoch": 0.19564,
      "grad_norm": 1.2968329398378675,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 19564
    },
    {
      "epoch": 0.19565,
      "grad_norm": 1.1556297886996172,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 19565
    },
    {
      "epoch": 0.19566,
      "grad_norm": 1.5926717140053277,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 19566
    },
    {
      "epoch": 0.19567,
      "grad_norm": 1.0154673277724815,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 19567
    },
    {
      "epoch": 0.19568,
      "grad_norm": 1.5961582019473584,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 19568
    },
    {
      "epoch": 0.19569,
      "grad_norm": 1.0970845859088805,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 19569
    },
    {
      "epoch": 0.1957,
      "grad_norm": 1.382216240127931,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 19570
    },
    {
      "epoch": 0.19571,
      "grad_norm": 1.2870224499187535,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 19571
    },
    {
      "epoch": 0.19572,
      "grad_norm": 1.2231550655720813,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 19572
    },
    {
      "epoch": 0.19573,
      "grad_norm": 1.0646521546788703,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 19573
    },
    {
      "epoch": 0.19574,
      "grad_norm": 1.3969937544755526,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 19574
    },
    {
      "epoch": 0.19575,
      "grad_norm": 1.379948980968305,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 19575
    },
    {
      "epoch": 0.19576,
      "grad_norm": 1.0433605833792832,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19576
    },
    {
      "epoch": 0.19577,
      "grad_norm": 1.5545742346773992,
      "learning_rate": 0.003,
      "loss": 4.074,
      "step": 19577
    },
    {
      "epoch": 0.19578,
      "grad_norm": 1.1495336764895239,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 19578
    },
    {
      "epoch": 0.19579,
      "grad_norm": 1.2713073092805014,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 19579
    },
    {
      "epoch": 0.1958,
      "grad_norm": 1.2475666394907055,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 19580
    },
    {
      "epoch": 0.19581,
      "grad_norm": 1.081462776113956,
      "learning_rate": 0.003,
      "loss": 4.0055,
      "step": 19581
    },
    {
      "epoch": 0.19582,
      "grad_norm": 1.642268700127265,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 19582
    },
    {
      "epoch": 0.19583,
      "grad_norm": 1.03323585563909,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 19583
    },
    {
      "epoch": 0.19584,
      "grad_norm": 1.55716996864311,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 19584
    },
    {
      "epoch": 0.19585,
      "grad_norm": 0.993583704461042,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 19585
    },
    {
      "epoch": 0.19586,
      "grad_norm": 1.2703165301059727,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 19586
    },
    {
      "epoch": 0.19587,
      "grad_norm": 1.3644708922683342,
      "learning_rate": 0.003,
      "loss": 4.0837,
      "step": 19587
    },
    {
      "epoch": 0.19588,
      "grad_norm": 1.3114723319710333,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 19588
    },
    {
      "epoch": 0.19589,
      "grad_norm": 1.2118999445324037,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 19589
    },
    {
      "epoch": 0.1959,
      "grad_norm": 1.2095389613176255,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 19590
    },
    {
      "epoch": 0.19591,
      "grad_norm": 1.286135984669697,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 19591
    },
    {
      "epoch": 0.19592,
      "grad_norm": 1.2367742585855162,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 19592
    },
    {
      "epoch": 0.19593,
      "grad_norm": 1.1296819706661674,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 19593
    },
    {
      "epoch": 0.19594,
      "grad_norm": 1.328571573474519,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 19594
    },
    {
      "epoch": 0.19595,
      "grad_norm": 1.1078568494276055,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 19595
    },
    {
      "epoch": 0.19596,
      "grad_norm": 1.35862584683534,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 19596
    },
    {
      "epoch": 0.19597,
      "grad_norm": 1.2569322781269223,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 19597
    },
    {
      "epoch": 0.19598,
      "grad_norm": 1.468685773176229,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 19598
    },
    {
      "epoch": 0.19599,
      "grad_norm": 1.2630812981750894,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 19599
    },
    {
      "epoch": 0.196,
      "grad_norm": 1.2238107822625526,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 19600
    },
    {
      "epoch": 0.19601,
      "grad_norm": 1.2472312210806207,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19601
    },
    {
      "epoch": 0.19602,
      "grad_norm": 1.3741733077287006,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 19602
    },
    {
      "epoch": 0.19603,
      "grad_norm": 1.1794829060341756,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 19603
    },
    {
      "epoch": 0.19604,
      "grad_norm": 1.3311347113042369,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 19604
    },
    {
      "epoch": 0.19605,
      "grad_norm": 1.0357739621590778,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 19605
    },
    {
      "epoch": 0.19606,
      "grad_norm": 1.5275152034988269,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19606
    },
    {
      "epoch": 0.19607,
      "grad_norm": 1.076429551433992,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 19607
    },
    {
      "epoch": 0.19608,
      "grad_norm": 1.3647193929111205,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 19608
    },
    {
      "epoch": 0.19609,
      "grad_norm": 1.4867220985587168,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 19609
    },
    {
      "epoch": 0.1961,
      "grad_norm": 1.125587708642376,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 19610
    },
    {
      "epoch": 0.19611,
      "grad_norm": 1.3631568088824906,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 19611
    },
    {
      "epoch": 0.19612,
      "grad_norm": 1.2624153115040977,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 19612
    },
    {
      "epoch": 0.19613,
      "grad_norm": 1.2075066353474955,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 19613
    },
    {
      "epoch": 0.19614,
      "grad_norm": 1.3088675832743637,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 19614
    },
    {
      "epoch": 0.19615,
      "grad_norm": 1.121251813389829,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 19615
    },
    {
      "epoch": 0.19616,
      "grad_norm": 1.1065842533281147,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 19616
    },
    {
      "epoch": 0.19617,
      "grad_norm": 1.1859974265552933,
      "learning_rate": 0.003,
      "loss": 4.0852,
      "step": 19617
    },
    {
      "epoch": 0.19618,
      "grad_norm": 1.4820292716416597,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 19618
    },
    {
      "epoch": 0.19619,
      "grad_norm": 1.0404900909759272,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 19619
    },
    {
      "epoch": 0.1962,
      "grad_norm": 1.5799678557313894,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 19620
    },
    {
      "epoch": 0.19621,
      "grad_norm": 1.626377600840289,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 19621
    },
    {
      "epoch": 0.19622,
      "grad_norm": 1.2952931979556077,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19622
    },
    {
      "epoch": 0.19623,
      "grad_norm": 1.2740424205319891,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 19623
    },
    {
      "epoch": 0.19624,
      "grad_norm": 1.4104946822740858,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 19624
    },
    {
      "epoch": 0.19625,
      "grad_norm": 1.1955443930403675,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 19625
    },
    {
      "epoch": 0.19626,
      "grad_norm": 1.3927373880231353,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 19626
    },
    {
      "epoch": 0.19627,
      "grad_norm": 1.0953666384205296,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 19627
    },
    {
      "epoch": 0.19628,
      "grad_norm": 1.5311802781921875,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 19628
    },
    {
      "epoch": 0.19629,
      "grad_norm": 1.1746433054095549,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 19629
    },
    {
      "epoch": 0.1963,
      "grad_norm": 1.2335569066702672,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 19630
    },
    {
      "epoch": 0.19631,
      "grad_norm": 1.269593398527897,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 19631
    },
    {
      "epoch": 0.19632,
      "grad_norm": 1.514708223805417,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 19632
    },
    {
      "epoch": 0.19633,
      "grad_norm": 1.252573199409988,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 19633
    },
    {
      "epoch": 0.19634,
      "grad_norm": 1.1973954065559616,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 19634
    },
    {
      "epoch": 0.19635,
      "grad_norm": 1.493911201291848,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 19635
    },
    {
      "epoch": 0.19636,
      "grad_norm": 1.088139048031911,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 19636
    },
    {
      "epoch": 0.19637,
      "grad_norm": 1.2257101321716657,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 19637
    },
    {
      "epoch": 0.19638,
      "grad_norm": 1.246686123057356,
      "learning_rate": 0.003,
      "loss": 4.0014,
      "step": 19638
    },
    {
      "epoch": 0.19639,
      "grad_norm": 1.4004374784771214,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 19639
    },
    {
      "epoch": 0.1964,
      "grad_norm": 1.2275817958209077,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 19640
    },
    {
      "epoch": 0.19641,
      "grad_norm": 1.2209983282623578,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 19641
    },
    {
      "epoch": 0.19642,
      "grad_norm": 1.4039034259534702,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 19642
    },
    {
      "epoch": 0.19643,
      "grad_norm": 1.1166387643455105,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 19643
    },
    {
      "epoch": 0.19644,
      "grad_norm": 1.1532866416283507,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 19644
    },
    {
      "epoch": 0.19645,
      "grad_norm": 1.1511586224034764,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 19645
    },
    {
      "epoch": 0.19646,
      "grad_norm": 1.2214661318176832,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 19646
    },
    {
      "epoch": 0.19647,
      "grad_norm": 1.1854282536885827,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 19647
    },
    {
      "epoch": 0.19648,
      "grad_norm": 1.5656721018163773,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 19648
    },
    {
      "epoch": 0.19649,
      "grad_norm": 0.9549202035089439,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 19649
    },
    {
      "epoch": 0.1965,
      "grad_norm": 1.3043409972453064,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 19650
    },
    {
      "epoch": 0.19651,
      "grad_norm": 1.338903895964893,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 19651
    },
    {
      "epoch": 0.19652,
      "grad_norm": 1.1193027749921456,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 19652
    },
    {
      "epoch": 0.19653,
      "grad_norm": 1.294277668693427,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 19653
    },
    {
      "epoch": 0.19654,
      "grad_norm": 1.3469191435912236,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 19654
    },
    {
      "epoch": 0.19655,
      "grad_norm": 1.2635214490978228,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 19655
    },
    {
      "epoch": 0.19656,
      "grad_norm": 1.039754857917406,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 19656
    },
    {
      "epoch": 0.19657,
      "grad_norm": 1.425960245599324,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 19657
    },
    {
      "epoch": 0.19658,
      "grad_norm": 1.1131270129876956,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 19658
    },
    {
      "epoch": 0.19659,
      "grad_norm": 1.4233979198300244,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 19659
    },
    {
      "epoch": 0.1966,
      "grad_norm": 1.1268682959377379,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 19660
    },
    {
      "epoch": 0.19661,
      "grad_norm": 1.2983658300625234,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 19661
    },
    {
      "epoch": 0.19662,
      "grad_norm": 1.3793633866642898,
      "learning_rate": 0.003,
      "loss": 4.0781,
      "step": 19662
    },
    {
      "epoch": 0.19663,
      "grad_norm": 1.1681532953099747,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 19663
    },
    {
      "epoch": 0.19664,
      "grad_norm": 1.449327499358008,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 19664
    },
    {
      "epoch": 0.19665,
      "grad_norm": 1.1594026577520937,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 19665
    },
    {
      "epoch": 0.19666,
      "grad_norm": 1.304588843645449,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 19666
    },
    {
      "epoch": 0.19667,
      "grad_norm": 1.2855175972244166,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 19667
    },
    {
      "epoch": 0.19668,
      "grad_norm": 1.2108562775295844,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19668
    },
    {
      "epoch": 0.19669,
      "grad_norm": 1.4881363232865699,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 19669
    },
    {
      "epoch": 0.1967,
      "grad_norm": 1.1050320650830274,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 19670
    },
    {
      "epoch": 0.19671,
      "grad_norm": 1.4028067934145172,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 19671
    },
    {
      "epoch": 0.19672,
      "grad_norm": 1.212968583047561,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 19672
    },
    {
      "epoch": 0.19673,
      "grad_norm": 1.4005758245894164,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 19673
    },
    {
      "epoch": 0.19674,
      "grad_norm": 1.1259823388693067,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 19674
    },
    {
      "epoch": 0.19675,
      "grad_norm": 1.4615329880964987,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 19675
    },
    {
      "epoch": 0.19676,
      "grad_norm": 1.0969160536054376,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 19676
    },
    {
      "epoch": 0.19677,
      "grad_norm": 1.4081030226763,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 19677
    },
    {
      "epoch": 0.19678,
      "grad_norm": 1.2211659347447854,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 19678
    },
    {
      "epoch": 0.19679,
      "grad_norm": 1.2150982107622408,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 19679
    },
    {
      "epoch": 0.1968,
      "grad_norm": 1.3535734950333507,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 19680
    },
    {
      "epoch": 0.19681,
      "grad_norm": 1.0663404840851425,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 19681
    },
    {
      "epoch": 0.19682,
      "grad_norm": 1.6754522163616776,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 19682
    },
    {
      "epoch": 0.19683,
      "grad_norm": 1.0209211426190017,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 19683
    },
    {
      "epoch": 0.19684,
      "grad_norm": 1.4384155112003565,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 19684
    },
    {
      "epoch": 0.19685,
      "grad_norm": 1.183663031201126,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 19685
    },
    {
      "epoch": 0.19686,
      "grad_norm": 1.204767719156397,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 19686
    },
    {
      "epoch": 0.19687,
      "grad_norm": 1.43264678315374,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 19687
    },
    {
      "epoch": 0.19688,
      "grad_norm": 1.1444222737791538,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 19688
    },
    {
      "epoch": 0.19689,
      "grad_norm": 1.4164282948887503,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 19689
    },
    {
      "epoch": 0.1969,
      "grad_norm": 1.519323637865488,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 19690
    },
    {
      "epoch": 0.19691,
      "grad_norm": 0.9758922904826194,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 19691
    },
    {
      "epoch": 0.19692,
      "grad_norm": 1.2979414816054384,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 19692
    },
    {
      "epoch": 0.19693,
      "grad_norm": 1.2027431932617603,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 19693
    },
    {
      "epoch": 0.19694,
      "grad_norm": 1.483541597195135,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 19694
    },
    {
      "epoch": 0.19695,
      "grad_norm": 1.2092502940393162,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 19695
    },
    {
      "epoch": 0.19696,
      "grad_norm": 1.4774850821717136,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 19696
    },
    {
      "epoch": 0.19697,
      "grad_norm": 1.326064854971596,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 19697
    },
    {
      "epoch": 0.19698,
      "grad_norm": 1.0462582412112698,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 19698
    },
    {
      "epoch": 0.19699,
      "grad_norm": 1.2129076652215423,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 19699
    },
    {
      "epoch": 0.197,
      "grad_norm": 0.9404415450425605,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 19700
    },
    {
      "epoch": 0.19701,
      "grad_norm": 1.408164938206551,
      "learning_rate": 0.003,
      "loss": 4.0832,
      "step": 19701
    },
    {
      "epoch": 0.19702,
      "grad_norm": 1.140221276354727,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 19702
    },
    {
      "epoch": 0.19703,
      "grad_norm": 1.632658533157582,
      "learning_rate": 0.003,
      "loss": 4.0815,
      "step": 19703
    },
    {
      "epoch": 0.19704,
      "grad_norm": 1.2428620826128585,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 19704
    },
    {
      "epoch": 0.19705,
      "grad_norm": 1.2176041406759814,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 19705
    },
    {
      "epoch": 0.19706,
      "grad_norm": 1.1040196987269462,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 19706
    },
    {
      "epoch": 0.19707,
      "grad_norm": 1.3294950566418005,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 19707
    },
    {
      "epoch": 0.19708,
      "grad_norm": 1.2000338311222096,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 19708
    },
    {
      "epoch": 0.19709,
      "grad_norm": 1.3011272832919174,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 19709
    },
    {
      "epoch": 0.1971,
      "grad_norm": 1.0625031410202341,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 19710
    },
    {
      "epoch": 0.19711,
      "grad_norm": 1.3300242910674192,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 19711
    },
    {
      "epoch": 0.19712,
      "grad_norm": 1.0724203569381963,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 19712
    },
    {
      "epoch": 0.19713,
      "grad_norm": 1.4760362987564652,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 19713
    },
    {
      "epoch": 0.19714,
      "grad_norm": 1.2615842808043052,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 19714
    },
    {
      "epoch": 0.19715,
      "grad_norm": 1.4767863603989584,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 19715
    },
    {
      "epoch": 0.19716,
      "grad_norm": 1.3187426984567565,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 19716
    },
    {
      "epoch": 0.19717,
      "grad_norm": 1.1333722085269455,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 19717
    },
    {
      "epoch": 0.19718,
      "grad_norm": 1.2921747578348115,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 19718
    },
    {
      "epoch": 0.19719,
      "grad_norm": 1.1755894496904107,
      "learning_rate": 0.003,
      "loss": 4.0051,
      "step": 19719
    },
    {
      "epoch": 0.1972,
      "grad_norm": 1.342887844017686,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 19720
    },
    {
      "epoch": 0.19721,
      "grad_norm": 1.1299536859301806,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19721
    },
    {
      "epoch": 0.19722,
      "grad_norm": 1.2841992171190308,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 19722
    },
    {
      "epoch": 0.19723,
      "grad_norm": 1.2866555374877549,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 19723
    },
    {
      "epoch": 0.19724,
      "grad_norm": 1.3985165773499388,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 19724
    },
    {
      "epoch": 0.19725,
      "grad_norm": 1.0846348006817275,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 19725
    },
    {
      "epoch": 0.19726,
      "grad_norm": 1.3040928333214943,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 19726
    },
    {
      "epoch": 0.19727,
      "grad_norm": 1.2191004795301124,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 19727
    },
    {
      "epoch": 0.19728,
      "grad_norm": 1.240838042793232,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 19728
    },
    {
      "epoch": 0.19729,
      "grad_norm": 1.7369481182987327,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 19729
    },
    {
      "epoch": 0.1973,
      "grad_norm": 1.012198468014683,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 19730
    },
    {
      "epoch": 0.19731,
      "grad_norm": 1.4397242497122429,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 19731
    },
    {
      "epoch": 0.19732,
      "grad_norm": 1.4325529884833712,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 19732
    },
    {
      "epoch": 0.19733,
      "grad_norm": 1.072002107789731,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 19733
    },
    {
      "epoch": 0.19734,
      "grad_norm": 1.4621090283171523,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 19734
    },
    {
      "epoch": 0.19735,
      "grad_norm": 1.216349933331565,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 19735
    },
    {
      "epoch": 0.19736,
      "grad_norm": 1.4335498129154791,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 19736
    },
    {
      "epoch": 0.19737,
      "grad_norm": 1.162866042862082,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 19737
    },
    {
      "epoch": 0.19738,
      "grad_norm": 1.1751061853343154,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 19738
    },
    {
      "epoch": 0.19739,
      "grad_norm": 1.2320589920594738,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 19739
    },
    {
      "epoch": 0.1974,
      "grad_norm": 1.2162642898797495,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19740
    },
    {
      "epoch": 0.19741,
      "grad_norm": 1.5012653945264107,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 19741
    },
    {
      "epoch": 0.19742,
      "grad_norm": 1.2301414165363693,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 19742
    },
    {
      "epoch": 0.19743,
      "grad_norm": 1.520156501009919,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 19743
    },
    {
      "epoch": 0.19744,
      "grad_norm": 1.0940999798891307,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 19744
    },
    {
      "epoch": 0.19745,
      "grad_norm": 1.213718260293644,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 19745
    },
    {
      "epoch": 0.19746,
      "grad_norm": 1.335698961189427,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 19746
    },
    {
      "epoch": 0.19747,
      "grad_norm": 1.3992921968338952,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 19747
    },
    {
      "epoch": 0.19748,
      "grad_norm": 1.3038221562173062,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 19748
    },
    {
      "epoch": 0.19749,
      "grad_norm": 1.131098670339918,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 19749
    },
    {
      "epoch": 0.1975,
      "grad_norm": 1.332030843151734,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 19750
    },
    {
      "epoch": 0.19751,
      "grad_norm": 1.092283739981877,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 19751
    },
    {
      "epoch": 0.19752,
      "grad_norm": 1.302600775651643,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 19752
    },
    {
      "epoch": 0.19753,
      "grad_norm": 1.4128837585792693,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 19753
    },
    {
      "epoch": 0.19754,
      "grad_norm": 1.3196083484063799,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 19754
    },
    {
      "epoch": 0.19755,
      "grad_norm": 1.2206351818074985,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 19755
    },
    {
      "epoch": 0.19756,
      "grad_norm": 1.1734556955156739,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 19756
    },
    {
      "epoch": 0.19757,
      "grad_norm": 1.2553909696677223,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 19757
    },
    {
      "epoch": 0.19758,
      "grad_norm": 1.3092843491839787,
      "learning_rate": 0.003,
      "loss": 4.0928,
      "step": 19758
    },
    {
      "epoch": 0.19759,
      "grad_norm": 1.1460844748190464,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 19759
    },
    {
      "epoch": 0.1976,
      "grad_norm": 1.6616584576681912,
      "learning_rate": 0.003,
      "loss": 4.1023,
      "step": 19760
    },
    {
      "epoch": 0.19761,
      "grad_norm": 1.0073287619168259,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 19761
    },
    {
      "epoch": 0.19762,
      "grad_norm": 1.6368543289617503,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 19762
    },
    {
      "epoch": 0.19763,
      "grad_norm": 1.0704247305937595,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 19763
    },
    {
      "epoch": 0.19764,
      "grad_norm": 1.5161658380137082,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 19764
    },
    {
      "epoch": 0.19765,
      "grad_norm": 1.1206543468688372,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 19765
    },
    {
      "epoch": 0.19766,
      "grad_norm": 1.2946853858835212,
      "learning_rate": 0.003,
      "loss": 4.0888,
      "step": 19766
    },
    {
      "epoch": 0.19767,
      "grad_norm": 1.3963559045296543,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 19767
    },
    {
      "epoch": 0.19768,
      "grad_norm": 1.1659693910680011,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 19768
    },
    {
      "epoch": 0.19769,
      "grad_norm": 1.263219690264999,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 19769
    },
    {
      "epoch": 0.1977,
      "grad_norm": 1.1237394968167047,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 19770
    },
    {
      "epoch": 0.19771,
      "grad_norm": 1.4099145685647496,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 19771
    },
    {
      "epoch": 0.19772,
      "grad_norm": 1.1953522455301766,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 19772
    },
    {
      "epoch": 0.19773,
      "grad_norm": 1.295100610042665,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 19773
    },
    {
      "epoch": 0.19774,
      "grad_norm": 1.137876500914938,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 19774
    },
    {
      "epoch": 0.19775,
      "grad_norm": 1.2001488009196706,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 19775
    },
    {
      "epoch": 0.19776,
      "grad_norm": 1.2092359549959708,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 19776
    },
    {
      "epoch": 0.19777,
      "grad_norm": 1.34434557789657,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 19777
    },
    {
      "epoch": 0.19778,
      "grad_norm": 1.2289769972887201,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 19778
    },
    {
      "epoch": 0.19779,
      "grad_norm": 1.3665721151529493,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 19779
    },
    {
      "epoch": 0.1978,
      "grad_norm": 1.200114494842136,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 19780
    },
    {
      "epoch": 0.19781,
      "grad_norm": 1.3027910997422532,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 19781
    },
    {
      "epoch": 0.19782,
      "grad_norm": 1.482047104503544,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 19782
    },
    {
      "epoch": 0.19783,
      "grad_norm": 1.2591000303376083,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 19783
    },
    {
      "epoch": 0.19784,
      "grad_norm": 1.3366242503251673,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 19784
    },
    {
      "epoch": 0.19785,
      "grad_norm": 1.0752888957210784,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 19785
    },
    {
      "epoch": 0.19786,
      "grad_norm": 1.443814361354114,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 19786
    },
    {
      "epoch": 0.19787,
      "grad_norm": 1.1848301940578947,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 19787
    },
    {
      "epoch": 0.19788,
      "grad_norm": 1.2137564648369716,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 19788
    },
    {
      "epoch": 0.19789,
      "grad_norm": 1.434319996226596,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 19789
    },
    {
      "epoch": 0.1979,
      "grad_norm": 1.2083466466277641,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 19790
    },
    {
      "epoch": 0.19791,
      "grad_norm": 1.1864184491836716,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 19791
    },
    {
      "epoch": 0.19792,
      "grad_norm": 1.1504282266698773,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 19792
    },
    {
      "epoch": 0.19793,
      "grad_norm": 1.359723089458863,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 19793
    },
    {
      "epoch": 0.19794,
      "grad_norm": 1.1286923137424962,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 19794
    },
    {
      "epoch": 0.19795,
      "grad_norm": 1.3713746016133905,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 19795
    },
    {
      "epoch": 0.19796,
      "grad_norm": 1.274236183603281,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 19796
    },
    {
      "epoch": 0.19797,
      "grad_norm": 1.4731164051604642,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 19797
    },
    {
      "epoch": 0.19798,
      "grad_norm": 1.1204714469424668,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 19798
    },
    {
      "epoch": 0.19799,
      "grad_norm": 1.4618269616945994,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 19799
    },
    {
      "epoch": 0.198,
      "grad_norm": 1.2709446431944755,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 19800
    },
    {
      "epoch": 0.19801,
      "grad_norm": 1.4320673965655029,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 19801
    },
    {
      "epoch": 0.19802,
      "grad_norm": 1.1044847641006195,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 19802
    },
    {
      "epoch": 0.19803,
      "grad_norm": 1.34151957210351,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 19803
    },
    {
      "epoch": 0.19804,
      "grad_norm": 1.3141402337987576,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 19804
    },
    {
      "epoch": 0.19805,
      "grad_norm": 1.2197903583977774,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 19805
    },
    {
      "epoch": 0.19806,
      "grad_norm": 1.3001406850014425,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 19806
    },
    {
      "epoch": 0.19807,
      "grad_norm": 1.276210948948388,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 19807
    },
    {
      "epoch": 0.19808,
      "grad_norm": 1.3200298625015325,
      "learning_rate": 0.003,
      "loss": 4.1025,
      "step": 19808
    },
    {
      "epoch": 0.19809,
      "grad_norm": 1.1835970967927212,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19809
    },
    {
      "epoch": 0.1981,
      "grad_norm": 1.5677071896388945,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 19810
    },
    {
      "epoch": 0.19811,
      "grad_norm": 0.9995657286317865,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 19811
    },
    {
      "epoch": 0.19812,
      "grad_norm": 1.6820081353806127,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 19812
    },
    {
      "epoch": 0.19813,
      "grad_norm": 1.2837414160665404,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 19813
    },
    {
      "epoch": 0.19814,
      "grad_norm": 1.2874766422848056,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 19814
    },
    {
      "epoch": 0.19815,
      "grad_norm": 1.227080582477793,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 19815
    },
    {
      "epoch": 0.19816,
      "grad_norm": 1.273447687133168,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 19816
    },
    {
      "epoch": 0.19817,
      "grad_norm": 1.0630905492942202,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 19817
    },
    {
      "epoch": 0.19818,
      "grad_norm": 1.3489130559244074,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 19818
    },
    {
      "epoch": 0.19819,
      "grad_norm": 1.051338096722153,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 19819
    },
    {
      "epoch": 0.1982,
      "grad_norm": 1.490436613752286,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 19820
    },
    {
      "epoch": 0.19821,
      "grad_norm": 1.162533122975096,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 19821
    },
    {
      "epoch": 0.19822,
      "grad_norm": 1.2323568924771044,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 19822
    },
    {
      "epoch": 0.19823,
      "grad_norm": 1.4005699745657514,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 19823
    },
    {
      "epoch": 0.19824,
      "grad_norm": 1.2512895475599117,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 19824
    },
    {
      "epoch": 0.19825,
      "grad_norm": 1.4769621946202904,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 19825
    },
    {
      "epoch": 0.19826,
      "grad_norm": 0.9742481471592784,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 19826
    },
    {
      "epoch": 0.19827,
      "grad_norm": 1.441936380925012,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 19827
    },
    {
      "epoch": 0.19828,
      "grad_norm": 1.3288185839554574,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 19828
    },
    {
      "epoch": 0.19829,
      "grad_norm": 1.230331722424284,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 19829
    },
    {
      "epoch": 0.1983,
      "grad_norm": 1.2536637743366936,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 19830
    },
    {
      "epoch": 0.19831,
      "grad_norm": 1.0284825575681031,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 19831
    },
    {
      "epoch": 0.19832,
      "grad_norm": 1.5022375842577895,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 19832
    },
    {
      "epoch": 0.19833,
      "grad_norm": 1.1806101547354726,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 19833
    },
    {
      "epoch": 0.19834,
      "grad_norm": 1.5733560786177732,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 19834
    },
    {
      "epoch": 0.19835,
      "grad_norm": 1.0162032290423761,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 19835
    },
    {
      "epoch": 0.19836,
      "grad_norm": 1.3385074775758588,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 19836
    },
    {
      "epoch": 0.19837,
      "grad_norm": 1.2916607796056685,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 19837
    },
    {
      "epoch": 0.19838,
      "grad_norm": 1.1405657623166203,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 19838
    },
    {
      "epoch": 0.19839,
      "grad_norm": 1.3169298747833968,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 19839
    },
    {
      "epoch": 0.1984,
      "grad_norm": 1.3005917176876103,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 19840
    },
    {
      "epoch": 0.19841,
      "grad_norm": 1.1950476180369751,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 19841
    },
    {
      "epoch": 0.19842,
      "grad_norm": 1.5084819204641784,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 19842
    },
    {
      "epoch": 0.19843,
      "grad_norm": 1.3434051931634823,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 19843
    },
    {
      "epoch": 0.19844,
      "grad_norm": 1.1731326757972111,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 19844
    },
    {
      "epoch": 0.19845,
      "grad_norm": 1.340524498590452,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 19845
    },
    {
      "epoch": 0.19846,
      "grad_norm": 1.1333914521541875,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 19846
    },
    {
      "epoch": 0.19847,
      "grad_norm": 1.3156608871762097,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 19847
    },
    {
      "epoch": 0.19848,
      "grad_norm": 1.1448220768181383,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 19848
    },
    {
      "epoch": 0.19849,
      "grad_norm": 1.2130472682156814,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 19849
    },
    {
      "epoch": 0.1985,
      "grad_norm": 1.6346768634941562,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 19850
    },
    {
      "epoch": 0.19851,
      "grad_norm": 1.254898576966238,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 19851
    },
    {
      "epoch": 0.19852,
      "grad_norm": 1.1760033171758264,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 19852
    },
    {
      "epoch": 0.19853,
      "grad_norm": 1.3457811542948783,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 19853
    },
    {
      "epoch": 0.19854,
      "grad_norm": 1.058916187847548,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 19854
    },
    {
      "epoch": 0.19855,
      "grad_norm": 1.7990623310783709,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 19855
    },
    {
      "epoch": 0.19856,
      "grad_norm": 0.8837071904663756,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 19856
    },
    {
      "epoch": 0.19857,
      "grad_norm": 1.142372906591354,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 19857
    },
    {
      "epoch": 0.19858,
      "grad_norm": 1.2797239297455683,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 19858
    },
    {
      "epoch": 0.19859,
      "grad_norm": 1.3144649280053993,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 19859
    },
    {
      "epoch": 0.1986,
      "grad_norm": 1.0841021383129212,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 19860
    },
    {
      "epoch": 0.19861,
      "grad_norm": 1.2014366394398417,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 19861
    },
    {
      "epoch": 0.19862,
      "grad_norm": 1.0775585816687974,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 19862
    },
    {
      "epoch": 0.19863,
      "grad_norm": 1.1743469652799556,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 19863
    },
    {
      "epoch": 0.19864,
      "grad_norm": 1.410076067702952,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 19864
    },
    {
      "epoch": 0.19865,
      "grad_norm": 1.1785401252329866,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 19865
    },
    {
      "epoch": 0.19866,
      "grad_norm": 1.495714534192095,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 19866
    },
    {
      "epoch": 0.19867,
      "grad_norm": 1.5079690053059533,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 19867
    },
    {
      "epoch": 0.19868,
      "grad_norm": 1.3028722347832058,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 19868
    },
    {
      "epoch": 0.19869,
      "grad_norm": 1.2229180419660335,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 19869
    },
    {
      "epoch": 0.1987,
      "grad_norm": 1.3000083287845383,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 19870
    },
    {
      "epoch": 0.19871,
      "grad_norm": 1.2806149133519935,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 19871
    },
    {
      "epoch": 0.19872,
      "grad_norm": 1.4813382010985194,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 19872
    },
    {
      "epoch": 0.19873,
      "grad_norm": 1.107017891996426,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 19873
    },
    {
      "epoch": 0.19874,
      "grad_norm": 1.6276836704174702,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 19874
    },
    {
      "epoch": 0.19875,
      "grad_norm": 0.9401675544626849,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 19875
    },
    {
      "epoch": 0.19876,
      "grad_norm": 1.1573380857665263,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 19876
    },
    {
      "epoch": 0.19877,
      "grad_norm": 1.5833019989287398,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 19877
    },
    {
      "epoch": 0.19878,
      "grad_norm": 1.00550242586545,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 19878
    },
    {
      "epoch": 0.19879,
      "grad_norm": 1.7589818815100986,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 19879
    },
    {
      "epoch": 0.1988,
      "grad_norm": 1.2298928930003272,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 19880
    },
    {
      "epoch": 0.19881,
      "grad_norm": 1.255587711606598,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 19881
    },
    {
      "epoch": 0.19882,
      "grad_norm": 1.221522282609823,
      "learning_rate": 0.003,
      "loss": 4.0864,
      "step": 19882
    },
    {
      "epoch": 0.19883,
      "grad_norm": 1.2605578122182348,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 19883
    },
    {
      "epoch": 0.19884,
      "grad_norm": 1.3609947205276998,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 19884
    },
    {
      "epoch": 0.19885,
      "grad_norm": 1.1422373658539642,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 19885
    },
    {
      "epoch": 0.19886,
      "grad_norm": 1.4430124926734533,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 19886
    },
    {
      "epoch": 0.19887,
      "grad_norm": 1.0289263684445262,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 19887
    },
    {
      "epoch": 0.19888,
      "grad_norm": 1.2489489645216596,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 19888
    },
    {
      "epoch": 0.19889,
      "grad_norm": 1.1521252967917146,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 19889
    },
    {
      "epoch": 0.1989,
      "grad_norm": 1.342265374894575,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 19890
    },
    {
      "epoch": 0.19891,
      "grad_norm": 1.1780633659571689,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 19891
    },
    {
      "epoch": 0.19892,
      "grad_norm": 1.260831484289609,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 19892
    },
    {
      "epoch": 0.19893,
      "grad_norm": 1.218498024755539,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 19893
    },
    {
      "epoch": 0.19894,
      "grad_norm": 1.4019787340199494,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 19894
    },
    {
      "epoch": 0.19895,
      "grad_norm": 1.023096803623969,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 19895
    },
    {
      "epoch": 0.19896,
      "grad_norm": 1.5085660726002734,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 19896
    },
    {
      "epoch": 0.19897,
      "grad_norm": 1.2362003169101425,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 19897
    },
    {
      "epoch": 0.19898,
      "grad_norm": 1.2063931465920943,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 19898
    },
    {
      "epoch": 0.19899,
      "grad_norm": 1.255079633207274,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 19899
    },
    {
      "epoch": 0.199,
      "grad_norm": 1.0795335302369486,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 19900
    },
    {
      "epoch": 0.19901,
      "grad_norm": 1.517738283239204,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 19901
    },
    {
      "epoch": 0.19902,
      "grad_norm": 1.275472442563502,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 19902
    },
    {
      "epoch": 0.19903,
      "grad_norm": 1.598886659373521,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 19903
    },
    {
      "epoch": 0.19904,
      "grad_norm": 1.174823330451563,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 19904
    },
    {
      "epoch": 0.19905,
      "grad_norm": 1.3753719989298585,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 19905
    },
    {
      "epoch": 0.19906,
      "grad_norm": 1.2271065069522846,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 19906
    },
    {
      "epoch": 0.19907,
      "grad_norm": 1.311042604189716,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 19907
    },
    {
      "epoch": 0.19908,
      "grad_norm": 1.215407641539844,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 19908
    },
    {
      "epoch": 0.19909,
      "grad_norm": 1.4285042785820834,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 19909
    },
    {
      "epoch": 0.1991,
      "grad_norm": 1.032240696399498,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 19910
    },
    {
      "epoch": 0.19911,
      "grad_norm": 1.5493249948776597,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 19911
    },
    {
      "epoch": 0.19912,
      "grad_norm": 1.2445896337671107,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 19912
    },
    {
      "epoch": 0.19913,
      "grad_norm": 1.2623958234009334,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 19913
    },
    {
      "epoch": 0.19914,
      "grad_norm": 1.175614375207124,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 19914
    },
    {
      "epoch": 0.19915,
      "grad_norm": 1.4829885795294195,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 19915
    },
    {
      "epoch": 0.19916,
      "grad_norm": 1.0508722651509181,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 19916
    },
    {
      "epoch": 0.19917,
      "grad_norm": 1.4794229065211975,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 19917
    },
    {
      "epoch": 0.19918,
      "grad_norm": 0.9297647168578973,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 19918
    },
    {
      "epoch": 0.19919,
      "grad_norm": 1.4133945135714843,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 19919
    },
    {
      "epoch": 0.1992,
      "grad_norm": 1.2317095796741588,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 19920
    },
    {
      "epoch": 0.19921,
      "grad_norm": 1.3173487800851562,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 19921
    },
    {
      "epoch": 0.19922,
      "grad_norm": 1.123948441025127,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 19922
    },
    {
      "epoch": 0.19923,
      "grad_norm": 1.378439216093473,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 19923
    },
    {
      "epoch": 0.19924,
      "grad_norm": 1.3997852719612403,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 19924
    },
    {
      "epoch": 0.19925,
      "grad_norm": 1.159786235179167,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 19925
    },
    {
      "epoch": 0.19926,
      "grad_norm": 1.3538870408494252,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 19926
    },
    {
      "epoch": 0.19927,
      "grad_norm": 1.392540317577808,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 19927
    },
    {
      "epoch": 0.19928,
      "grad_norm": 1.5589978943880642,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 19928
    },
    {
      "epoch": 0.19929,
      "grad_norm": 1.335774839412378,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 19929
    },
    {
      "epoch": 0.1993,
      "grad_norm": 1.1407088420493976,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 19930
    },
    {
      "epoch": 0.19931,
      "grad_norm": 1.4056955897389483,
      "learning_rate": 0.003,
      "loss": 4.0884,
      "step": 19931
    },
    {
      "epoch": 0.19932,
      "grad_norm": 1.240044423909628,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 19932
    },
    {
      "epoch": 0.19933,
      "grad_norm": 1.3847437309510247,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 19933
    },
    {
      "epoch": 0.19934,
      "grad_norm": 1.1917714786704507,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 19934
    },
    {
      "epoch": 0.19935,
      "grad_norm": 1.1149699870223053,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 19935
    },
    {
      "epoch": 0.19936,
      "grad_norm": 1.1342568304392888,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 19936
    },
    {
      "epoch": 0.19937,
      "grad_norm": 1.2700039155178806,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 19937
    },
    {
      "epoch": 0.19938,
      "grad_norm": 1.2080604366635694,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 19938
    },
    {
      "epoch": 0.19939,
      "grad_norm": 1.244308802459882,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 19939
    },
    {
      "epoch": 0.1994,
      "grad_norm": 1.1106149313546723,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 19940
    },
    {
      "epoch": 0.19941,
      "grad_norm": 1.3238101030344678,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 19941
    },
    {
      "epoch": 0.19942,
      "grad_norm": 1.2034382199853526,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 19942
    },
    {
      "epoch": 0.19943,
      "grad_norm": 1.4013367377250077,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 19943
    },
    {
      "epoch": 0.19944,
      "grad_norm": 1.2972300230868017,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 19944
    },
    {
      "epoch": 0.19945,
      "grad_norm": 1.2951811204708457,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 19945
    },
    {
      "epoch": 0.19946,
      "grad_norm": 1.1610586091426425,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 19946
    },
    {
      "epoch": 0.19947,
      "grad_norm": 1.4178084078057516,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 19947
    },
    {
      "epoch": 0.19948,
      "grad_norm": 1.158927526770478,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 19948
    },
    {
      "epoch": 0.19949,
      "grad_norm": 1.2809085892791512,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 19949
    },
    {
      "epoch": 0.1995,
      "grad_norm": 1.3862229637166266,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 19950
    },
    {
      "epoch": 0.19951,
      "grad_norm": 1.1954504386101603,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 19951
    },
    {
      "epoch": 0.19952,
      "grad_norm": 1.3199456984155447,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 19952
    },
    {
      "epoch": 0.19953,
      "grad_norm": 1.2481296971378508,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 19953
    },
    {
      "epoch": 0.19954,
      "grad_norm": 1.2697107338936542,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 19954
    },
    {
      "epoch": 0.19955,
      "grad_norm": 1.119725423462046,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 19955
    },
    {
      "epoch": 0.19956,
      "grad_norm": 1.2760293725356533,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 19956
    },
    {
      "epoch": 0.19957,
      "grad_norm": 1.297012595989181,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 19957
    },
    {
      "epoch": 0.19958,
      "grad_norm": 1.419427840824775,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 19958
    },
    {
      "epoch": 0.19959,
      "grad_norm": 0.9602902187756038,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 19959
    },
    {
      "epoch": 0.1996,
      "grad_norm": 1.1223572375743875,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 19960
    },
    {
      "epoch": 0.19961,
      "grad_norm": 1.5950858182495724,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 19961
    },
    {
      "epoch": 0.19962,
      "grad_norm": 1.1441204681554322,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 19962
    },
    {
      "epoch": 0.19963,
      "grad_norm": 1.3126206493847443,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 19963
    },
    {
      "epoch": 0.19964,
      "grad_norm": 1.2433338163350853,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 19964
    },
    {
      "epoch": 0.19965,
      "grad_norm": 1.291102879209652,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 19965
    },
    {
      "epoch": 0.19966,
      "grad_norm": 1.0747583379896155,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 19966
    },
    {
      "epoch": 0.19967,
      "grad_norm": 1.519028210916497,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 19967
    },
    {
      "epoch": 0.19968,
      "grad_norm": 1.0716160858687132,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 19968
    },
    {
      "epoch": 0.19969,
      "grad_norm": 1.5164868648659249,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 19969
    },
    {
      "epoch": 0.1997,
      "grad_norm": 0.9582219395470151,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 19970
    },
    {
      "epoch": 0.19971,
      "grad_norm": 1.3474662875829415,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 19971
    },
    {
      "epoch": 0.19972,
      "grad_norm": 1.4013781080276175,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 19972
    },
    {
      "epoch": 0.19973,
      "grad_norm": 1.1454916139054667,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 19973
    },
    {
      "epoch": 0.19974,
      "grad_norm": 1.3984598096784422,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 19974
    },
    {
      "epoch": 0.19975,
      "grad_norm": 1.3501707571712709,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 19975
    },
    {
      "epoch": 0.19976,
      "grad_norm": 1.3484662570777235,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 19976
    },
    {
      "epoch": 0.19977,
      "grad_norm": 1.216492280996437,
      "learning_rate": 0.003,
      "loss": 4.0135,
      "step": 19977
    },
    {
      "epoch": 0.19978,
      "grad_norm": 1.142709040714013,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 19978
    },
    {
      "epoch": 0.19979,
      "grad_norm": 1.1135564681824013,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 19979
    },
    {
      "epoch": 0.1998,
      "grad_norm": 1.2621270377951224,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 19980
    },
    {
      "epoch": 0.19981,
      "grad_norm": 1.403693140830989,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 19981
    },
    {
      "epoch": 0.19982,
      "grad_norm": 0.9981059833386207,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 19982
    },
    {
      "epoch": 0.19983,
      "grad_norm": 1.3993533271308505,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 19983
    },
    {
      "epoch": 0.19984,
      "grad_norm": 1.3028828668644656,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 19984
    },
    {
      "epoch": 0.19985,
      "grad_norm": 1.4637659181918143,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 19985
    },
    {
      "epoch": 0.19986,
      "grad_norm": 1.1807978110160766,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 19986
    },
    {
      "epoch": 0.19987,
      "grad_norm": 1.4742560009047105,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 19987
    },
    {
      "epoch": 0.19988,
      "grad_norm": 1.173404595311201,
      "learning_rate": 0.003,
      "loss": 4.003,
      "step": 19988
    },
    {
      "epoch": 0.19989,
      "grad_norm": 1.25242865522054,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 19989
    },
    {
      "epoch": 0.1999,
      "grad_norm": 1.387684480624249,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 19990
    },
    {
      "epoch": 0.19991,
      "grad_norm": 1.3890228225877637,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 19991
    },
    {
      "epoch": 0.19992,
      "grad_norm": 1.0908316718165803,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 19992
    },
    {
      "epoch": 0.19993,
      "grad_norm": 1.3039394189034819,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 19993
    },
    {
      "epoch": 0.19994,
      "grad_norm": 1.3524185493509755,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 19994
    },
    {
      "epoch": 0.19995,
      "grad_norm": 1.1431826798538707,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 19995
    },
    {
      "epoch": 0.19996,
      "grad_norm": 1.0803834019026506,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 19996
    },
    {
      "epoch": 0.19997,
      "grad_norm": 1.285890308629624,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 19997
    },
    {
      "epoch": 0.19998,
      "grad_norm": 1.3028078375512837,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 19998
    },
    {
      "epoch": 0.19999,
      "grad_norm": 1.6457298761704569,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 19999
    },
    {
      "epoch": 0.2,
      "grad_norm": 1.115562086778755,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 20000
    },
    {
      "epoch": 0.20001,
      "grad_norm": 1.622288790814596,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 20001
    },
    {
      "epoch": 0.20002,
      "grad_norm": 1.165374122105647,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 20002
    },
    {
      "epoch": 0.20003,
      "grad_norm": 1.180965129177058,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 20003
    },
    {
      "epoch": 0.20004,
      "grad_norm": 1.3909555533551616,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 20004
    },
    {
      "epoch": 0.20005,
      "grad_norm": 1.0214383584852353,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 20005
    },
    {
      "epoch": 0.20006,
      "grad_norm": 1.4877283163255348,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 20006
    },
    {
      "epoch": 0.20007,
      "grad_norm": 0.9872070090617107,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 20007
    },
    {
      "epoch": 0.20008,
      "grad_norm": 1.4735250417481307,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 20008
    },
    {
      "epoch": 0.20009,
      "grad_norm": 1.1796954083047972,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 20009
    },
    {
      "epoch": 0.2001,
      "grad_norm": 1.3488507948704809,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 20010
    },
    {
      "epoch": 0.20011,
      "grad_norm": 1.3140881598709688,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 20011
    },
    {
      "epoch": 0.20012,
      "grad_norm": 1.289546318132687,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 20012
    },
    {
      "epoch": 0.20013,
      "grad_norm": 1.500309834972784,
      "learning_rate": 0.003,
      "loss": 4.0842,
      "step": 20013
    },
    {
      "epoch": 0.20014,
      "grad_norm": 1.1858787717683152,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 20014
    },
    {
      "epoch": 0.20015,
      "grad_norm": 1.0725535067917935,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 20015
    },
    {
      "epoch": 0.20016,
      "grad_norm": 1.597372267607848,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 20016
    },
    {
      "epoch": 0.20017,
      "grad_norm": 1.0563564460372217,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 20017
    },
    {
      "epoch": 0.20018,
      "grad_norm": 1.2592839008600343,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 20018
    },
    {
      "epoch": 0.20019,
      "grad_norm": 1.100640324338181,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 20019
    },
    {
      "epoch": 0.2002,
      "grad_norm": 1.4228491575051412,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 20020
    },
    {
      "epoch": 0.20021,
      "grad_norm": 1.179568307485374,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 20021
    },
    {
      "epoch": 0.20022,
      "grad_norm": 1.3440274696691585,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 20022
    },
    {
      "epoch": 0.20023,
      "grad_norm": 1.054115868379596,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 20023
    },
    {
      "epoch": 0.20024,
      "grad_norm": 1.2388225483904531,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 20024
    },
    {
      "epoch": 0.20025,
      "grad_norm": 1.2029896621757317,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 20025
    },
    {
      "epoch": 0.20026,
      "grad_norm": 1.5468913707142669,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 20026
    },
    {
      "epoch": 0.20027,
      "grad_norm": 1.2897726030241574,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 20027
    },
    {
      "epoch": 0.20028,
      "grad_norm": 1.3801929628271197,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 20028
    },
    {
      "epoch": 0.20029,
      "grad_norm": 1.111478986976431,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 20029
    },
    {
      "epoch": 0.2003,
      "grad_norm": 1.2733116930929462,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 20030
    },
    {
      "epoch": 0.20031,
      "grad_norm": 1.1679315621127557,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 20031
    },
    {
      "epoch": 0.20032,
      "grad_norm": 1.2552177994447329,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 20032
    },
    {
      "epoch": 0.20033,
      "grad_norm": 1.013463527645874,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 20033
    },
    {
      "epoch": 0.20034,
      "grad_norm": 1.4306374729371047,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 20034
    },
    {
      "epoch": 0.20035,
      "grad_norm": 1.316492598084679,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 20035
    },
    {
      "epoch": 0.20036,
      "grad_norm": 1.5313705166049483,
      "learning_rate": 0.003,
      "loss": 4.0904,
      "step": 20036
    },
    {
      "epoch": 0.20037,
      "grad_norm": 1.1156570219061208,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 20037
    },
    {
      "epoch": 0.20038,
      "grad_norm": 1.2566360302027615,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 20038
    },
    {
      "epoch": 0.20039,
      "grad_norm": 1.2317306888095088,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 20039
    },
    {
      "epoch": 0.2004,
      "grad_norm": 1.3288950327435645,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 20040
    },
    {
      "epoch": 0.20041,
      "grad_norm": 1.3179608954967692,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 20041
    },
    {
      "epoch": 0.20042,
      "grad_norm": 1.3907809117743692,
      "learning_rate": 0.003,
      "loss": 4.0969,
      "step": 20042
    },
    {
      "epoch": 0.20043,
      "grad_norm": 1.2781584573939455,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 20043
    },
    {
      "epoch": 0.20044,
      "grad_norm": 1.3675301151990953,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 20044
    },
    {
      "epoch": 0.20045,
      "grad_norm": 1.270993402537075,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 20045
    },
    {
      "epoch": 0.20046,
      "grad_norm": 1.2574780035973352,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 20046
    },
    {
      "epoch": 0.20047,
      "grad_norm": 1.3772785672275094,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 20047
    },
    {
      "epoch": 0.20048,
      "grad_norm": 1.0130786444460946,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 20048
    },
    {
      "epoch": 0.20049,
      "grad_norm": 1.4723445410063,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 20049
    },
    {
      "epoch": 0.2005,
      "grad_norm": 0.9607882441038933,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 20050
    },
    {
      "epoch": 0.20051,
      "grad_norm": 1.3958089165986418,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 20051
    },
    {
      "epoch": 0.20052,
      "grad_norm": 1.1082284128077051,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 20052
    },
    {
      "epoch": 0.20053,
      "grad_norm": 1.2324738316505863,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 20053
    },
    {
      "epoch": 0.20054,
      "grad_norm": 1.1701801286715698,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 20054
    },
    {
      "epoch": 0.20055,
      "grad_norm": 1.5767109989106038,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 20055
    },
    {
      "epoch": 0.20056,
      "grad_norm": 1.1822714727528925,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 20056
    },
    {
      "epoch": 0.20057,
      "grad_norm": 1.512624540405217,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 20057
    },
    {
      "epoch": 0.20058,
      "grad_norm": 0.9959903059781854,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 20058
    },
    {
      "epoch": 0.20059,
      "grad_norm": 1.3754117091411615,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 20059
    },
    {
      "epoch": 0.2006,
      "grad_norm": 1.0748141614161064,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 20060
    },
    {
      "epoch": 0.20061,
      "grad_norm": 1.531186154248781,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 20061
    },
    {
      "epoch": 0.20062,
      "grad_norm": 1.1058611864267804,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 20062
    },
    {
      "epoch": 0.20063,
      "grad_norm": 1.524239022010734,
      "learning_rate": 0.003,
      "loss": 4.0937,
      "step": 20063
    },
    {
      "epoch": 0.20064,
      "grad_norm": 1.1334731641616702,
      "learning_rate": 0.003,
      "loss": 4.0879,
      "step": 20064
    },
    {
      "epoch": 0.20065,
      "grad_norm": 1.4810691498472939,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 20065
    },
    {
      "epoch": 0.20066,
      "grad_norm": 1.0674910488827514,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 20066
    },
    {
      "epoch": 0.20067,
      "grad_norm": 1.6207322617765014,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 20067
    },
    {
      "epoch": 0.20068,
      "grad_norm": 1.2086115569280247,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 20068
    },
    {
      "epoch": 0.20069,
      "grad_norm": 1.2904239609284227,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 20069
    },
    {
      "epoch": 0.2007,
      "grad_norm": 1.0621367796085464,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 20070
    },
    {
      "epoch": 0.20071,
      "grad_norm": 1.4303644222363705,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 20071
    },
    {
      "epoch": 0.20072,
      "grad_norm": 1.1062087157842548,
      "learning_rate": 0.003,
      "loss": 4.0131,
      "step": 20072
    },
    {
      "epoch": 0.20073,
      "grad_norm": 1.3451566024188915,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 20073
    },
    {
      "epoch": 0.20074,
      "grad_norm": 1.1083985823857898,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 20074
    },
    {
      "epoch": 0.20075,
      "grad_norm": 1.6352535934136272,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 20075
    },
    {
      "epoch": 0.20076,
      "grad_norm": 1.002940017778515,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 20076
    },
    {
      "epoch": 0.20077,
      "grad_norm": 1.439017091942511,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 20077
    },
    {
      "epoch": 0.20078,
      "grad_norm": 1.3782679495157797,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 20078
    },
    {
      "epoch": 0.20079,
      "grad_norm": 1.3827209292167564,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 20079
    },
    {
      "epoch": 0.2008,
      "grad_norm": 1.4801421508733992,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 20080
    },
    {
      "epoch": 0.20081,
      "grad_norm": 1.2299642601354424,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 20081
    },
    {
      "epoch": 0.20082,
      "grad_norm": 1.0748645186589552,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 20082
    },
    {
      "epoch": 0.20083,
      "grad_norm": 1.6764368194458739,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 20083
    },
    {
      "epoch": 0.20084,
      "grad_norm": 1.1209269924804597,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 20084
    },
    {
      "epoch": 0.20085,
      "grad_norm": 1.4128168768585003,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 20085
    },
    {
      "epoch": 0.20086,
      "grad_norm": 0.8986786012657335,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 20086
    },
    {
      "epoch": 0.20087,
      "grad_norm": 1.3521795476722067,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 20087
    },
    {
      "epoch": 0.20088,
      "grad_norm": 1.235014884231785,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 20088
    },
    {
      "epoch": 0.20089,
      "grad_norm": 1.2032672957274169,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 20089
    },
    {
      "epoch": 0.2009,
      "grad_norm": 1.4142481020944606,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 20090
    },
    {
      "epoch": 0.20091,
      "grad_norm": 1.3443612249758137,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 20091
    },
    {
      "epoch": 0.20092,
      "grad_norm": 1.2835404914601647,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 20092
    },
    {
      "epoch": 0.20093,
      "grad_norm": 1.1366687506192215,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 20093
    },
    {
      "epoch": 0.20094,
      "grad_norm": 1.38504527416011,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 20094
    },
    {
      "epoch": 0.20095,
      "grad_norm": 1.28205422920678,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 20095
    },
    {
      "epoch": 0.20096,
      "grad_norm": 1.1859031552274366,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 20096
    },
    {
      "epoch": 0.20097,
      "grad_norm": 1.0980781630590808,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 20097
    },
    {
      "epoch": 0.20098,
      "grad_norm": 1.3229800200284534,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 20098
    },
    {
      "epoch": 0.20099,
      "grad_norm": 1.1651813466999181,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 20099
    },
    {
      "epoch": 0.201,
      "grad_norm": 1.4175521333513585,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 20100
    },
    {
      "epoch": 0.20101,
      "grad_norm": 1.2325766987635356,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 20101
    },
    {
      "epoch": 0.20102,
      "grad_norm": 1.3466611075080315,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 20102
    },
    {
      "epoch": 0.20103,
      "grad_norm": 1.034269239083285,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 20103
    },
    {
      "epoch": 0.20104,
      "grad_norm": 1.376615743184664,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 20104
    },
    {
      "epoch": 0.20105,
      "grad_norm": 1.0123650904183055,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 20105
    },
    {
      "epoch": 0.20106,
      "grad_norm": 1.486929100142425,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 20106
    },
    {
      "epoch": 0.20107,
      "grad_norm": 1.1278985311114458,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 20107
    },
    {
      "epoch": 0.20108,
      "grad_norm": 1.3273253735382757,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 20108
    },
    {
      "epoch": 0.20109,
      "grad_norm": 1.4065619334016792,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 20109
    },
    {
      "epoch": 0.2011,
      "grad_norm": 1.1120393553676875,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 20110
    },
    {
      "epoch": 0.20111,
      "grad_norm": 1.4423889109397692,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 20111
    },
    {
      "epoch": 0.20112,
      "grad_norm": 1.1855066575374555,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 20112
    },
    {
      "epoch": 0.20113,
      "grad_norm": 1.5549470082093861,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 20113
    },
    {
      "epoch": 0.20114,
      "grad_norm": 1.137511816050523,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 20114
    },
    {
      "epoch": 0.20115,
      "grad_norm": 1.3462244667642502,
      "learning_rate": 0.003,
      "loss": 4.0855,
      "step": 20115
    },
    {
      "epoch": 0.20116,
      "grad_norm": 1.3206899218544779,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 20116
    },
    {
      "epoch": 0.20117,
      "grad_norm": 1.1252241220562909,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 20117
    },
    {
      "epoch": 0.20118,
      "grad_norm": 1.3005460086834528,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 20118
    },
    {
      "epoch": 0.20119,
      "grad_norm": 1.10493921156302,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 20119
    },
    {
      "epoch": 0.2012,
      "grad_norm": 1.3581398182718027,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 20120
    },
    {
      "epoch": 0.20121,
      "grad_norm": 1.2797980078882876,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 20121
    },
    {
      "epoch": 0.20122,
      "grad_norm": 1.4728301874267207,
      "learning_rate": 0.003,
      "loss": 4.0882,
      "step": 20122
    },
    {
      "epoch": 0.20123,
      "grad_norm": 0.9950607551670232,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 20123
    },
    {
      "epoch": 0.20124,
      "grad_norm": 1.5816024665862407,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 20124
    },
    {
      "epoch": 0.20125,
      "grad_norm": 1.1421784593217872,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 20125
    },
    {
      "epoch": 0.20126,
      "grad_norm": 1.5559806391426019,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 20126
    },
    {
      "epoch": 0.20127,
      "grad_norm": 1.1535837996632359,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 20127
    },
    {
      "epoch": 0.20128,
      "grad_norm": 1.3551460341143398,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 20128
    },
    {
      "epoch": 0.20129,
      "grad_norm": 0.9418879294863098,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 20129
    },
    {
      "epoch": 0.2013,
      "grad_norm": 1.3655003356694182,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 20130
    },
    {
      "epoch": 0.20131,
      "grad_norm": 1.2759535807730578,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 20131
    },
    {
      "epoch": 0.20132,
      "grad_norm": 1.198035312885684,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 20132
    },
    {
      "epoch": 0.20133,
      "grad_norm": 1.2929011908222066,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20133
    },
    {
      "epoch": 0.20134,
      "grad_norm": 1.318354331958291,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 20134
    },
    {
      "epoch": 0.20135,
      "grad_norm": 1.8884102519126955,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 20135
    },
    {
      "epoch": 0.20136,
      "grad_norm": 1.061299313732376,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 20136
    },
    {
      "epoch": 0.20137,
      "grad_norm": 1.267913944663337,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 20137
    },
    {
      "epoch": 0.20138,
      "grad_norm": 1.1492742229813098,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 20138
    },
    {
      "epoch": 0.20139,
      "grad_norm": 1.3433772181787036,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 20139
    },
    {
      "epoch": 0.2014,
      "grad_norm": 1.1855937629839706,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 20140
    },
    {
      "epoch": 0.20141,
      "grad_norm": 1.5788519463679762,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 20141
    },
    {
      "epoch": 0.20142,
      "grad_norm": 1.1655157739928947,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 20142
    },
    {
      "epoch": 0.20143,
      "grad_norm": 1.3720381633727898,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 20143
    },
    {
      "epoch": 0.20144,
      "grad_norm": 1.0623338765075157,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 20144
    },
    {
      "epoch": 0.20145,
      "grad_norm": 1.3952375725860666,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 20145
    },
    {
      "epoch": 0.20146,
      "grad_norm": 1.148310575339641,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 20146
    },
    {
      "epoch": 0.20147,
      "grad_norm": 1.430096768068745,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 20147
    },
    {
      "epoch": 0.20148,
      "grad_norm": 1.1339120576674702,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 20148
    },
    {
      "epoch": 0.20149,
      "grad_norm": 1.264807104959973,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 20149
    },
    {
      "epoch": 0.2015,
      "grad_norm": 1.179430688158245,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 20150
    },
    {
      "epoch": 0.20151,
      "grad_norm": 1.2961004328981822,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 20151
    },
    {
      "epoch": 0.20152,
      "grad_norm": 1.0940237875476264,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 20152
    },
    {
      "epoch": 0.20153,
      "grad_norm": 1.286709407766725,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 20153
    },
    {
      "epoch": 0.20154,
      "grad_norm": 1.3227627119690266,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 20154
    },
    {
      "epoch": 0.20155,
      "grad_norm": 1.6105251847809123,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 20155
    },
    {
      "epoch": 0.20156,
      "grad_norm": 1.0747232683539776,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 20156
    },
    {
      "epoch": 0.20157,
      "grad_norm": 1.3600242060341243,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 20157
    },
    {
      "epoch": 0.20158,
      "grad_norm": 1.2727788318316686,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 20158
    },
    {
      "epoch": 0.20159,
      "grad_norm": 1.449762729363481,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 20159
    },
    {
      "epoch": 0.2016,
      "grad_norm": 1.2522029223183724,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 20160
    },
    {
      "epoch": 0.20161,
      "grad_norm": 1.470939394443757,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 20161
    },
    {
      "epoch": 0.20162,
      "grad_norm": 0.9780713632241691,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 20162
    },
    {
      "epoch": 0.20163,
      "grad_norm": 1.4069970555682791,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 20163
    },
    {
      "epoch": 0.20164,
      "grad_norm": 1.1120523232113189,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 20164
    },
    {
      "epoch": 0.20165,
      "grad_norm": 1.3154390323043668,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 20165
    },
    {
      "epoch": 0.20166,
      "grad_norm": 1.2081791962327835,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 20166
    },
    {
      "epoch": 0.20167,
      "grad_norm": 1.382220430187685,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 20167
    },
    {
      "epoch": 0.20168,
      "grad_norm": 1.2383371057025319,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 20168
    },
    {
      "epoch": 0.20169,
      "grad_norm": 1.2679904971753184,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 20169
    },
    {
      "epoch": 0.2017,
      "grad_norm": 1.3384110300248249,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 20170
    },
    {
      "epoch": 0.20171,
      "grad_norm": 0.9326477901198288,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 20171
    },
    {
      "epoch": 0.20172,
      "grad_norm": 1.350582451313348,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 20172
    },
    {
      "epoch": 0.20173,
      "grad_norm": 1.340611180510728,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 20173
    },
    {
      "epoch": 0.20174,
      "grad_norm": 1.2952026073736063,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 20174
    },
    {
      "epoch": 0.20175,
      "grad_norm": 1.2413191354769146,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 20175
    },
    {
      "epoch": 0.20176,
      "grad_norm": 1.2481837574056514,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 20176
    },
    {
      "epoch": 0.20177,
      "grad_norm": 1.2435819576658815,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 20177
    },
    {
      "epoch": 0.20178,
      "grad_norm": 1.0742089009050002,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 20178
    },
    {
      "epoch": 0.20179,
      "grad_norm": 1.4036356251900837,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 20179
    },
    {
      "epoch": 0.2018,
      "grad_norm": 1.1926696153443954,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 20180
    },
    {
      "epoch": 0.20181,
      "grad_norm": 1.491775506134129,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 20181
    },
    {
      "epoch": 0.20182,
      "grad_norm": 1.1372863797918262,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 20182
    },
    {
      "epoch": 0.20183,
      "grad_norm": 1.3711382987272935,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 20183
    },
    {
      "epoch": 0.20184,
      "grad_norm": 1.0709976530119787,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 20184
    },
    {
      "epoch": 0.20185,
      "grad_norm": 1.3723070346701574,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 20185
    },
    {
      "epoch": 0.20186,
      "grad_norm": 1.528028266743247,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 20186
    },
    {
      "epoch": 0.20187,
      "grad_norm": 1.4037706732194764,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 20187
    },
    {
      "epoch": 0.20188,
      "grad_norm": 1.149409673743734,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 20188
    },
    {
      "epoch": 0.20189,
      "grad_norm": 1.2286043874357866,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 20189
    },
    {
      "epoch": 0.2019,
      "grad_norm": 1.1478341184973142,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 20190
    },
    {
      "epoch": 0.20191,
      "grad_norm": 1.2120327248254865,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 20191
    },
    {
      "epoch": 0.20192,
      "grad_norm": 1.1481050371361636,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 20192
    },
    {
      "epoch": 0.20193,
      "grad_norm": 1.2131783050485487,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 20193
    },
    {
      "epoch": 0.20194,
      "grad_norm": 1.1697391913516844,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 20194
    },
    {
      "epoch": 0.20195,
      "grad_norm": 1.4817698697937207,
      "learning_rate": 0.003,
      "loss": 4.1085,
      "step": 20195
    },
    {
      "epoch": 0.20196,
      "grad_norm": 1.0721068277211925,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 20196
    },
    {
      "epoch": 0.20197,
      "grad_norm": 1.5394627898345763,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 20197
    },
    {
      "epoch": 0.20198,
      "grad_norm": 1.1113258936770396,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 20198
    },
    {
      "epoch": 0.20199,
      "grad_norm": 1.5233099851925598,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 20199
    },
    {
      "epoch": 0.202,
      "grad_norm": 1.1817220138550921,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 20200
    },
    {
      "epoch": 0.20201,
      "grad_norm": 1.2445238084487324,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 20201
    },
    {
      "epoch": 0.20202,
      "grad_norm": 1.2647594216143536,
      "learning_rate": 0.003,
      "loss": 4.0947,
      "step": 20202
    },
    {
      "epoch": 0.20203,
      "grad_norm": 1.2384518932185162,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 20203
    },
    {
      "epoch": 0.20204,
      "grad_norm": 1.0317194075839051,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 20204
    },
    {
      "epoch": 0.20205,
      "grad_norm": 1.323480590891624,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 20205
    },
    {
      "epoch": 0.20206,
      "grad_norm": 1.0463189815345117,
      "learning_rate": 0.003,
      "loss": 4.0054,
      "step": 20206
    },
    {
      "epoch": 0.20207,
      "grad_norm": 1.495579252481595,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 20207
    },
    {
      "epoch": 0.20208,
      "grad_norm": 1.079861742292042,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 20208
    },
    {
      "epoch": 0.20209,
      "grad_norm": 1.2910326664187293,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 20209
    },
    {
      "epoch": 0.2021,
      "grad_norm": 1.4701760393593748,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 20210
    },
    {
      "epoch": 0.20211,
      "grad_norm": 1.3310843617338952,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 20211
    },
    {
      "epoch": 0.20212,
      "grad_norm": 1.5347552587814175,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 20212
    },
    {
      "epoch": 0.20213,
      "grad_norm": 1.1585200462266632,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 20213
    },
    {
      "epoch": 0.20214,
      "grad_norm": 1.3185943826230864,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 20214
    },
    {
      "epoch": 0.20215,
      "grad_norm": 1.3251462472306097,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 20215
    },
    {
      "epoch": 0.20216,
      "grad_norm": 1.1553789059246724,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 20216
    },
    {
      "epoch": 0.20217,
      "grad_norm": 1.0744917015208617,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 20217
    },
    {
      "epoch": 0.20218,
      "grad_norm": 1.5741468499145435,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 20218
    },
    {
      "epoch": 0.20219,
      "grad_norm": 0.991116419691916,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 20219
    },
    {
      "epoch": 0.2022,
      "grad_norm": 1.467538659804866,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 20220
    },
    {
      "epoch": 0.20221,
      "grad_norm": 1.3377664250726413,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 20221
    },
    {
      "epoch": 0.20222,
      "grad_norm": 1.3989307178459185,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 20222
    },
    {
      "epoch": 0.20223,
      "grad_norm": 1.1634816025495518,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 20223
    },
    {
      "epoch": 0.20224,
      "grad_norm": 1.4188224020522449,
      "learning_rate": 0.003,
      "loss": 4.0883,
      "step": 20224
    },
    {
      "epoch": 0.20225,
      "grad_norm": 1.0811208864402644,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 20225
    },
    {
      "epoch": 0.20226,
      "grad_norm": 1.4280355181481164,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 20226
    },
    {
      "epoch": 0.20227,
      "grad_norm": 1.067544445702645,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 20227
    },
    {
      "epoch": 0.20228,
      "grad_norm": 1.6110678887820848,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 20228
    },
    {
      "epoch": 0.20229,
      "grad_norm": 0.9769871544490712,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 20229
    },
    {
      "epoch": 0.2023,
      "grad_norm": 1.3872537306793618,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 20230
    },
    {
      "epoch": 0.20231,
      "grad_norm": 1.0661111881765468,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 20231
    },
    {
      "epoch": 0.20232,
      "grad_norm": 1.2983706866034253,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 20232
    },
    {
      "epoch": 0.20233,
      "grad_norm": 1.3006655842830865,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 20233
    },
    {
      "epoch": 0.20234,
      "grad_norm": 1.6780930215987617,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 20234
    },
    {
      "epoch": 0.20235,
      "grad_norm": 1.143017211264876,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 20235
    },
    {
      "epoch": 0.20236,
      "grad_norm": 1.369127090739487,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 20236
    },
    {
      "epoch": 0.20237,
      "grad_norm": 1.0208193547565565,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20237
    },
    {
      "epoch": 0.20238,
      "grad_norm": 1.2853649168833825,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 20238
    },
    {
      "epoch": 0.20239,
      "grad_norm": 1.2039176355369394,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 20239
    },
    {
      "epoch": 0.2024,
      "grad_norm": 1.2742021486780266,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 20240
    },
    {
      "epoch": 0.20241,
      "grad_norm": 1.0981347079706447,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 20241
    },
    {
      "epoch": 0.20242,
      "grad_norm": 1.4868618409719494,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 20242
    },
    {
      "epoch": 0.20243,
      "grad_norm": 1.1534983644467036,
      "learning_rate": 0.003,
      "loss": 4.0155,
      "step": 20243
    },
    {
      "epoch": 0.20244,
      "grad_norm": 1.3505490920042413,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 20244
    },
    {
      "epoch": 0.20245,
      "grad_norm": 1.2754207396280954,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 20245
    },
    {
      "epoch": 0.20246,
      "grad_norm": 1.2481516555763843,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 20246
    },
    {
      "epoch": 0.20247,
      "grad_norm": 1.3683820050551518,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 20247
    },
    {
      "epoch": 0.20248,
      "grad_norm": 1.3825827423130315,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 20248
    },
    {
      "epoch": 0.20249,
      "grad_norm": 1.188296243989267,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 20249
    },
    {
      "epoch": 0.2025,
      "grad_norm": 1.2789938257566154,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 20250
    },
    {
      "epoch": 0.20251,
      "grad_norm": 1.2262139492662278,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 20251
    },
    {
      "epoch": 0.20252,
      "grad_norm": 1.1547178266304627,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 20252
    },
    {
      "epoch": 0.20253,
      "grad_norm": 1.7121811575875046,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 20253
    },
    {
      "epoch": 0.20254,
      "grad_norm": 1.1515771332390023,
      "learning_rate": 0.003,
      "loss": 4.0956,
      "step": 20254
    },
    {
      "epoch": 0.20255,
      "grad_norm": 1.5168764501142065,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 20255
    },
    {
      "epoch": 0.20256,
      "grad_norm": 1.0447316920803935,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 20256
    },
    {
      "epoch": 0.20257,
      "grad_norm": 1.5409148718058376,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 20257
    },
    {
      "epoch": 0.20258,
      "grad_norm": 1.254626363776836,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 20258
    },
    {
      "epoch": 0.20259,
      "grad_norm": 1.3004260472547127,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 20259
    },
    {
      "epoch": 0.2026,
      "grad_norm": 1.425302205282692,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 20260
    },
    {
      "epoch": 0.20261,
      "grad_norm": 1.1133046134525415,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 20261
    },
    {
      "epoch": 0.20262,
      "grad_norm": 1.2249565414123091,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 20262
    },
    {
      "epoch": 0.20263,
      "grad_norm": 1.2147479847044318,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 20263
    },
    {
      "epoch": 0.20264,
      "grad_norm": 1.2785029752457941,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 20264
    },
    {
      "epoch": 0.20265,
      "grad_norm": 1.356830248487893,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 20265
    },
    {
      "epoch": 0.20266,
      "grad_norm": 1.0981050254445741,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 20266
    },
    {
      "epoch": 0.20267,
      "grad_norm": 1.1983540241053046,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 20267
    },
    {
      "epoch": 0.20268,
      "grad_norm": 1.2823777515572727,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 20268
    },
    {
      "epoch": 0.20269,
      "grad_norm": 1.1623789358938565,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 20269
    },
    {
      "epoch": 0.2027,
      "grad_norm": 1.3864113772264623,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 20270
    },
    {
      "epoch": 0.20271,
      "grad_norm": 1.0095425180500388,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 20271
    },
    {
      "epoch": 0.20272,
      "grad_norm": 1.4567747087753842,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 20272
    },
    {
      "epoch": 0.20273,
      "grad_norm": 1.2986456339209054,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 20273
    },
    {
      "epoch": 0.20274,
      "grad_norm": 1.5445124404317825,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 20274
    },
    {
      "epoch": 0.20275,
      "grad_norm": 1.1873407876828512,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 20275
    },
    {
      "epoch": 0.20276,
      "grad_norm": 1.2862597617477896,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 20276
    },
    {
      "epoch": 0.20277,
      "grad_norm": 1.0957498635637768,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 20277
    },
    {
      "epoch": 0.20278,
      "grad_norm": 1.4062364206465463,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 20278
    },
    {
      "epoch": 0.20279,
      "grad_norm": 1.2413485576763343,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 20279
    },
    {
      "epoch": 0.2028,
      "grad_norm": 1.384671038415917,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 20280
    },
    {
      "epoch": 0.20281,
      "grad_norm": 1.0885068965446765,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 20281
    },
    {
      "epoch": 0.20282,
      "grad_norm": 1.3977988221248578,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 20282
    },
    {
      "epoch": 0.20283,
      "grad_norm": 1.1067110693550775,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 20283
    },
    {
      "epoch": 0.20284,
      "grad_norm": 1.3549528720449708,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 20284
    },
    {
      "epoch": 0.20285,
      "grad_norm": 1.1860419143984546,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20285
    },
    {
      "epoch": 0.20286,
      "grad_norm": 1.2059777955000675,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 20286
    },
    {
      "epoch": 0.20287,
      "grad_norm": 1.2452324495460783,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 20287
    },
    {
      "epoch": 0.20288,
      "grad_norm": 1.2008982965029176,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20288
    },
    {
      "epoch": 0.20289,
      "grad_norm": 1.2053420925394356,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 20289
    },
    {
      "epoch": 0.2029,
      "grad_norm": 1.1930940105972596,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 20290
    },
    {
      "epoch": 0.20291,
      "grad_norm": 1.1511373172127057,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 20291
    },
    {
      "epoch": 0.20292,
      "grad_norm": 1.5235344977033483,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 20292
    },
    {
      "epoch": 0.20293,
      "grad_norm": 1.0133916595069772,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 20293
    },
    {
      "epoch": 0.20294,
      "grad_norm": 1.449860175797969,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 20294
    },
    {
      "epoch": 0.20295,
      "grad_norm": 1.166260469765599,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 20295
    },
    {
      "epoch": 0.20296,
      "grad_norm": 1.7094333232663563,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 20296
    },
    {
      "epoch": 0.20297,
      "grad_norm": 1.0802573259244068,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 20297
    },
    {
      "epoch": 0.20298,
      "grad_norm": 1.3950396969201893,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 20298
    },
    {
      "epoch": 0.20299,
      "grad_norm": 1.2776938243492255,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 20299
    },
    {
      "epoch": 0.203,
      "grad_norm": 1.1235479306361127,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 20300
    },
    {
      "epoch": 0.20301,
      "grad_norm": 1.3193150170315688,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 20301
    },
    {
      "epoch": 0.20302,
      "grad_norm": 1.3359995387119015,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 20302
    },
    {
      "epoch": 0.20303,
      "grad_norm": 1.3579836294915681,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 20303
    },
    {
      "epoch": 0.20304,
      "grad_norm": 1.3162768399519815,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 20304
    },
    {
      "epoch": 0.20305,
      "grad_norm": 1.3250263807513758,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 20305
    },
    {
      "epoch": 0.20306,
      "grad_norm": 1.2816788958797483,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 20306
    },
    {
      "epoch": 0.20307,
      "grad_norm": 1.326718838954274,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 20307
    },
    {
      "epoch": 0.20308,
      "grad_norm": 1.592020573675577,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 20308
    },
    {
      "epoch": 0.20309,
      "grad_norm": 1.1498016645153961,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 20309
    },
    {
      "epoch": 0.2031,
      "grad_norm": 1.3609823524921811,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 20310
    },
    {
      "epoch": 0.20311,
      "grad_norm": 1.0477184836568727,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 20311
    },
    {
      "epoch": 0.20312,
      "grad_norm": 1.696289530479139,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 20312
    },
    {
      "epoch": 0.20313,
      "grad_norm": 1.3710431911272913,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 20313
    },
    {
      "epoch": 0.20314,
      "grad_norm": 1.089592730722583,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 20314
    },
    {
      "epoch": 0.20315,
      "grad_norm": 1.2866317286088467,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 20315
    },
    {
      "epoch": 0.20316,
      "grad_norm": 1.1346409696551878,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 20316
    },
    {
      "epoch": 0.20317,
      "grad_norm": 1.3248740676276995,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 20317
    },
    {
      "epoch": 0.20318,
      "grad_norm": 1.1648010683216785,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 20318
    },
    {
      "epoch": 0.20319,
      "grad_norm": 1.2382848380382208,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 20319
    },
    {
      "epoch": 0.2032,
      "grad_norm": 1.1936347470703148,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 20320
    },
    {
      "epoch": 0.20321,
      "grad_norm": 1.5168939617426216,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20321
    },
    {
      "epoch": 0.20322,
      "grad_norm": 1.243897914432,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 20322
    },
    {
      "epoch": 0.20323,
      "grad_norm": 1.335384991277468,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 20323
    },
    {
      "epoch": 0.20324,
      "grad_norm": 1.2671372121627433,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 20324
    },
    {
      "epoch": 0.20325,
      "grad_norm": 1.3591566627374516,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 20325
    },
    {
      "epoch": 0.20326,
      "grad_norm": 1.1685880487870943,
      "learning_rate": 0.003,
      "loss": 4.0059,
      "step": 20326
    },
    {
      "epoch": 0.20327,
      "grad_norm": 1.2474832230538342,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 20327
    },
    {
      "epoch": 0.20328,
      "grad_norm": 1.218543277114442,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 20328
    },
    {
      "epoch": 0.20329,
      "grad_norm": 1.3341045186550506,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 20329
    },
    {
      "epoch": 0.2033,
      "grad_norm": 1.267574594995304,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 20330
    },
    {
      "epoch": 0.20331,
      "grad_norm": 1.2795384215934296,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 20331
    },
    {
      "epoch": 0.20332,
      "grad_norm": 1.3637715732263345,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 20332
    },
    {
      "epoch": 0.20333,
      "grad_norm": 1.358986591353964,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 20333
    },
    {
      "epoch": 0.20334,
      "grad_norm": 1.1231116626020192,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 20334
    },
    {
      "epoch": 0.20335,
      "grad_norm": 1.13442333370607,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 20335
    },
    {
      "epoch": 0.20336,
      "grad_norm": 1.3593483790810181,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 20336
    },
    {
      "epoch": 0.20337,
      "grad_norm": 1.2023125147943492,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 20337
    },
    {
      "epoch": 0.20338,
      "grad_norm": 1.5018889719909159,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 20338
    },
    {
      "epoch": 0.20339,
      "grad_norm": 1.1479734371080672,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 20339
    },
    {
      "epoch": 0.2034,
      "grad_norm": 1.3136111992465354,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 20340
    },
    {
      "epoch": 0.20341,
      "grad_norm": 1.3910643534364964,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 20341
    },
    {
      "epoch": 0.20342,
      "grad_norm": 1.1428898175511215,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 20342
    },
    {
      "epoch": 0.20343,
      "grad_norm": 1.4647864618752329,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 20343
    },
    {
      "epoch": 0.20344,
      "grad_norm": 1.1926491616247035,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 20344
    },
    {
      "epoch": 0.20345,
      "grad_norm": 1.246382195862455,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 20345
    },
    {
      "epoch": 0.20346,
      "grad_norm": 1.2274302854273504,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 20346
    },
    {
      "epoch": 0.20347,
      "grad_norm": 1.2504913828412314,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 20347
    },
    {
      "epoch": 0.20348,
      "grad_norm": 1.372360129836187,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 20348
    },
    {
      "epoch": 0.20349,
      "grad_norm": 1.289719034252838,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 20349
    },
    {
      "epoch": 0.2035,
      "grad_norm": 1.6733341302170084,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 20350
    },
    {
      "epoch": 0.20351,
      "grad_norm": 1.2187008495803562,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 20351
    },
    {
      "epoch": 0.20352,
      "grad_norm": 1.3169621407501653,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 20352
    },
    {
      "epoch": 0.20353,
      "grad_norm": 1.4691001197294091,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 20353
    },
    {
      "epoch": 0.20354,
      "grad_norm": 1.0503063097982503,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 20354
    },
    {
      "epoch": 0.20355,
      "grad_norm": 1.3117587120224454,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 20355
    },
    {
      "epoch": 0.20356,
      "grad_norm": 1.1385371164477698,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 20356
    },
    {
      "epoch": 0.20357,
      "grad_norm": 1.3306978257381137,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 20357
    },
    {
      "epoch": 0.20358,
      "grad_norm": 1.435511112935323,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 20358
    },
    {
      "epoch": 0.20359,
      "grad_norm": 1.2726098866315974,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 20359
    },
    {
      "epoch": 0.2036,
      "grad_norm": 1.4639642814763292,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 20360
    },
    {
      "epoch": 0.20361,
      "grad_norm": 1.2133872583870144,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 20361
    },
    {
      "epoch": 0.20362,
      "grad_norm": 1.3707965450986,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 20362
    },
    {
      "epoch": 0.20363,
      "grad_norm": 1.137050348760085,
      "learning_rate": 0.003,
      "loss": 4.0073,
      "step": 20363
    },
    {
      "epoch": 0.20364,
      "grad_norm": 1.2245817580588365,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20364
    },
    {
      "epoch": 0.20365,
      "grad_norm": 1.2015734355515697,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20365
    },
    {
      "epoch": 0.20366,
      "grad_norm": 1.2583926218200907,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 20366
    },
    {
      "epoch": 0.20367,
      "grad_norm": 1.137470719888171,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 20367
    },
    {
      "epoch": 0.20368,
      "grad_norm": 1.412201035519502,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 20368
    },
    {
      "epoch": 0.20369,
      "grad_norm": 1.3518580392474349,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 20369
    },
    {
      "epoch": 0.2037,
      "grad_norm": 1.5867531436060096,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 20370
    },
    {
      "epoch": 0.20371,
      "grad_norm": 1.0850388843692231,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 20371
    },
    {
      "epoch": 0.20372,
      "grad_norm": 1.2594058474456522,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 20372
    },
    {
      "epoch": 0.20373,
      "grad_norm": 1.132405443425768,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 20373
    },
    {
      "epoch": 0.20374,
      "grad_norm": 1.2085162478440552,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 20374
    },
    {
      "epoch": 0.20375,
      "grad_norm": 1.3309587794895381,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 20375
    },
    {
      "epoch": 0.20376,
      "grad_norm": 1.518814487811154,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 20376
    },
    {
      "epoch": 0.20377,
      "grad_norm": 1.1174678164788465,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 20377
    },
    {
      "epoch": 0.20378,
      "grad_norm": 1.5506355617390695,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 20378
    },
    {
      "epoch": 0.20379,
      "grad_norm": 1.066648758046894,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 20379
    },
    {
      "epoch": 0.2038,
      "grad_norm": 1.2907516251263023,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 20380
    },
    {
      "epoch": 0.20381,
      "grad_norm": 1.2177344809594945,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 20381
    },
    {
      "epoch": 0.20382,
      "grad_norm": 1.4399129113062175,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 20382
    },
    {
      "epoch": 0.20383,
      "grad_norm": 1.139629489947086,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 20383
    },
    {
      "epoch": 0.20384,
      "grad_norm": 1.3217997643163781,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 20384
    },
    {
      "epoch": 0.20385,
      "grad_norm": 1.2047773926287186,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20385
    },
    {
      "epoch": 0.20386,
      "grad_norm": 1.1547469008316145,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 20386
    },
    {
      "epoch": 0.20387,
      "grad_norm": 1.3116613988192434,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 20387
    },
    {
      "epoch": 0.20388,
      "grad_norm": 1.1018342578418183,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 20388
    },
    {
      "epoch": 0.20389,
      "grad_norm": 1.2453373265547718,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 20389
    },
    {
      "epoch": 0.2039,
      "grad_norm": 1.2475157243348953,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 20390
    },
    {
      "epoch": 0.20391,
      "grad_norm": 1.3824501706722891,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 20391
    },
    {
      "epoch": 0.20392,
      "grad_norm": 1.36766174684596,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 20392
    },
    {
      "epoch": 0.20393,
      "grad_norm": 1.234481097722599,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 20393
    },
    {
      "epoch": 0.20394,
      "grad_norm": 1.150615707215384,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 20394
    },
    {
      "epoch": 0.20395,
      "grad_norm": 1.4406174400976866,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 20395
    },
    {
      "epoch": 0.20396,
      "grad_norm": 1.045138828697066,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 20396
    },
    {
      "epoch": 0.20397,
      "grad_norm": 1.643939395888882,
      "learning_rate": 0.003,
      "loss": 4.0887,
      "step": 20397
    },
    {
      "epoch": 0.20398,
      "grad_norm": 0.9437710813220958,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 20398
    },
    {
      "epoch": 0.20399,
      "grad_norm": 1.5709103193037224,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 20399
    },
    {
      "epoch": 0.204,
      "grad_norm": 1.3603139040282624,
      "learning_rate": 0.003,
      "loss": 4.0938,
      "step": 20400
    },
    {
      "epoch": 0.20401,
      "grad_norm": 1.258317441464475,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 20401
    },
    {
      "epoch": 0.20402,
      "grad_norm": 1.2323831932165392,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 20402
    },
    {
      "epoch": 0.20403,
      "grad_norm": 1.1613228064934027,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 20403
    },
    {
      "epoch": 0.20404,
      "grad_norm": 1.1157951639671577,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 20404
    },
    {
      "epoch": 0.20405,
      "grad_norm": 1.117386643961847,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 20405
    },
    {
      "epoch": 0.20406,
      "grad_norm": 1.142677385878031,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 20406
    },
    {
      "epoch": 0.20407,
      "grad_norm": 1.4502256122374817,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 20407
    },
    {
      "epoch": 0.20408,
      "grad_norm": 1.2028422026661283,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 20408
    },
    {
      "epoch": 0.20409,
      "grad_norm": 1.5946736655945457,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 20409
    },
    {
      "epoch": 0.2041,
      "grad_norm": 1.1774295096789895,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20410
    },
    {
      "epoch": 0.20411,
      "grad_norm": 1.3394453182476518,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 20411
    },
    {
      "epoch": 0.20412,
      "grad_norm": 1.2014049871896877,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 20412
    },
    {
      "epoch": 0.20413,
      "grad_norm": 1.1046436883020136,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 20413
    },
    {
      "epoch": 0.20414,
      "grad_norm": 1.5143551635735701,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 20414
    },
    {
      "epoch": 0.20415,
      "grad_norm": 1.2081076575150453,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 20415
    },
    {
      "epoch": 0.20416,
      "grad_norm": 1.3398110451492105,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 20416
    },
    {
      "epoch": 0.20417,
      "grad_norm": 1.0986812247155366,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 20417
    },
    {
      "epoch": 0.20418,
      "grad_norm": 1.5545693709449835,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 20418
    },
    {
      "epoch": 0.20419,
      "grad_norm": 1.141388079534894,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 20419
    },
    {
      "epoch": 0.2042,
      "grad_norm": 1.3293185978350845,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 20420
    },
    {
      "epoch": 0.20421,
      "grad_norm": 1.1445835926864036,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 20421
    },
    {
      "epoch": 0.20422,
      "grad_norm": 1.7085839702569687,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 20422
    },
    {
      "epoch": 0.20423,
      "grad_norm": 1.129517781431287,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 20423
    },
    {
      "epoch": 0.20424,
      "grad_norm": 1.4261103639533557,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 20424
    },
    {
      "epoch": 0.20425,
      "grad_norm": 1.1996507960900666,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 20425
    },
    {
      "epoch": 0.20426,
      "grad_norm": 1.2758568684985025,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 20426
    },
    {
      "epoch": 0.20427,
      "grad_norm": 1.1207260094818572,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 20427
    },
    {
      "epoch": 0.20428,
      "grad_norm": 1.3128081076573406,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 20428
    },
    {
      "epoch": 0.20429,
      "grad_norm": 1.3320582459592007,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 20429
    },
    {
      "epoch": 0.2043,
      "grad_norm": 1.2680088635640516,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 20430
    },
    {
      "epoch": 0.20431,
      "grad_norm": 1.2788587267600544,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 20431
    },
    {
      "epoch": 0.20432,
      "grad_norm": 1.2588374516855552,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 20432
    },
    {
      "epoch": 0.20433,
      "grad_norm": 1.2353153784507898,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 20433
    },
    {
      "epoch": 0.20434,
      "grad_norm": 1.229581316148024,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 20434
    },
    {
      "epoch": 0.20435,
      "grad_norm": 1.4257389393320417,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 20435
    },
    {
      "epoch": 0.20436,
      "grad_norm": 1.2496937224156124,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 20436
    },
    {
      "epoch": 0.20437,
      "grad_norm": 1.1178812642826619,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 20437
    },
    {
      "epoch": 0.20438,
      "grad_norm": 1.241905961365824,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 20438
    },
    {
      "epoch": 0.20439,
      "grad_norm": 1.2702669223841556,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 20439
    },
    {
      "epoch": 0.2044,
      "grad_norm": 1.3862685914354573,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 20440
    },
    {
      "epoch": 0.20441,
      "grad_norm": 1.2178800769182598,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 20441
    },
    {
      "epoch": 0.20442,
      "grad_norm": 1.2758757529344358,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 20442
    },
    {
      "epoch": 0.20443,
      "grad_norm": 1.2717016156474636,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 20443
    },
    {
      "epoch": 0.20444,
      "grad_norm": 1.1874295826494963,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 20444
    },
    {
      "epoch": 0.20445,
      "grad_norm": 1.273127108312738,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 20445
    },
    {
      "epoch": 0.20446,
      "grad_norm": 1.3033196593525216,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 20446
    },
    {
      "epoch": 0.20447,
      "grad_norm": 1.3573380259627446,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 20447
    },
    {
      "epoch": 0.20448,
      "grad_norm": 1.3280882364089757,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 20448
    },
    {
      "epoch": 0.20449,
      "grad_norm": 1.1733062924191375,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 20449
    },
    {
      "epoch": 0.2045,
      "grad_norm": 1.2328609540930164,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 20450
    },
    {
      "epoch": 0.20451,
      "grad_norm": 1.3820397298453064,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 20451
    },
    {
      "epoch": 0.20452,
      "grad_norm": 1.1611758200547153,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 20452
    },
    {
      "epoch": 0.20453,
      "grad_norm": 1.1965128389229713,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 20453
    },
    {
      "epoch": 0.20454,
      "grad_norm": 1.1817630791136045,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 20454
    },
    {
      "epoch": 0.20455,
      "grad_norm": 1.702877011445011,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 20455
    },
    {
      "epoch": 0.20456,
      "grad_norm": 1.194996344264795,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 20456
    },
    {
      "epoch": 0.20457,
      "grad_norm": 1.5014411928263622,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 20457
    },
    {
      "epoch": 0.20458,
      "grad_norm": 1.351084444433871,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 20458
    },
    {
      "epoch": 0.20459,
      "grad_norm": 1.3053422526345337,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 20459
    },
    {
      "epoch": 0.2046,
      "grad_norm": 1.1632725678267068,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 20460
    },
    {
      "epoch": 0.20461,
      "grad_norm": 1.4148776664413345,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 20461
    },
    {
      "epoch": 0.20462,
      "grad_norm": 1.0753270302692137,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 20462
    },
    {
      "epoch": 0.20463,
      "grad_norm": 1.244881027428857,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 20463
    },
    {
      "epoch": 0.20464,
      "grad_norm": 1.605785444033807,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 20464
    },
    {
      "epoch": 0.20465,
      "grad_norm": 1.0265654918416915,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 20465
    },
    {
      "epoch": 0.20466,
      "grad_norm": 1.4597953303483058,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 20466
    },
    {
      "epoch": 0.20467,
      "grad_norm": 1.4019596632247724,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 20467
    },
    {
      "epoch": 0.20468,
      "grad_norm": 1.1972777119646112,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 20468
    },
    {
      "epoch": 0.20469,
      "grad_norm": 1.1827608046664349,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 20469
    },
    {
      "epoch": 0.2047,
      "grad_norm": 1.2217603760295772,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 20470
    },
    {
      "epoch": 0.20471,
      "grad_norm": 1.1165912816299286,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 20471
    },
    {
      "epoch": 0.20472,
      "grad_norm": 1.3192129060455589,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 20472
    },
    {
      "epoch": 0.20473,
      "grad_norm": 1.220922299063871,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 20473
    },
    {
      "epoch": 0.20474,
      "grad_norm": 1.3695141554657906,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 20474
    },
    {
      "epoch": 0.20475,
      "grad_norm": 1.2980958215607803,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 20475
    },
    {
      "epoch": 0.20476,
      "grad_norm": 1.1630131801927541,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 20476
    },
    {
      "epoch": 0.20477,
      "grad_norm": 1.3306418530414286,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 20477
    },
    {
      "epoch": 0.20478,
      "grad_norm": 1.292662469015786,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 20478
    },
    {
      "epoch": 0.20479,
      "grad_norm": 1.429433414427323,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 20479
    },
    {
      "epoch": 0.2048,
      "grad_norm": 1.259234210940857,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 20480
    },
    {
      "epoch": 0.20481,
      "grad_norm": 1.6757163183846884,
      "learning_rate": 0.003,
      "loss": 4.0061,
      "step": 20481
    },
    {
      "epoch": 0.20482,
      "grad_norm": 1.0300496487687896,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 20482
    },
    {
      "epoch": 0.20483,
      "grad_norm": 1.4611030214175802,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 20483
    },
    {
      "epoch": 0.20484,
      "grad_norm": 1.1961501734660809,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 20484
    },
    {
      "epoch": 0.20485,
      "grad_norm": 1.6365170923707129,
      "learning_rate": 0.003,
      "loss": 4.0866,
      "step": 20485
    },
    {
      "epoch": 0.20486,
      "grad_norm": 0.9255837236365103,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 20486
    },
    {
      "epoch": 0.20487,
      "grad_norm": 1.2907347708489552,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 20487
    },
    {
      "epoch": 0.20488,
      "grad_norm": 1.211527144515664,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 20488
    },
    {
      "epoch": 0.20489,
      "grad_norm": 1.2567952590179416,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 20489
    },
    {
      "epoch": 0.2049,
      "grad_norm": 1.2447501540709187,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 20490
    },
    {
      "epoch": 0.20491,
      "grad_norm": 1.5771043171932784,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 20491
    },
    {
      "epoch": 0.20492,
      "grad_norm": 1.2944710811943263,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 20492
    },
    {
      "epoch": 0.20493,
      "grad_norm": 1.1536166957557195,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 20493
    },
    {
      "epoch": 0.20494,
      "grad_norm": 1.230412031789269,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 20494
    },
    {
      "epoch": 0.20495,
      "grad_norm": 1.4309706890344465,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 20495
    },
    {
      "epoch": 0.20496,
      "grad_norm": 1.239470233245192,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 20496
    },
    {
      "epoch": 0.20497,
      "grad_norm": 1.3717068737602895,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 20497
    },
    {
      "epoch": 0.20498,
      "grad_norm": 1.0206873885980308,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 20498
    },
    {
      "epoch": 0.20499,
      "grad_norm": 1.4824021249275585,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 20499
    },
    {
      "epoch": 0.205,
      "grad_norm": 1.0350841616123798,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 20500
    },
    {
      "epoch": 0.20501,
      "grad_norm": 1.3121771019769726,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 20501
    },
    {
      "epoch": 0.20502,
      "grad_norm": 1.1708296290415547,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 20502
    },
    {
      "epoch": 0.20503,
      "grad_norm": 1.3824308379400518,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 20503
    },
    {
      "epoch": 0.20504,
      "grad_norm": 1.2070001077828993,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 20504
    },
    {
      "epoch": 0.20505,
      "grad_norm": 1.4755787704240584,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 20505
    },
    {
      "epoch": 0.20506,
      "grad_norm": 1.2193032792463148,
      "learning_rate": 0.003,
      "loss": 4.1092,
      "step": 20506
    },
    {
      "epoch": 0.20507,
      "grad_norm": 1.1078265884640162,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 20507
    },
    {
      "epoch": 0.20508,
      "grad_norm": 1.5052162221790877,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 20508
    },
    {
      "epoch": 0.20509,
      "grad_norm": 1.1666948310567067,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 20509
    },
    {
      "epoch": 0.2051,
      "grad_norm": 1.4079526417493138,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 20510
    },
    {
      "epoch": 0.20511,
      "grad_norm": 1.1709527819746652,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 20511
    },
    {
      "epoch": 0.20512,
      "grad_norm": 1.3605758979989855,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 20512
    },
    {
      "epoch": 0.20513,
      "grad_norm": 1.2534739820163656,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 20513
    },
    {
      "epoch": 0.20514,
      "grad_norm": 1.1809491957128397,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 20514
    },
    {
      "epoch": 0.20515,
      "grad_norm": 1.3620554584835562,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 20515
    },
    {
      "epoch": 0.20516,
      "grad_norm": 1.296887173066892,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 20516
    },
    {
      "epoch": 0.20517,
      "grad_norm": 1.318459153043808,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 20517
    },
    {
      "epoch": 0.20518,
      "grad_norm": 1.4895743766572598,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 20518
    },
    {
      "epoch": 0.20519,
      "grad_norm": 1.332194058789682,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 20519
    },
    {
      "epoch": 0.2052,
      "grad_norm": 1.3867458319313875,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 20520
    },
    {
      "epoch": 0.20521,
      "grad_norm": 1.1098151444373003,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 20521
    },
    {
      "epoch": 0.20522,
      "grad_norm": 1.330463477670165,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 20522
    },
    {
      "epoch": 0.20523,
      "grad_norm": 1.1888056787678798,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 20523
    },
    {
      "epoch": 0.20524,
      "grad_norm": 1.282382010427326,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 20524
    },
    {
      "epoch": 0.20525,
      "grad_norm": 1.3060109899235448,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 20525
    },
    {
      "epoch": 0.20526,
      "grad_norm": 1.2621685969087504,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 20526
    },
    {
      "epoch": 0.20527,
      "grad_norm": 1.180382961412499,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 20527
    },
    {
      "epoch": 0.20528,
      "grad_norm": 1.2882221410943004,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 20528
    },
    {
      "epoch": 0.20529,
      "grad_norm": 1.1572585682565248,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 20529
    },
    {
      "epoch": 0.2053,
      "grad_norm": 1.3687852153418119,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 20530
    },
    {
      "epoch": 0.20531,
      "grad_norm": 1.1473823648403227,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 20531
    },
    {
      "epoch": 0.20532,
      "grad_norm": 1.2134733635286072,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 20532
    },
    {
      "epoch": 0.20533,
      "grad_norm": 1.1899592582746903,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 20533
    },
    {
      "epoch": 0.20534,
      "grad_norm": 1.388609446741199,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 20534
    },
    {
      "epoch": 0.20535,
      "grad_norm": 1.4700216421888237,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 20535
    },
    {
      "epoch": 0.20536,
      "grad_norm": 1.1139304987306389,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 20536
    },
    {
      "epoch": 0.20537,
      "grad_norm": 1.666631669804934,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 20537
    },
    {
      "epoch": 0.20538,
      "grad_norm": 1.0872075776099392,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 20538
    },
    {
      "epoch": 0.20539,
      "grad_norm": 1.387006541861756,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 20539
    },
    {
      "epoch": 0.2054,
      "grad_norm": 1.2152337637991244,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 20540
    },
    {
      "epoch": 0.20541,
      "grad_norm": 1.464417464579699,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 20541
    },
    {
      "epoch": 0.20542,
      "grad_norm": 1.2962152186801776,
      "learning_rate": 0.003,
      "loss": 4.1038,
      "step": 20542
    },
    {
      "epoch": 0.20543,
      "grad_norm": 1.2587650889566364,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 20543
    },
    {
      "epoch": 0.20544,
      "grad_norm": 1.2278624251115824,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 20544
    },
    {
      "epoch": 0.20545,
      "grad_norm": 1.3033201276713364,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 20545
    },
    {
      "epoch": 0.20546,
      "grad_norm": 1.3703718316768527,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 20546
    },
    {
      "epoch": 0.20547,
      "grad_norm": 1.2482927032005222,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 20547
    },
    {
      "epoch": 0.20548,
      "grad_norm": 1.2524111624915995,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 20548
    },
    {
      "epoch": 0.20549,
      "grad_norm": 1.3874528947117495,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 20549
    },
    {
      "epoch": 0.2055,
      "grad_norm": 1.2424791935123651,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 20550
    },
    {
      "epoch": 0.20551,
      "grad_norm": 1.2245096080080125,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 20551
    },
    {
      "epoch": 0.20552,
      "grad_norm": 1.565644611290424,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 20552
    },
    {
      "epoch": 0.20553,
      "grad_norm": 0.8983082530583695,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 20553
    },
    {
      "epoch": 0.20554,
      "grad_norm": 1.4622981748993382,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 20554
    },
    {
      "epoch": 0.20555,
      "grad_norm": 1.3713023786716179,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 20555
    },
    {
      "epoch": 0.20556,
      "grad_norm": 1.2223116811646306,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 20556
    },
    {
      "epoch": 0.20557,
      "grad_norm": 1.2230211390947725,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 20557
    },
    {
      "epoch": 0.20558,
      "grad_norm": 1.2727476956328838,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 20558
    },
    {
      "epoch": 0.20559,
      "grad_norm": 1.2630263689417707,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 20559
    },
    {
      "epoch": 0.2056,
      "grad_norm": 1.380809886600488,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 20560
    },
    {
      "epoch": 0.20561,
      "grad_norm": 1.2041721567299215,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 20561
    },
    {
      "epoch": 0.20562,
      "grad_norm": 1.3675561477488738,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 20562
    },
    {
      "epoch": 0.20563,
      "grad_norm": 0.9813567807250267,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 20563
    },
    {
      "epoch": 0.20564,
      "grad_norm": 1.2156187944006267,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 20564
    },
    {
      "epoch": 0.20565,
      "grad_norm": 1.17726874163147,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 20565
    },
    {
      "epoch": 0.20566,
      "grad_norm": 1.252404813673878,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 20566
    },
    {
      "epoch": 0.20567,
      "grad_norm": 1.3644825816059427,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 20567
    },
    {
      "epoch": 0.20568,
      "grad_norm": 1.2680271459853363,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 20568
    },
    {
      "epoch": 0.20569,
      "grad_norm": 1.251376240785194,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 20569
    },
    {
      "epoch": 0.2057,
      "grad_norm": 1.3928814421434723,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 20570
    },
    {
      "epoch": 0.20571,
      "grad_norm": 1.149725386259138,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 20571
    },
    {
      "epoch": 0.20572,
      "grad_norm": 1.406423417673629,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 20572
    },
    {
      "epoch": 0.20573,
      "grad_norm": 1.2594626237260136,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 20573
    },
    {
      "epoch": 0.20574,
      "grad_norm": 1.214761971646534,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 20574
    },
    {
      "epoch": 0.20575,
      "grad_norm": 1.3867559215345646,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 20575
    },
    {
      "epoch": 0.20576,
      "grad_norm": 1.0350722602622613,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 20576
    },
    {
      "epoch": 0.20577,
      "grad_norm": 1.4415700347605132,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 20577
    },
    {
      "epoch": 0.20578,
      "grad_norm": 1.2257186853441844,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 20578
    },
    {
      "epoch": 0.20579,
      "grad_norm": 1.4617773787267274,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 20579
    },
    {
      "epoch": 0.2058,
      "grad_norm": 1.0446844849935397,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 20580
    },
    {
      "epoch": 0.20581,
      "grad_norm": 1.2309633880667266,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 20581
    },
    {
      "epoch": 0.20582,
      "grad_norm": 1.2501727853012718,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 20582
    },
    {
      "epoch": 0.20583,
      "grad_norm": 1.554489605582137,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 20583
    },
    {
      "epoch": 0.20584,
      "grad_norm": 1.1688257115667773,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 20584
    },
    {
      "epoch": 0.20585,
      "grad_norm": 1.657144677567979,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 20585
    },
    {
      "epoch": 0.20586,
      "grad_norm": 0.9750985458908387,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 20586
    },
    {
      "epoch": 0.20587,
      "grad_norm": 1.3610198926495998,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 20587
    },
    {
      "epoch": 0.20588,
      "grad_norm": 1.194014890403564,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 20588
    },
    {
      "epoch": 0.20589,
      "grad_norm": 1.2440849695940555,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 20589
    },
    {
      "epoch": 0.2059,
      "grad_norm": 1.182737702079615,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 20590
    },
    {
      "epoch": 0.20591,
      "grad_norm": 1.1991048748933788,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 20591
    },
    {
      "epoch": 0.20592,
      "grad_norm": 1.369921693717321,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 20592
    },
    {
      "epoch": 0.20593,
      "grad_norm": 1.4253420981695712,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 20593
    },
    {
      "epoch": 0.20594,
      "grad_norm": 1.239517134925124,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 20594
    },
    {
      "epoch": 0.20595,
      "grad_norm": 1.2045848075684387,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 20595
    },
    {
      "epoch": 0.20596,
      "grad_norm": 1.3277529608869314,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 20596
    },
    {
      "epoch": 0.20597,
      "grad_norm": 1.092490748652337,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 20597
    },
    {
      "epoch": 0.20598,
      "grad_norm": 1.2148708086439042,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20598
    },
    {
      "epoch": 0.20599,
      "grad_norm": 1.456604647874599,
      "learning_rate": 0.003,
      "loss": 4.1006,
      "step": 20599
    },
    {
      "epoch": 0.206,
      "grad_norm": 1.185871450619726,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 20600
    },
    {
      "epoch": 0.20601,
      "grad_norm": 1.3666582857210787,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 20601
    },
    {
      "epoch": 0.20602,
      "grad_norm": 1.2210227052199842,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 20602
    },
    {
      "epoch": 0.20603,
      "grad_norm": 1.4549005179978116,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 20603
    },
    {
      "epoch": 0.20604,
      "grad_norm": 1.150989627895327,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20604
    },
    {
      "epoch": 0.20605,
      "grad_norm": 1.5770164514930232,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 20605
    },
    {
      "epoch": 0.20606,
      "grad_norm": 1.0607505870665006,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 20606
    },
    {
      "epoch": 0.20607,
      "grad_norm": 1.70352693192207,
      "learning_rate": 0.003,
      "loss": 4.0891,
      "step": 20607
    },
    {
      "epoch": 0.20608,
      "grad_norm": 1.1033050008611927,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 20608
    },
    {
      "epoch": 0.20609,
      "grad_norm": 1.235798120813274,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 20609
    },
    {
      "epoch": 0.2061,
      "grad_norm": 1.6060560230983314,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 20610
    },
    {
      "epoch": 0.20611,
      "grad_norm": 0.8996221193574458,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 20611
    },
    {
      "epoch": 0.20612,
      "grad_norm": 1.4191226435691027,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 20612
    },
    {
      "epoch": 0.20613,
      "grad_norm": 1.2155417941742577,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 20613
    },
    {
      "epoch": 0.20614,
      "grad_norm": 1.3933365393758117,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 20614
    },
    {
      "epoch": 0.20615,
      "grad_norm": 1.259737007470902,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 20615
    },
    {
      "epoch": 0.20616,
      "grad_norm": 1.4224039150471577,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 20616
    },
    {
      "epoch": 0.20617,
      "grad_norm": 1.1925649227479738,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 20617
    },
    {
      "epoch": 0.20618,
      "grad_norm": 1.2215486795495927,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 20618
    },
    {
      "epoch": 0.20619,
      "grad_norm": 1.3938969321035501,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 20619
    },
    {
      "epoch": 0.2062,
      "grad_norm": 1.627778881202133,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 20620
    },
    {
      "epoch": 0.20621,
      "grad_norm": 1.2438999679741989,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 20621
    },
    {
      "epoch": 0.20622,
      "grad_norm": 1.0670015597144602,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 20622
    },
    {
      "epoch": 0.20623,
      "grad_norm": 1.4980924500585362,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 20623
    },
    {
      "epoch": 0.20624,
      "grad_norm": 1.158117494940876,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 20624
    },
    {
      "epoch": 0.20625,
      "grad_norm": 1.4354739798432996,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 20625
    },
    {
      "epoch": 0.20626,
      "grad_norm": 1.0875577049487977,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 20626
    },
    {
      "epoch": 0.20627,
      "grad_norm": 1.2212342891564674,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 20627
    },
    {
      "epoch": 0.20628,
      "grad_norm": 1.1224429716310118,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 20628
    },
    {
      "epoch": 0.20629,
      "grad_norm": 1.1185573262472182,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 20629
    },
    {
      "epoch": 0.2063,
      "grad_norm": 1.3897139436188553,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 20630
    },
    {
      "epoch": 0.20631,
      "grad_norm": 1.317437915728774,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 20631
    },
    {
      "epoch": 0.20632,
      "grad_norm": 1.2560902403608283,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 20632
    },
    {
      "epoch": 0.20633,
      "grad_norm": 1.2556066526959209,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 20633
    },
    {
      "epoch": 0.20634,
      "grad_norm": 1.29749439461352,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 20634
    },
    {
      "epoch": 0.20635,
      "grad_norm": 1.2046627949955206,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 20635
    },
    {
      "epoch": 0.20636,
      "grad_norm": 1.4682069421201127,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 20636
    },
    {
      "epoch": 0.20637,
      "grad_norm": 1.181759682172407,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 20637
    },
    {
      "epoch": 0.20638,
      "grad_norm": 1.420208549746017,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 20638
    },
    {
      "epoch": 0.20639,
      "grad_norm": 1.0997561521912844,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 20639
    },
    {
      "epoch": 0.2064,
      "grad_norm": 1.3971745611642852,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 20640
    },
    {
      "epoch": 0.20641,
      "grad_norm": 1.1517191748984548,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20641
    },
    {
      "epoch": 0.20642,
      "grad_norm": 1.3854882769852916,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 20642
    },
    {
      "epoch": 0.20643,
      "grad_norm": 1.2170386478062296,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 20643
    },
    {
      "epoch": 0.20644,
      "grad_norm": 1.281003095728175,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 20644
    },
    {
      "epoch": 0.20645,
      "grad_norm": 1.2251108945855154,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 20645
    },
    {
      "epoch": 0.20646,
      "grad_norm": 1.38774337747246,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 20646
    },
    {
      "epoch": 0.20647,
      "grad_norm": 1.2719846999883633,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 20647
    },
    {
      "epoch": 0.20648,
      "grad_norm": 1.3384524298496236,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 20648
    },
    {
      "epoch": 0.20649,
      "grad_norm": 1.5083236211350421,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 20649
    },
    {
      "epoch": 0.2065,
      "grad_norm": 1.1610057430015674,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 20650
    },
    {
      "epoch": 0.20651,
      "grad_norm": 1.2686265477241983,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 20651
    },
    {
      "epoch": 0.20652,
      "grad_norm": 1.2170028195733724,
      "learning_rate": 0.003,
      "loss": 3.9959,
      "step": 20652
    },
    {
      "epoch": 0.20653,
      "grad_norm": 1.1984071586143492,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20653
    },
    {
      "epoch": 0.20654,
      "grad_norm": 1.2865578494221055,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 20654
    },
    {
      "epoch": 0.20655,
      "grad_norm": 1.0735489017586273,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 20655
    },
    {
      "epoch": 0.20656,
      "grad_norm": 1.6011180870459096,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 20656
    },
    {
      "epoch": 0.20657,
      "grad_norm": 1.0945711017845394,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 20657
    },
    {
      "epoch": 0.20658,
      "grad_norm": 1.632189199418318,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 20658
    },
    {
      "epoch": 0.20659,
      "grad_norm": 1.248305388631274,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 20659
    },
    {
      "epoch": 0.2066,
      "grad_norm": 1.4236851837706253,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 20660
    },
    {
      "epoch": 0.20661,
      "grad_norm": 1.177042760140521,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 20661
    },
    {
      "epoch": 0.20662,
      "grad_norm": 1.2280027663932278,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 20662
    },
    {
      "epoch": 0.20663,
      "grad_norm": 1.4467956014829126,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 20663
    },
    {
      "epoch": 0.20664,
      "grad_norm": 1.231375671997567,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 20664
    },
    {
      "epoch": 0.20665,
      "grad_norm": 1.1612978286709583,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 20665
    },
    {
      "epoch": 0.20666,
      "grad_norm": 1.1931450829525216,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 20666
    },
    {
      "epoch": 0.20667,
      "grad_norm": 1.3875364993532027,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 20667
    },
    {
      "epoch": 0.20668,
      "grad_norm": 1.2775509160710945,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 20668
    },
    {
      "epoch": 0.20669,
      "grad_norm": 1.2290026315694929,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 20669
    },
    {
      "epoch": 0.2067,
      "grad_norm": 1.3993429867831688,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 20670
    },
    {
      "epoch": 0.20671,
      "grad_norm": 1.2247149046568575,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 20671
    },
    {
      "epoch": 0.20672,
      "grad_norm": 1.5992138460318421,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 20672
    },
    {
      "epoch": 0.20673,
      "grad_norm": 1.217819248244983,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 20673
    },
    {
      "epoch": 0.20674,
      "grad_norm": 1.174585184075093,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 20674
    },
    {
      "epoch": 0.20675,
      "grad_norm": 1.5199680898632342,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 20675
    },
    {
      "epoch": 0.20676,
      "grad_norm": 1.2770945799134772,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 20676
    },
    {
      "epoch": 0.20677,
      "grad_norm": 1.230372537574802,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 20677
    },
    {
      "epoch": 0.20678,
      "grad_norm": 1.2001912213070292,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20678
    },
    {
      "epoch": 0.20679,
      "grad_norm": 1.2545792444304698,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 20679
    },
    {
      "epoch": 0.2068,
      "grad_norm": 1.2036745332235377,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 20680
    },
    {
      "epoch": 0.20681,
      "grad_norm": 1.156025537281944,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 20681
    },
    {
      "epoch": 0.20682,
      "grad_norm": 1.2147853047790425,
      "learning_rate": 0.003,
      "loss": 4.009,
      "step": 20682
    },
    {
      "epoch": 0.20683,
      "grad_norm": 1.2951899681457213,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 20683
    },
    {
      "epoch": 0.20684,
      "grad_norm": 1.2828579337821484,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 20684
    },
    {
      "epoch": 0.20685,
      "grad_norm": 1.2365241593515177,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 20685
    },
    {
      "epoch": 0.20686,
      "grad_norm": 1.415664458003797,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 20686
    },
    {
      "epoch": 0.20687,
      "grad_norm": 1.3797239423240015,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 20687
    },
    {
      "epoch": 0.20688,
      "grad_norm": 1.1438572845436745,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 20688
    },
    {
      "epoch": 0.20689,
      "grad_norm": 1.3461755101404702,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 20689
    },
    {
      "epoch": 0.2069,
      "grad_norm": 1.312201354100928,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 20690
    },
    {
      "epoch": 0.20691,
      "grad_norm": 1.3174299393061977,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 20691
    },
    {
      "epoch": 0.20692,
      "grad_norm": 1.1573339127994,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 20692
    },
    {
      "epoch": 0.20693,
      "grad_norm": 1.5660673222263295,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 20693
    },
    {
      "epoch": 0.20694,
      "grad_norm": 1.0905513582817086,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 20694
    },
    {
      "epoch": 0.20695,
      "grad_norm": 1.3212477241955471,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 20695
    },
    {
      "epoch": 0.20696,
      "grad_norm": 1.073198136565413,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 20696
    },
    {
      "epoch": 0.20697,
      "grad_norm": 1.300374772924821,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 20697
    },
    {
      "epoch": 0.20698,
      "grad_norm": 1.0767247923857175,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 20698
    },
    {
      "epoch": 0.20699,
      "grad_norm": 1.5065065597585965,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 20699
    },
    {
      "epoch": 0.207,
      "grad_norm": 1.2454389004086728,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 20700
    },
    {
      "epoch": 0.20701,
      "grad_norm": 1.3314144646308688,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 20701
    },
    {
      "epoch": 0.20702,
      "grad_norm": 1.3694677377990165,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 20702
    },
    {
      "epoch": 0.20703,
      "grad_norm": 1.4647675342775082,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 20703
    },
    {
      "epoch": 0.20704,
      "grad_norm": 1.2431562429480945,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 20704
    },
    {
      "epoch": 0.20705,
      "grad_norm": 1.3133051510352354,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 20705
    },
    {
      "epoch": 0.20706,
      "grad_norm": 1.4612118003039232,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 20706
    },
    {
      "epoch": 0.20707,
      "grad_norm": 1.118376984859685,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 20707
    },
    {
      "epoch": 0.20708,
      "grad_norm": 1.5201178905586379,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 20708
    },
    {
      "epoch": 0.20709,
      "grad_norm": 0.9611235534335962,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20709
    },
    {
      "epoch": 0.2071,
      "grad_norm": 1.3960161951593277,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 20710
    },
    {
      "epoch": 0.20711,
      "grad_norm": 1.1151622481979715,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 20711
    },
    {
      "epoch": 0.20712,
      "grad_norm": 1.5674117359355935,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 20712
    },
    {
      "epoch": 0.20713,
      "grad_norm": 1.0274111294218384,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 20713
    },
    {
      "epoch": 0.20714,
      "grad_norm": 1.581769560964899,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 20714
    },
    {
      "epoch": 0.20715,
      "grad_norm": 1.084185138019809,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 20715
    },
    {
      "epoch": 0.20716,
      "grad_norm": 1.4325211186606996,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 20716
    },
    {
      "epoch": 0.20717,
      "grad_norm": 1.3918793460894499,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 20717
    },
    {
      "epoch": 0.20718,
      "grad_norm": 1.6126234769973353,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 20718
    },
    {
      "epoch": 0.20719,
      "grad_norm": 1.2739890863916845,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 20719
    },
    {
      "epoch": 0.2072,
      "grad_norm": 1.3784070284601277,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 20720
    },
    {
      "epoch": 0.20721,
      "grad_norm": 1.327251701653354,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 20721
    },
    {
      "epoch": 0.20722,
      "grad_norm": 1.1155978897483336,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 20722
    },
    {
      "epoch": 0.20723,
      "grad_norm": 1.315794850057153,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 20723
    },
    {
      "epoch": 0.20724,
      "grad_norm": 1.3772555675504765,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 20724
    },
    {
      "epoch": 0.20725,
      "grad_norm": 1.1192654698576237,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 20725
    },
    {
      "epoch": 0.20726,
      "grad_norm": 1.5072056858156686,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 20726
    },
    {
      "epoch": 0.20727,
      "grad_norm": 1.2860033041347505,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 20727
    },
    {
      "epoch": 0.20728,
      "grad_norm": 1.1638881920695072,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 20728
    },
    {
      "epoch": 0.20729,
      "grad_norm": 1.4448337864291165,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 20729
    },
    {
      "epoch": 0.2073,
      "grad_norm": 1.1178446923668317,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 20730
    },
    {
      "epoch": 0.20731,
      "grad_norm": 1.3855596548490468,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 20731
    },
    {
      "epoch": 0.20732,
      "grad_norm": 1.0902998889131228,
      "learning_rate": 0.003,
      "loss": 4.0032,
      "step": 20732
    },
    {
      "epoch": 0.20733,
      "grad_norm": 1.3623174030487706,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 20733
    },
    {
      "epoch": 0.20734,
      "grad_norm": 1.2207749874969758,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 20734
    },
    {
      "epoch": 0.20735,
      "grad_norm": 1.316201299828994,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 20735
    },
    {
      "epoch": 0.20736,
      "grad_norm": 1.1638952152907889,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 20736
    },
    {
      "epoch": 0.20737,
      "grad_norm": 1.2916292397240166,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 20737
    },
    {
      "epoch": 0.20738,
      "grad_norm": 1.1862988132039978,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 20738
    },
    {
      "epoch": 0.20739,
      "grad_norm": 1.3710172962165703,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 20739
    },
    {
      "epoch": 0.2074,
      "grad_norm": 1.1044965581222408,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 20740
    },
    {
      "epoch": 0.20741,
      "grad_norm": 1.270604851444255,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 20741
    },
    {
      "epoch": 0.20742,
      "grad_norm": 1.1771694767018996,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 20742
    },
    {
      "epoch": 0.20743,
      "grad_norm": 1.4379249444804114,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 20743
    },
    {
      "epoch": 0.20744,
      "grad_norm": 1.0065490802911512,
      "learning_rate": 0.003,
      "loss": 4.0801,
      "step": 20744
    },
    {
      "epoch": 0.20745,
      "grad_norm": 1.606752109344879,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20745
    },
    {
      "epoch": 0.20746,
      "grad_norm": 1.6169098553926398,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 20746
    },
    {
      "epoch": 0.20747,
      "grad_norm": 1.0463943810491074,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 20747
    },
    {
      "epoch": 0.20748,
      "grad_norm": 1.3687174936811755,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 20748
    },
    {
      "epoch": 0.20749,
      "grad_norm": 1.1107697487186206,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 20749
    },
    {
      "epoch": 0.2075,
      "grad_norm": 1.1774792649263777,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 20750
    },
    {
      "epoch": 0.20751,
      "grad_norm": 1.2807190015587913,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 20751
    },
    {
      "epoch": 0.20752,
      "grad_norm": 1.3254048160794856,
      "learning_rate": 0.003,
      "loss": 4.0865,
      "step": 20752
    },
    {
      "epoch": 0.20753,
      "grad_norm": 1.3495750153155786,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 20753
    },
    {
      "epoch": 0.20754,
      "grad_norm": 1.357838787640671,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 20754
    },
    {
      "epoch": 0.20755,
      "grad_norm": 1.1509300649152578,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 20755
    },
    {
      "epoch": 0.20756,
      "grad_norm": 1.4739648216529362,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 20756
    },
    {
      "epoch": 0.20757,
      "grad_norm": 1.0317305907954093,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20757
    },
    {
      "epoch": 0.20758,
      "grad_norm": 1.5550783556663623,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 20758
    },
    {
      "epoch": 0.20759,
      "grad_norm": 1.049816633874253,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 20759
    },
    {
      "epoch": 0.2076,
      "grad_norm": 1.3850869771822945,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 20760
    },
    {
      "epoch": 0.20761,
      "grad_norm": 1.18946485064815,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 20761
    },
    {
      "epoch": 0.20762,
      "grad_norm": 1.4499318233215346,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 20762
    },
    {
      "epoch": 0.20763,
      "grad_norm": 0.9098224927031964,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 20763
    },
    {
      "epoch": 0.20764,
      "grad_norm": 1.336208800715881,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 20764
    },
    {
      "epoch": 0.20765,
      "grad_norm": 1.3240349176179802,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 20765
    },
    {
      "epoch": 0.20766,
      "grad_norm": 1.4657133881894573,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 20766
    },
    {
      "epoch": 0.20767,
      "grad_norm": 1.463064796644083,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 20767
    },
    {
      "epoch": 0.20768,
      "grad_norm": 1.0376490916670396,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 20768
    },
    {
      "epoch": 0.20769,
      "grad_norm": 1.5355933849330268,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 20769
    },
    {
      "epoch": 0.2077,
      "grad_norm": 1.1550285482192808,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 20770
    },
    {
      "epoch": 0.20771,
      "grad_norm": 1.6016164048712531,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 20771
    },
    {
      "epoch": 0.20772,
      "grad_norm": 1.0556020067156495,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 20772
    },
    {
      "epoch": 0.20773,
      "grad_norm": 1.1085310083632471,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 20773
    },
    {
      "epoch": 0.20774,
      "grad_norm": 1.3511133330470035,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 20774
    },
    {
      "epoch": 0.20775,
      "grad_norm": 1.0777696954278422,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 20775
    },
    {
      "epoch": 0.20776,
      "grad_norm": 1.4478514990309528,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 20776
    },
    {
      "epoch": 0.20777,
      "grad_norm": 1.2627413233954994,
      "learning_rate": 0.003,
      "loss": 4.0841,
      "step": 20777
    },
    {
      "epoch": 0.20778,
      "grad_norm": 1.1945623095279247,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 20778
    },
    {
      "epoch": 0.20779,
      "grad_norm": 1.350820223734434,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 20779
    },
    {
      "epoch": 0.2078,
      "grad_norm": 1.233820204919974,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 20780
    },
    {
      "epoch": 0.20781,
      "grad_norm": 1.424115334207775,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 20781
    },
    {
      "epoch": 0.20782,
      "grad_norm": 1.1560346264308283,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 20782
    },
    {
      "epoch": 0.20783,
      "grad_norm": 1.9797191617411831,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 20783
    },
    {
      "epoch": 0.20784,
      "grad_norm": 1.1826811649741138,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 20784
    },
    {
      "epoch": 0.20785,
      "grad_norm": 1.3513571715731507,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 20785
    },
    {
      "epoch": 0.20786,
      "grad_norm": 1.2581827014596552,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 20786
    },
    {
      "epoch": 0.20787,
      "grad_norm": 1.199670279474661,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 20787
    },
    {
      "epoch": 0.20788,
      "grad_norm": 1.2612259754453077,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 20788
    },
    {
      "epoch": 0.20789,
      "grad_norm": 1.4795022118185028,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 20789
    },
    {
      "epoch": 0.2079,
      "grad_norm": 1.1137152810385258,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 20790
    },
    {
      "epoch": 0.20791,
      "grad_norm": 1.116026403579865,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 20791
    },
    {
      "epoch": 0.20792,
      "grad_norm": 1.4050517941276137,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 20792
    },
    {
      "epoch": 0.20793,
      "grad_norm": 1.3092047542552736,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 20793
    },
    {
      "epoch": 0.20794,
      "grad_norm": 1.2555634846244876,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 20794
    },
    {
      "epoch": 0.20795,
      "grad_norm": 1.1152539238852723,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 20795
    },
    {
      "epoch": 0.20796,
      "grad_norm": 1.3704306356685125,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 20796
    },
    {
      "epoch": 0.20797,
      "grad_norm": 1.3005254051176414,
      "learning_rate": 0.003,
      "loss": 4.0049,
      "step": 20797
    },
    {
      "epoch": 0.20798,
      "grad_norm": 1.1293085128932312,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 20798
    },
    {
      "epoch": 0.20799,
      "grad_norm": 1.2954795612006205,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20799
    },
    {
      "epoch": 0.208,
      "grad_norm": 1.287234029685038,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 20800
    },
    {
      "epoch": 0.20801,
      "grad_norm": 1.1639186714194185,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 20801
    },
    {
      "epoch": 0.20802,
      "grad_norm": 1.3642931017731221,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 20802
    },
    {
      "epoch": 0.20803,
      "grad_norm": 1.300376417837977,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 20803
    },
    {
      "epoch": 0.20804,
      "grad_norm": 1.19418609442342,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 20804
    },
    {
      "epoch": 0.20805,
      "grad_norm": 1.474440167259748,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 20805
    },
    {
      "epoch": 0.20806,
      "grad_norm": 1.1675361434908935,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 20806
    },
    {
      "epoch": 0.20807,
      "grad_norm": 1.4549767555633737,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 20807
    },
    {
      "epoch": 0.20808,
      "grad_norm": 1.2582960110328578,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 20808
    },
    {
      "epoch": 0.20809,
      "grad_norm": 1.293121104939769,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 20809
    },
    {
      "epoch": 0.2081,
      "grad_norm": 1.3426327364168016,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 20810
    },
    {
      "epoch": 0.20811,
      "grad_norm": 1.0024443980199094,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 20811
    },
    {
      "epoch": 0.20812,
      "grad_norm": 1.335153296270808,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 20812
    },
    {
      "epoch": 0.20813,
      "grad_norm": 1.1369543975249898,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 20813
    },
    {
      "epoch": 0.20814,
      "grad_norm": 1.2746262554435819,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 20814
    },
    {
      "epoch": 0.20815,
      "grad_norm": 1.3587407132341922,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 20815
    },
    {
      "epoch": 0.20816,
      "grad_norm": 1.1291126681104129,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 20816
    },
    {
      "epoch": 0.20817,
      "grad_norm": 1.4650140314166336,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 20817
    },
    {
      "epoch": 0.20818,
      "grad_norm": 1.113597228538697,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 20818
    },
    {
      "epoch": 0.20819,
      "grad_norm": 1.6122646829652605,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 20819
    },
    {
      "epoch": 0.2082,
      "grad_norm": 1.3561395147569244,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 20820
    },
    {
      "epoch": 0.20821,
      "grad_norm": 1.6761463283533349,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 20821
    },
    {
      "epoch": 0.20822,
      "grad_norm": 1.0185067312374685,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 20822
    },
    {
      "epoch": 0.20823,
      "grad_norm": 1.293982685093887,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 20823
    },
    {
      "epoch": 0.20824,
      "grad_norm": 1.1625700519350355,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 20824
    },
    {
      "epoch": 0.20825,
      "grad_norm": 1.2888389139611907,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 20825
    },
    {
      "epoch": 0.20826,
      "grad_norm": 1.116572675993816,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 20826
    },
    {
      "epoch": 0.20827,
      "grad_norm": 1.3177369257552722,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 20827
    },
    {
      "epoch": 0.20828,
      "grad_norm": 1.1371442329404948,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 20828
    },
    {
      "epoch": 0.20829,
      "grad_norm": 1.2947468624123517,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 20829
    },
    {
      "epoch": 0.2083,
      "grad_norm": 1.2602130502568567,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 20830
    },
    {
      "epoch": 0.20831,
      "grad_norm": 1.4506110797716467,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 20831
    },
    {
      "epoch": 0.20832,
      "grad_norm": 1.0928195888686325,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 20832
    },
    {
      "epoch": 0.20833,
      "grad_norm": 1.4604003874374663,
      "learning_rate": 0.003,
      "loss": 4.0849,
      "step": 20833
    },
    {
      "epoch": 0.20834,
      "grad_norm": 1.2252945089312588,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 20834
    },
    {
      "epoch": 0.20835,
      "grad_norm": 1.3495567279436074,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 20835
    },
    {
      "epoch": 0.20836,
      "grad_norm": 1.1663520726944048,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 20836
    },
    {
      "epoch": 0.20837,
      "grad_norm": 1.3042798883210238,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 20837
    },
    {
      "epoch": 0.20838,
      "grad_norm": 1.234572062163786,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 20838
    },
    {
      "epoch": 0.20839,
      "grad_norm": 1.3203252337818405,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 20839
    },
    {
      "epoch": 0.2084,
      "grad_norm": 1.0404060047258217,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 20840
    },
    {
      "epoch": 0.20841,
      "grad_norm": 1.5256692288077685,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 20841
    },
    {
      "epoch": 0.20842,
      "grad_norm": 1.6727543163276237,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 20842
    },
    {
      "epoch": 0.20843,
      "grad_norm": 1.2483890401340039,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 20843
    },
    {
      "epoch": 0.20844,
      "grad_norm": 1.2585950608498393,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 20844
    },
    {
      "epoch": 0.20845,
      "grad_norm": 1.3047930143249864,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 20845
    },
    {
      "epoch": 0.20846,
      "grad_norm": 1.3345669483117277,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 20846
    },
    {
      "epoch": 0.20847,
      "grad_norm": 1.3647467882722444,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 20847
    },
    {
      "epoch": 0.20848,
      "grad_norm": 0.9861317635079706,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 20848
    },
    {
      "epoch": 0.20849,
      "grad_norm": 1.3763661514454046,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 20849
    },
    {
      "epoch": 0.2085,
      "grad_norm": 1.3161831768920973,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 20850
    },
    {
      "epoch": 0.20851,
      "grad_norm": 1.2378064888547666,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 20851
    },
    {
      "epoch": 0.20852,
      "grad_norm": 1.216971027708619,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 20852
    },
    {
      "epoch": 0.20853,
      "grad_norm": 1.2657044247692195,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 20853
    },
    {
      "epoch": 0.20854,
      "grad_norm": 1.3156640153098798,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 20854
    },
    {
      "epoch": 0.20855,
      "grad_norm": 1.3470086450334378,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 20855
    },
    {
      "epoch": 0.20856,
      "grad_norm": 1.1077005630643735,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 20856
    },
    {
      "epoch": 0.20857,
      "grad_norm": 2.0150889721151577,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 20857
    },
    {
      "epoch": 0.20858,
      "grad_norm": 1.0494119576201082,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 20858
    },
    {
      "epoch": 0.20859,
      "grad_norm": 1.4376968690517848,
      "learning_rate": 0.003,
      "loss": 4.0963,
      "step": 20859
    },
    {
      "epoch": 0.2086,
      "grad_norm": 1.2228033500769202,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 20860
    },
    {
      "epoch": 0.20861,
      "grad_norm": 1.3024082179552416,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 20861
    },
    {
      "epoch": 0.20862,
      "grad_norm": 1.3337153907875594,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 20862
    },
    {
      "epoch": 0.20863,
      "grad_norm": 1.3523027874716267,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 20863
    },
    {
      "epoch": 0.20864,
      "grad_norm": 1.1000205902889388,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 20864
    },
    {
      "epoch": 0.20865,
      "grad_norm": 1.3449720085528347,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 20865
    },
    {
      "epoch": 0.20866,
      "grad_norm": 1.2382383546368607,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 20866
    },
    {
      "epoch": 0.20867,
      "grad_norm": 1.1269017647330786,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 20867
    },
    {
      "epoch": 0.20868,
      "grad_norm": 1.2986669015988064,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 20868
    },
    {
      "epoch": 0.20869,
      "grad_norm": 1.1455552607424562,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 20869
    },
    {
      "epoch": 0.2087,
      "grad_norm": 1.3253962829092347,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 20870
    },
    {
      "epoch": 0.20871,
      "grad_norm": 1.0568251837872027,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 20871
    },
    {
      "epoch": 0.20872,
      "grad_norm": 1.2689662179831813,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 20872
    },
    {
      "epoch": 0.20873,
      "grad_norm": 1.0137833497000508,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 20873
    },
    {
      "epoch": 0.20874,
      "grad_norm": 1.3925150194736946,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 20874
    },
    {
      "epoch": 0.20875,
      "grad_norm": 1.147944154950515,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 20875
    },
    {
      "epoch": 0.20876,
      "grad_norm": 1.4880598077123546,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 20876
    },
    {
      "epoch": 0.20877,
      "grad_norm": 1.4617326494717378,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 20877
    },
    {
      "epoch": 0.20878,
      "grad_norm": 1.3219327544624884,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 20878
    },
    {
      "epoch": 0.20879,
      "grad_norm": 1.420988445786985,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 20879
    },
    {
      "epoch": 0.2088,
      "grad_norm": 1.1368159597668355,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 20880
    },
    {
      "epoch": 0.20881,
      "grad_norm": 1.1650311538443099,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 20881
    },
    {
      "epoch": 0.20882,
      "grad_norm": 1.4596155026540338,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 20882
    },
    {
      "epoch": 0.20883,
      "grad_norm": 1.2613695008600023,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 20883
    },
    {
      "epoch": 0.20884,
      "grad_norm": 1.2692564484497293,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 20884
    },
    {
      "epoch": 0.20885,
      "grad_norm": 1.292229202469805,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 20885
    },
    {
      "epoch": 0.20886,
      "grad_norm": 1.1647777743341188,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 20886
    },
    {
      "epoch": 0.20887,
      "grad_norm": 1.3424953059606388,
      "learning_rate": 0.003,
      "loss": 4.0939,
      "step": 20887
    },
    {
      "epoch": 0.20888,
      "grad_norm": 1.4193618803799186,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 20888
    },
    {
      "epoch": 0.20889,
      "grad_norm": 1.1278547256220763,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 20889
    },
    {
      "epoch": 0.2089,
      "grad_norm": 1.3975877767352827,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 20890
    },
    {
      "epoch": 0.20891,
      "grad_norm": 1.162881771907803,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 20891
    },
    {
      "epoch": 0.20892,
      "grad_norm": 1.5056194371792304,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 20892
    },
    {
      "epoch": 0.20893,
      "grad_norm": 1.2736401249513964,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 20893
    },
    {
      "epoch": 0.20894,
      "grad_norm": 1.4500261051793482,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 20894
    },
    {
      "epoch": 0.20895,
      "grad_norm": 1.179569673900195,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 20895
    },
    {
      "epoch": 0.20896,
      "grad_norm": 1.229599686937581,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 20896
    },
    {
      "epoch": 0.20897,
      "grad_norm": 1.2413269333270043,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 20897
    },
    {
      "epoch": 0.20898,
      "grad_norm": 1.1355251062525624,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 20898
    },
    {
      "epoch": 0.20899,
      "grad_norm": 1.326473613801506,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 20899
    },
    {
      "epoch": 0.209,
      "grad_norm": 1.2938071798412598,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 20900
    },
    {
      "epoch": 0.20901,
      "grad_norm": 1.5539919449076114,
      "learning_rate": 0.003,
      "loss": 4.0826,
      "step": 20901
    },
    {
      "epoch": 0.20902,
      "grad_norm": 1.0175792191757047,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 20902
    },
    {
      "epoch": 0.20903,
      "grad_norm": 1.4185608575024329,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 20903
    },
    {
      "epoch": 0.20904,
      "grad_norm": 1.489692037723099,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 20904
    },
    {
      "epoch": 0.20905,
      "grad_norm": 1.1518373995991453,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 20905
    },
    {
      "epoch": 0.20906,
      "grad_norm": 1.2745302902631523,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 20906
    },
    {
      "epoch": 0.20907,
      "grad_norm": 1.1981105025956502,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 20907
    },
    {
      "epoch": 0.20908,
      "grad_norm": 1.1071198808766434,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 20908
    },
    {
      "epoch": 0.20909,
      "grad_norm": 1.2480676753967612,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 20909
    },
    {
      "epoch": 0.2091,
      "grad_norm": 1.2125172864613187,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 20910
    },
    {
      "epoch": 0.20911,
      "grad_norm": 1.328490722810735,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 20911
    },
    {
      "epoch": 0.20912,
      "grad_norm": 1.1249015135299267,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 20912
    },
    {
      "epoch": 0.20913,
      "grad_norm": 1.2203451005770125,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 20913
    },
    {
      "epoch": 0.20914,
      "grad_norm": 1.4007068660764677,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 20914
    },
    {
      "epoch": 0.20915,
      "grad_norm": 1.096838057456493,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 20915
    },
    {
      "epoch": 0.20916,
      "grad_norm": 1.462002736596207,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 20916
    },
    {
      "epoch": 0.20917,
      "grad_norm": 1.2023236355995262,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 20917
    },
    {
      "epoch": 0.20918,
      "grad_norm": 1.2963698128726884,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 20918
    },
    {
      "epoch": 0.20919,
      "grad_norm": 1.228596871930989,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 20919
    },
    {
      "epoch": 0.2092,
      "grad_norm": 1.3763175882958565,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 20920
    },
    {
      "epoch": 0.20921,
      "grad_norm": 1.2404096739675687,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 20921
    },
    {
      "epoch": 0.20922,
      "grad_norm": 1.1403168687067564,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 20922
    },
    {
      "epoch": 0.20923,
      "grad_norm": 1.3997924724596653,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 20923
    },
    {
      "epoch": 0.20924,
      "grad_norm": 1.1041797216252074,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 20924
    },
    {
      "epoch": 0.20925,
      "grad_norm": 1.3711841401346831,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 20925
    },
    {
      "epoch": 0.20926,
      "grad_norm": 1.0450269013241913,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 20926
    },
    {
      "epoch": 0.20927,
      "grad_norm": 1.2769383700182384,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 20927
    },
    {
      "epoch": 0.20928,
      "grad_norm": 1.0877225315431993,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 20928
    },
    {
      "epoch": 0.20929,
      "grad_norm": 1.6300139792461399,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 20929
    },
    {
      "epoch": 0.2093,
      "grad_norm": 1.3242783820254749,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 20930
    },
    {
      "epoch": 0.20931,
      "grad_norm": 1.257103843444017,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 20931
    },
    {
      "epoch": 0.20932,
      "grad_norm": 1.521557788811263,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 20932
    },
    {
      "epoch": 0.20933,
      "grad_norm": 1.318340273370988,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 20933
    },
    {
      "epoch": 0.20934,
      "grad_norm": 1.3417539283120765,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 20934
    },
    {
      "epoch": 0.20935,
      "grad_norm": 1.3259221373793442,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 20935
    },
    {
      "epoch": 0.20936,
      "grad_norm": 1.2800407480604998,
      "learning_rate": 0.003,
      "loss": 4.0132,
      "step": 20936
    },
    {
      "epoch": 0.20937,
      "grad_norm": 0.9876674260510029,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 20937
    },
    {
      "epoch": 0.20938,
      "grad_norm": 1.4116121790748142,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 20938
    },
    {
      "epoch": 0.20939,
      "grad_norm": 1.2277894657211132,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 20939
    },
    {
      "epoch": 0.2094,
      "grad_norm": 1.3662094509846838,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 20940
    },
    {
      "epoch": 0.20941,
      "grad_norm": 1.115121969618257,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 20941
    },
    {
      "epoch": 0.20942,
      "grad_norm": 1.5119266528992794,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 20942
    },
    {
      "epoch": 0.20943,
      "grad_norm": 1.3523607537848863,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20943
    },
    {
      "epoch": 0.20944,
      "grad_norm": 1.3515926466848913,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 20944
    },
    {
      "epoch": 0.20945,
      "grad_norm": 1.1832739989508942,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 20945
    },
    {
      "epoch": 0.20946,
      "grad_norm": 1.2097681134785438,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 20946
    },
    {
      "epoch": 0.20947,
      "grad_norm": 1.1714307763907412,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 20947
    },
    {
      "epoch": 0.20948,
      "grad_norm": 1.352952022914992,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 20948
    },
    {
      "epoch": 0.20949,
      "grad_norm": 1.415737718595553,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 20949
    },
    {
      "epoch": 0.2095,
      "grad_norm": 1.058417143122475,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 20950
    },
    {
      "epoch": 0.20951,
      "grad_norm": 1.2579147483083288,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 20951
    },
    {
      "epoch": 0.20952,
      "grad_norm": 1.474085819945999,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 20952
    },
    {
      "epoch": 0.20953,
      "grad_norm": 0.9452978892338686,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 20953
    },
    {
      "epoch": 0.20954,
      "grad_norm": 1.2645678482354852,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 20954
    },
    {
      "epoch": 0.20955,
      "grad_norm": 1.3400150004615905,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 20955
    },
    {
      "epoch": 0.20956,
      "grad_norm": 1.133802275603527,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 20956
    },
    {
      "epoch": 0.20957,
      "grad_norm": 1.2247995439196313,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 20957
    },
    {
      "epoch": 0.20958,
      "grad_norm": 1.3174201355530692,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 20958
    },
    {
      "epoch": 0.20959,
      "grad_norm": 1.2321339496179788,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 20959
    },
    {
      "epoch": 0.2096,
      "grad_norm": 1.345487502302491,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 20960
    },
    {
      "epoch": 0.20961,
      "grad_norm": 1.232371302399167,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 20961
    },
    {
      "epoch": 0.20962,
      "grad_norm": 1.4100997616180153,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 20962
    },
    {
      "epoch": 0.20963,
      "grad_norm": 1.214560339721232,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 20963
    },
    {
      "epoch": 0.20964,
      "grad_norm": 1.3867937404423791,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 20964
    },
    {
      "epoch": 0.20965,
      "grad_norm": 1.269943982429771,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 20965
    },
    {
      "epoch": 0.20966,
      "grad_norm": 1.3386216228181835,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 20966
    },
    {
      "epoch": 0.20967,
      "grad_norm": 1.227105428563871,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 20967
    },
    {
      "epoch": 0.20968,
      "grad_norm": 1.4264916694640326,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 20968
    },
    {
      "epoch": 0.20969,
      "grad_norm": 0.9743109180159732,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 20969
    },
    {
      "epoch": 0.2097,
      "grad_norm": 1.5037940025653431,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 20970
    },
    {
      "epoch": 0.20971,
      "grad_norm": 1.1352251405098386,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 20971
    },
    {
      "epoch": 0.20972,
      "grad_norm": 1.3717446928455301,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 20972
    },
    {
      "epoch": 0.20973,
      "grad_norm": 1.183300212395144,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 20973
    },
    {
      "epoch": 0.20974,
      "grad_norm": 1.627985998429152,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 20974
    },
    {
      "epoch": 0.20975,
      "grad_norm": 1.189942525999578,
      "learning_rate": 0.003,
      "loss": 4.0789,
      "step": 20975
    },
    {
      "epoch": 0.20976,
      "grad_norm": 1.3191092046966775,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 20976
    },
    {
      "epoch": 0.20977,
      "grad_norm": 1.2108052685829256,
      "learning_rate": 0.003,
      "loss": 4.0798,
      "step": 20977
    },
    {
      "epoch": 0.20978,
      "grad_norm": 1.1163403429301721,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 20978
    },
    {
      "epoch": 0.20979,
      "grad_norm": 1.2791139974316421,
      "learning_rate": 0.003,
      "loss": 4.0905,
      "step": 20979
    },
    {
      "epoch": 0.2098,
      "grad_norm": 1.3739885718908496,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 20980
    },
    {
      "epoch": 0.20981,
      "grad_norm": 1.1176967990345994,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 20981
    },
    {
      "epoch": 0.20982,
      "grad_norm": 1.7555812723347777,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 20982
    },
    {
      "epoch": 0.20983,
      "grad_norm": 1.0190490633305063,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 20983
    },
    {
      "epoch": 0.20984,
      "grad_norm": 1.647968629574487,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 20984
    },
    {
      "epoch": 0.20985,
      "grad_norm": 1.228601296940512,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 20985
    },
    {
      "epoch": 0.20986,
      "grad_norm": 1.1780593895154643,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 20986
    },
    {
      "epoch": 0.20987,
      "grad_norm": 1.5167477665623446,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 20987
    },
    {
      "epoch": 0.20988,
      "grad_norm": 1.2027453692295034,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 20988
    },
    {
      "epoch": 0.20989,
      "grad_norm": 1.3819784422414434,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 20989
    },
    {
      "epoch": 0.2099,
      "grad_norm": 1.431069095891436,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 20990
    },
    {
      "epoch": 0.20991,
      "grad_norm": 1.1856362937113927,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 20991
    },
    {
      "epoch": 0.20992,
      "grad_norm": 1.4579232113790843,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 20992
    },
    {
      "epoch": 0.20993,
      "grad_norm": 1.0877371552863573,
      "learning_rate": 0.003,
      "loss": 4.1089,
      "step": 20993
    },
    {
      "epoch": 0.20994,
      "grad_norm": 1.0799867989318765,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 20994
    },
    {
      "epoch": 0.20995,
      "grad_norm": 1.2679371046599244,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 20995
    },
    {
      "epoch": 0.20996,
      "grad_norm": 1.1247101100163748,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 20996
    },
    {
      "epoch": 0.20997,
      "grad_norm": 1.2723423812425403,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 20997
    },
    {
      "epoch": 0.20998,
      "grad_norm": 1.1523814759851905,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 20998
    },
    {
      "epoch": 0.20999,
      "grad_norm": 1.4168831860950526,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 20999
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.9648126597864406,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 21000
    },
    {
      "epoch": 0.21001,
      "grad_norm": 1.54406208440241,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 21001
    },
    {
      "epoch": 0.21002,
      "grad_norm": 1.128820519320507,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 21002
    },
    {
      "epoch": 0.21003,
      "grad_norm": 1.2746340201716286,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 21003
    },
    {
      "epoch": 0.21004,
      "grad_norm": 1.2857510270154682,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 21004
    },
    {
      "epoch": 0.21005,
      "grad_norm": 1.1850808119042948,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 21005
    },
    {
      "epoch": 0.21006,
      "grad_norm": 1.5522083439081036,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 21006
    },
    {
      "epoch": 0.21007,
      "grad_norm": 1.260774149470961,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 21007
    },
    {
      "epoch": 0.21008,
      "grad_norm": 1.5370298278427954,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 21008
    },
    {
      "epoch": 0.21009,
      "grad_norm": 1.2243091526973395,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 21009
    },
    {
      "epoch": 0.2101,
      "grad_norm": 1.3225787370878426,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 21010
    },
    {
      "epoch": 0.21011,
      "grad_norm": 1.18069554223696,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 21011
    },
    {
      "epoch": 0.21012,
      "grad_norm": 1.5722260010050817,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 21012
    },
    {
      "epoch": 0.21013,
      "grad_norm": 1.2113411973015873,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 21013
    },
    {
      "epoch": 0.21014,
      "grad_norm": 1.3628036017370306,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 21014
    },
    {
      "epoch": 0.21015,
      "grad_norm": 1.1750802878262518,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 21015
    },
    {
      "epoch": 0.21016,
      "grad_norm": 1.4427593106096692,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 21016
    },
    {
      "epoch": 0.21017,
      "grad_norm": 1.0377068324397796,
      "learning_rate": 0.003,
      "loss": 4.033,
      "step": 21017
    },
    {
      "epoch": 0.21018,
      "grad_norm": 1.6716123251557335,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 21018
    },
    {
      "epoch": 0.21019,
      "grad_norm": 1.0599154480065698,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 21019
    },
    {
      "epoch": 0.2102,
      "grad_norm": 1.5586356522325036,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 21020
    },
    {
      "epoch": 0.21021,
      "grad_norm": 1.2503098443526723,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 21021
    },
    {
      "epoch": 0.21022,
      "grad_norm": 1.2825989183117132,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 21022
    },
    {
      "epoch": 0.21023,
      "grad_norm": 1.23175707597897,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 21023
    },
    {
      "epoch": 0.21024,
      "grad_norm": 1.3429031287970576,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 21024
    },
    {
      "epoch": 0.21025,
      "grad_norm": 1.2119921684369184,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 21025
    },
    {
      "epoch": 0.21026,
      "grad_norm": 1.4827436505288472,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 21026
    },
    {
      "epoch": 0.21027,
      "grad_norm": 1.3281124155898993,
      "learning_rate": 0.003,
      "loss": 4.0848,
      "step": 21027
    },
    {
      "epoch": 0.21028,
      "grad_norm": 1.3266289315697783,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 21028
    },
    {
      "epoch": 0.21029,
      "grad_norm": 1.172763625947714,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 21029
    },
    {
      "epoch": 0.2103,
      "grad_norm": 1.2856114185127379,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 21030
    },
    {
      "epoch": 0.21031,
      "grad_norm": 1.1196273421901068,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 21031
    },
    {
      "epoch": 0.21032,
      "grad_norm": 1.4086759644049058,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21032
    },
    {
      "epoch": 0.21033,
      "grad_norm": 1.2229488440703944,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 21033
    },
    {
      "epoch": 0.21034,
      "grad_norm": 1.3050149701049576,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 21034
    },
    {
      "epoch": 0.21035,
      "grad_norm": 1.561400770706906,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 21035
    },
    {
      "epoch": 0.21036,
      "grad_norm": 1.0559325603740646,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 21036
    },
    {
      "epoch": 0.21037,
      "grad_norm": 1.3934052758308075,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 21037
    },
    {
      "epoch": 0.21038,
      "grad_norm": 1.1612544978492942,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 21038
    },
    {
      "epoch": 0.21039,
      "grad_norm": 1.4088570787549488,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 21039
    },
    {
      "epoch": 0.2104,
      "grad_norm": 1.2569459965148915,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 21040
    },
    {
      "epoch": 0.21041,
      "grad_norm": 1.3039754397941128,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 21041
    },
    {
      "epoch": 0.21042,
      "grad_norm": 1.1099797720966442,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 21042
    },
    {
      "epoch": 0.21043,
      "grad_norm": 1.2845449228632493,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 21043
    },
    {
      "epoch": 0.21044,
      "grad_norm": 1.3554885841652318,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 21044
    },
    {
      "epoch": 0.21045,
      "grad_norm": 1.230013105795136,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 21045
    },
    {
      "epoch": 0.21046,
      "grad_norm": 1.5314007698323049,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21046
    },
    {
      "epoch": 0.21047,
      "grad_norm": 1.0067461744861579,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 21047
    },
    {
      "epoch": 0.21048,
      "grad_norm": 1.584802777434922,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 21048
    },
    {
      "epoch": 0.21049,
      "grad_norm": 1.1263449954189204,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 21049
    },
    {
      "epoch": 0.2105,
      "grad_norm": 1.6591382458406492,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 21050
    },
    {
      "epoch": 0.21051,
      "grad_norm": 1.05067562949757,
      "learning_rate": 0.003,
      "loss": 4.0899,
      "step": 21051
    },
    {
      "epoch": 0.21052,
      "grad_norm": 1.4121258978825222,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 21052
    },
    {
      "epoch": 0.21053,
      "grad_norm": 1.1885772521432727,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 21053
    },
    {
      "epoch": 0.21054,
      "grad_norm": 1.2910866675596309,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 21054
    },
    {
      "epoch": 0.21055,
      "grad_norm": 1.303537308251438,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 21055
    },
    {
      "epoch": 0.21056,
      "grad_norm": 1.3372199163127803,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 21056
    },
    {
      "epoch": 0.21057,
      "grad_norm": 1.1842089486023717,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 21057
    },
    {
      "epoch": 0.21058,
      "grad_norm": 1.1150311159984807,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 21058
    },
    {
      "epoch": 0.21059,
      "grad_norm": 1.616958463659875,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 21059
    },
    {
      "epoch": 0.2106,
      "grad_norm": 1.00862729220954,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 21060
    },
    {
      "epoch": 0.21061,
      "grad_norm": 1.502498230598199,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21061
    },
    {
      "epoch": 0.21062,
      "grad_norm": 1.0766915542174063,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 21062
    },
    {
      "epoch": 0.21063,
      "grad_norm": 1.5675211012398373,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 21063
    },
    {
      "epoch": 0.21064,
      "grad_norm": 1.0380204363407695,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 21064
    },
    {
      "epoch": 0.21065,
      "grad_norm": 1.469412240069509,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 21065
    },
    {
      "epoch": 0.21066,
      "grad_norm": 1.1353878096784409,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 21066
    },
    {
      "epoch": 0.21067,
      "grad_norm": 1.262367418845785,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 21067
    },
    {
      "epoch": 0.21068,
      "grad_norm": 1.161593396240763,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 21068
    },
    {
      "epoch": 0.21069,
      "grad_norm": 1.2757299030587597,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 21069
    },
    {
      "epoch": 0.2107,
      "grad_norm": 1.2448132233635982,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 21070
    },
    {
      "epoch": 0.21071,
      "grad_norm": 1.442376030384632,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 21071
    },
    {
      "epoch": 0.21072,
      "grad_norm": 1.4587049850944842,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 21072
    },
    {
      "epoch": 0.21073,
      "grad_norm": 1.192299761850415,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 21073
    },
    {
      "epoch": 0.21074,
      "grad_norm": 1.26884910425769,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 21074
    },
    {
      "epoch": 0.21075,
      "grad_norm": 1.2893964103039317,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 21075
    },
    {
      "epoch": 0.21076,
      "grad_norm": 1.1787456309208952,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 21076
    },
    {
      "epoch": 0.21077,
      "grad_norm": 1.138603335750006,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 21077
    },
    {
      "epoch": 0.21078,
      "grad_norm": 1.1600173037126666,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 21078
    },
    {
      "epoch": 0.21079,
      "grad_norm": 1.3422085299958455,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 21079
    },
    {
      "epoch": 0.2108,
      "grad_norm": 1.106362513409859,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 21080
    },
    {
      "epoch": 0.21081,
      "grad_norm": 1.4805500589637395,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 21081
    },
    {
      "epoch": 0.21082,
      "grad_norm": 0.934243209161164,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 21082
    },
    {
      "epoch": 0.21083,
      "grad_norm": 1.3886084879612093,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 21083
    },
    {
      "epoch": 0.21084,
      "grad_norm": 1.1952447634633065,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 21084
    },
    {
      "epoch": 0.21085,
      "grad_norm": 1.271509636174596,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 21085
    },
    {
      "epoch": 0.21086,
      "grad_norm": 1.1432380501669652,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 21086
    },
    {
      "epoch": 0.21087,
      "grad_norm": 1.5734936022836352,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 21087
    },
    {
      "epoch": 0.21088,
      "grad_norm": 1.062576420079353,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 21088
    },
    {
      "epoch": 0.21089,
      "grad_norm": 1.577088852900612,
      "learning_rate": 0.003,
      "loss": 4.0774,
      "step": 21089
    },
    {
      "epoch": 0.2109,
      "grad_norm": 1.3262250573900358,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 21090
    },
    {
      "epoch": 0.21091,
      "grad_norm": 1.4790908407427124,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 21091
    },
    {
      "epoch": 0.21092,
      "grad_norm": 1.2811464635095229,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 21092
    },
    {
      "epoch": 0.21093,
      "grad_norm": 1.0974674810300278,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 21093
    },
    {
      "epoch": 0.21094,
      "grad_norm": 1.3751477595513788,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 21094
    },
    {
      "epoch": 0.21095,
      "grad_norm": 1.2412138108559143,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 21095
    },
    {
      "epoch": 0.21096,
      "grad_norm": 1.1591859582479014,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 21096
    },
    {
      "epoch": 0.21097,
      "grad_norm": 1.3530582912094091,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 21097
    },
    {
      "epoch": 0.21098,
      "grad_norm": 1.1617085781215324,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 21098
    },
    {
      "epoch": 0.21099,
      "grad_norm": 1.4129967228300409,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 21099
    },
    {
      "epoch": 0.211,
      "grad_norm": 1.0766297210176785,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 21100
    },
    {
      "epoch": 0.21101,
      "grad_norm": 1.5789553526879387,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 21101
    },
    {
      "epoch": 0.21102,
      "grad_norm": 1.110150691244987,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 21102
    },
    {
      "epoch": 0.21103,
      "grad_norm": 1.3936425498083649,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 21103
    },
    {
      "epoch": 0.21104,
      "grad_norm": 1.2694800492278608,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 21104
    },
    {
      "epoch": 0.21105,
      "grad_norm": 1.4180659771764696,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 21105
    },
    {
      "epoch": 0.21106,
      "grad_norm": 1.1710647247911583,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 21106
    },
    {
      "epoch": 0.21107,
      "grad_norm": 1.3933621823096176,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 21107
    },
    {
      "epoch": 0.21108,
      "grad_norm": 1.201541250227036,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 21108
    },
    {
      "epoch": 0.21109,
      "grad_norm": 1.2044639273867823,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 21109
    },
    {
      "epoch": 0.2111,
      "grad_norm": 1.3735940076719475,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 21110
    },
    {
      "epoch": 0.21111,
      "grad_norm": 1.2542327173484145,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 21111
    },
    {
      "epoch": 0.21112,
      "grad_norm": 1.6773848266159919,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 21112
    },
    {
      "epoch": 0.21113,
      "grad_norm": 1.2495999851941781,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 21113
    },
    {
      "epoch": 0.21114,
      "grad_norm": 1.283840621076478,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 21114
    },
    {
      "epoch": 0.21115,
      "grad_norm": 1.2666174821831944,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 21115
    },
    {
      "epoch": 0.21116,
      "grad_norm": 1.252920124476882,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 21116
    },
    {
      "epoch": 0.21117,
      "grad_norm": 1.1195946630367066,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 21117
    },
    {
      "epoch": 0.21118,
      "grad_norm": 1.2807588933818483,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 21118
    },
    {
      "epoch": 0.21119,
      "grad_norm": 1.2286394305042434,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 21119
    },
    {
      "epoch": 0.2112,
      "grad_norm": 1.1650873552914147,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 21120
    },
    {
      "epoch": 0.21121,
      "grad_norm": 1.2531458893285583,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 21121
    },
    {
      "epoch": 0.21122,
      "grad_norm": 1.3200496972039166,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 21122
    },
    {
      "epoch": 0.21123,
      "grad_norm": 1.2464324409865812,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 21123
    },
    {
      "epoch": 0.21124,
      "grad_norm": 1.4351654375211487,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 21124
    },
    {
      "epoch": 0.21125,
      "grad_norm": 1.139776590390933,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 21125
    },
    {
      "epoch": 0.21126,
      "grad_norm": 1.3978253357428427,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21126
    },
    {
      "epoch": 0.21127,
      "grad_norm": 1.2922171671346667,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 21127
    },
    {
      "epoch": 0.21128,
      "grad_norm": 1.091379730592485,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 21128
    },
    {
      "epoch": 0.21129,
      "grad_norm": 1.4712654455929264,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 21129
    },
    {
      "epoch": 0.2113,
      "grad_norm": 0.9216631097573316,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 21130
    },
    {
      "epoch": 0.21131,
      "grad_norm": 1.407904789855863,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 21131
    },
    {
      "epoch": 0.21132,
      "grad_norm": 1.115428868593496,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 21132
    },
    {
      "epoch": 0.21133,
      "grad_norm": 1.3006217391820132,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 21133
    },
    {
      "epoch": 0.21134,
      "grad_norm": 1.0998440559746168,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 21134
    },
    {
      "epoch": 0.21135,
      "grad_norm": 1.2710129216531116,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 21135
    },
    {
      "epoch": 0.21136,
      "grad_norm": 1.3880099841956677,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 21136
    },
    {
      "epoch": 0.21137,
      "grad_norm": 1.274201487226877,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 21137
    },
    {
      "epoch": 0.21138,
      "grad_norm": 1.4527859691316656,
      "learning_rate": 0.003,
      "loss": 4.0224,
      "step": 21138
    },
    {
      "epoch": 0.21139,
      "grad_norm": 1.3381869632235348,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 21139
    },
    {
      "epoch": 0.2114,
      "grad_norm": 1.2203439761517438,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 21140
    },
    {
      "epoch": 0.21141,
      "grad_norm": 1.2587623540614699,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 21141
    },
    {
      "epoch": 0.21142,
      "grad_norm": 1.3981945231759798,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 21142
    },
    {
      "epoch": 0.21143,
      "grad_norm": 1.271008692230558,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 21143
    },
    {
      "epoch": 0.21144,
      "grad_norm": 1.1259055061671566,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 21144
    },
    {
      "epoch": 0.21145,
      "grad_norm": 1.2996841724908386,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 21145
    },
    {
      "epoch": 0.21146,
      "grad_norm": 1.1888755607807513,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 21146
    },
    {
      "epoch": 0.21147,
      "grad_norm": 1.2862247813508612,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 21147
    },
    {
      "epoch": 0.21148,
      "grad_norm": 1.1688646974365517,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 21148
    },
    {
      "epoch": 0.21149,
      "grad_norm": 1.363403802643418,
      "learning_rate": 0.003,
      "loss": 4.0885,
      "step": 21149
    },
    {
      "epoch": 0.2115,
      "grad_norm": 1.2336213830020937,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 21150
    },
    {
      "epoch": 0.21151,
      "grad_norm": 1.3083225835331127,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 21151
    },
    {
      "epoch": 0.21152,
      "grad_norm": 1.2041426169518765,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 21152
    },
    {
      "epoch": 0.21153,
      "grad_norm": 1.2308593899786529,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 21153
    },
    {
      "epoch": 0.21154,
      "grad_norm": 1.5824931816806196,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 21154
    },
    {
      "epoch": 0.21155,
      "grad_norm": 1.332366931832797,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21155
    },
    {
      "epoch": 0.21156,
      "grad_norm": 1.4667661411092565,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 21156
    },
    {
      "epoch": 0.21157,
      "grad_norm": 1.1830738359144912,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 21157
    },
    {
      "epoch": 0.21158,
      "grad_norm": 1.4195587534035023,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 21158
    },
    {
      "epoch": 0.21159,
      "grad_norm": 1.3111519755366765,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 21159
    },
    {
      "epoch": 0.2116,
      "grad_norm": 1.300192919042313,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 21160
    },
    {
      "epoch": 0.21161,
      "grad_norm": 1.2209088854363133,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 21161
    },
    {
      "epoch": 0.21162,
      "grad_norm": 1.2583148642570114,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 21162
    },
    {
      "epoch": 0.21163,
      "grad_norm": 1.4025946969762597,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 21163
    },
    {
      "epoch": 0.21164,
      "grad_norm": 1.130491503867519,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 21164
    },
    {
      "epoch": 0.21165,
      "grad_norm": 1.3669270369568793,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 21165
    },
    {
      "epoch": 0.21166,
      "grad_norm": 1.1992647571186519,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 21166
    },
    {
      "epoch": 0.21167,
      "grad_norm": 1.4464082027049883,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 21167
    },
    {
      "epoch": 0.21168,
      "grad_norm": 1.3387509078840587,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 21168
    },
    {
      "epoch": 0.21169,
      "grad_norm": 1.1012420365333515,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 21169
    },
    {
      "epoch": 0.2117,
      "grad_norm": 1.2878223957665704,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 21170
    },
    {
      "epoch": 0.21171,
      "grad_norm": 1.1488393297719888,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 21171
    },
    {
      "epoch": 0.21172,
      "grad_norm": 1.3890064273623446,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 21172
    },
    {
      "epoch": 0.21173,
      "grad_norm": 1.153878422157314,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21173
    },
    {
      "epoch": 0.21174,
      "grad_norm": 1.4317029178580625,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 21174
    },
    {
      "epoch": 0.21175,
      "grad_norm": 1.217479259645463,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 21175
    },
    {
      "epoch": 0.21176,
      "grad_norm": 1.1898684451219492,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 21176
    },
    {
      "epoch": 0.21177,
      "grad_norm": 1.2770393642886273,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 21177
    },
    {
      "epoch": 0.21178,
      "grad_norm": 1.233163842181116,
      "learning_rate": 0.003,
      "loss": 4.0128,
      "step": 21178
    },
    {
      "epoch": 0.21179,
      "grad_norm": 1.1305383322900322,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 21179
    },
    {
      "epoch": 0.2118,
      "grad_norm": 1.2239972747122971,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 21180
    },
    {
      "epoch": 0.21181,
      "grad_norm": 1.3121720290800374,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 21181
    },
    {
      "epoch": 0.21182,
      "grad_norm": 1.5015691005417624,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 21182
    },
    {
      "epoch": 0.21183,
      "grad_norm": 1.4855617135506352,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 21183
    },
    {
      "epoch": 0.21184,
      "grad_norm": 1.363701504514968,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 21184
    },
    {
      "epoch": 0.21185,
      "grad_norm": 1.159770894014433,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 21185
    },
    {
      "epoch": 0.21186,
      "grad_norm": 1.4774361893297834,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 21186
    },
    {
      "epoch": 0.21187,
      "grad_norm": 1.1007616891834122,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 21187
    },
    {
      "epoch": 0.21188,
      "grad_norm": 1.2487380713948097,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 21188
    },
    {
      "epoch": 0.21189,
      "grad_norm": 1.3618169855089544,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 21189
    },
    {
      "epoch": 0.2119,
      "grad_norm": 1.201925785707879,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 21190
    },
    {
      "epoch": 0.21191,
      "grad_norm": 1.3024386752170385,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 21191
    },
    {
      "epoch": 0.21192,
      "grad_norm": 1.1672279223852937,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 21192
    },
    {
      "epoch": 0.21193,
      "grad_norm": 1.3705558796138086,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 21193
    },
    {
      "epoch": 0.21194,
      "grad_norm": 1.4762161050778222,
      "learning_rate": 0.003,
      "loss": 4.0834,
      "step": 21194
    },
    {
      "epoch": 0.21195,
      "grad_norm": 1.2062317526880664,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 21195
    },
    {
      "epoch": 0.21196,
      "grad_norm": 1.340626791633665,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 21196
    },
    {
      "epoch": 0.21197,
      "grad_norm": 1.2354014350222875,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 21197
    },
    {
      "epoch": 0.21198,
      "grad_norm": 1.5150865936875821,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 21198
    },
    {
      "epoch": 0.21199,
      "grad_norm": 1.1791380457988314,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 21199
    },
    {
      "epoch": 0.212,
      "grad_norm": 1.4960503083917507,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 21200
    },
    {
      "epoch": 0.21201,
      "grad_norm": 1.0226116936688727,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 21201
    },
    {
      "epoch": 0.21202,
      "grad_norm": 1.4619565029751385,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 21202
    },
    {
      "epoch": 0.21203,
      "grad_norm": 1.0822958599251975,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21203
    },
    {
      "epoch": 0.21204,
      "grad_norm": 1.44743948852276,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 21204
    },
    {
      "epoch": 0.21205,
      "grad_norm": 1.0735995023147238,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 21205
    },
    {
      "epoch": 0.21206,
      "grad_norm": 1.4273899604773974,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 21206
    },
    {
      "epoch": 0.21207,
      "grad_norm": 1.0711027752697948,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 21207
    },
    {
      "epoch": 0.21208,
      "grad_norm": 1.5263229391706197,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 21208
    },
    {
      "epoch": 0.21209,
      "grad_norm": 0.9509188211685536,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 21209
    },
    {
      "epoch": 0.2121,
      "grad_norm": 1.1490255252840045,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 21210
    },
    {
      "epoch": 0.21211,
      "grad_norm": 1.3422143862668483,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 21211
    },
    {
      "epoch": 0.21212,
      "grad_norm": 1.2539022969418465,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 21212
    },
    {
      "epoch": 0.21213,
      "grad_norm": 1.8629431414796072,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 21213
    },
    {
      "epoch": 0.21214,
      "grad_norm": 1.0436144942927696,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 21214
    },
    {
      "epoch": 0.21215,
      "grad_norm": 1.2701470718243433,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 21215
    },
    {
      "epoch": 0.21216,
      "grad_norm": 1.2025971490312357,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 21216
    },
    {
      "epoch": 0.21217,
      "grad_norm": 1.1863459257692672,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 21217
    },
    {
      "epoch": 0.21218,
      "grad_norm": 1.2931599020212343,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 21218
    },
    {
      "epoch": 0.21219,
      "grad_norm": 1.166605540713005,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 21219
    },
    {
      "epoch": 0.2122,
      "grad_norm": 1.3880922740795296,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 21220
    },
    {
      "epoch": 0.21221,
      "grad_norm": 1.2206694900056876,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 21221
    },
    {
      "epoch": 0.21222,
      "grad_norm": 1.3585826498780238,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 21222
    },
    {
      "epoch": 0.21223,
      "grad_norm": 1.317578632988024,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 21223
    },
    {
      "epoch": 0.21224,
      "grad_norm": 1.2874456251618371,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 21224
    },
    {
      "epoch": 0.21225,
      "grad_norm": 1.241267859205183,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 21225
    },
    {
      "epoch": 0.21226,
      "grad_norm": 1.3982627897056497,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 21226
    },
    {
      "epoch": 0.21227,
      "grad_norm": 1.395875156197999,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 21227
    },
    {
      "epoch": 0.21228,
      "grad_norm": 1.355288621035149,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 21228
    },
    {
      "epoch": 0.21229,
      "grad_norm": 1.3051451099291878,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 21229
    },
    {
      "epoch": 0.2123,
      "grad_norm": 1.4068723537818764,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 21230
    },
    {
      "epoch": 0.21231,
      "grad_norm": 1.1396637306136057,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 21231
    },
    {
      "epoch": 0.21232,
      "grad_norm": 1.3511158184996983,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 21232
    },
    {
      "epoch": 0.21233,
      "grad_norm": 1.2450881997501164,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 21233
    },
    {
      "epoch": 0.21234,
      "grad_norm": 1.2643725323698822,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 21234
    },
    {
      "epoch": 0.21235,
      "grad_norm": 1.2409392883847383,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 21235
    },
    {
      "epoch": 0.21236,
      "grad_norm": 1.6763642327401815,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 21236
    },
    {
      "epoch": 0.21237,
      "grad_norm": 1.2025489336682045,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 21237
    },
    {
      "epoch": 0.21238,
      "grad_norm": 1.3302806801590354,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 21238
    },
    {
      "epoch": 0.21239,
      "grad_norm": 1.1525403685678053,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 21239
    },
    {
      "epoch": 0.2124,
      "grad_norm": 1.3638264634999357,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 21240
    },
    {
      "epoch": 0.21241,
      "grad_norm": 1.0902909893581767,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 21241
    },
    {
      "epoch": 0.21242,
      "grad_norm": 1.3797642168018844,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 21242
    },
    {
      "epoch": 0.21243,
      "grad_norm": 1.2206955407043554,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 21243
    },
    {
      "epoch": 0.21244,
      "grad_norm": 1.5156553552816339,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 21244
    },
    {
      "epoch": 0.21245,
      "grad_norm": 0.9515384315876183,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 21245
    },
    {
      "epoch": 0.21246,
      "grad_norm": 1.361146046697627,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 21246
    },
    {
      "epoch": 0.21247,
      "grad_norm": 1.5913645215564736,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 21247
    },
    {
      "epoch": 0.21248,
      "grad_norm": 1.4370052161228561,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 21248
    },
    {
      "epoch": 0.21249,
      "grad_norm": 1.2309898212193815,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 21249
    },
    {
      "epoch": 0.2125,
      "grad_norm": 1.4114840886409648,
      "learning_rate": 0.003,
      "loss": 4.0779,
      "step": 21250
    },
    {
      "epoch": 0.21251,
      "grad_norm": 1.1537287913840821,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 21251
    },
    {
      "epoch": 0.21252,
      "grad_norm": 1.2553623291546478,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 21252
    },
    {
      "epoch": 0.21253,
      "grad_norm": 1.3921081641908704,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 21253
    },
    {
      "epoch": 0.21254,
      "grad_norm": 1.1053487916938944,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 21254
    },
    {
      "epoch": 0.21255,
      "grad_norm": 1.8061320320678969,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 21255
    },
    {
      "epoch": 0.21256,
      "grad_norm": 0.8969966868615841,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 21256
    },
    {
      "epoch": 0.21257,
      "grad_norm": 1.205780940445784,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 21257
    },
    {
      "epoch": 0.21258,
      "grad_norm": 1.3600689626589804,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 21258
    },
    {
      "epoch": 0.21259,
      "grad_norm": 1.3242629843533986,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 21259
    },
    {
      "epoch": 0.2126,
      "grad_norm": 1.3834817194882478,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 21260
    },
    {
      "epoch": 0.21261,
      "grad_norm": 1.0838249822303874,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 21261
    },
    {
      "epoch": 0.21262,
      "grad_norm": 1.5128461510514026,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 21262
    },
    {
      "epoch": 0.21263,
      "grad_norm": 1.1827106561363243,
      "learning_rate": 0.003,
      "loss": 4.0093,
      "step": 21263
    },
    {
      "epoch": 0.21264,
      "grad_norm": 1.3315292703566595,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 21264
    },
    {
      "epoch": 0.21265,
      "grad_norm": 1.1555146028756413,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 21265
    },
    {
      "epoch": 0.21266,
      "grad_norm": 1.4866501663257243,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 21266
    },
    {
      "epoch": 0.21267,
      "grad_norm": 1.4051336614763095,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 21267
    },
    {
      "epoch": 0.21268,
      "grad_norm": 1.1328052502105757,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21268
    },
    {
      "epoch": 0.21269,
      "grad_norm": 1.2898245831393798,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 21269
    },
    {
      "epoch": 0.2127,
      "grad_norm": 1.3953673594523057,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 21270
    },
    {
      "epoch": 0.21271,
      "grad_norm": 1.131002090499698,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 21271
    },
    {
      "epoch": 0.21272,
      "grad_norm": 1.4609953205099613,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 21272
    },
    {
      "epoch": 0.21273,
      "grad_norm": 1.1210795678224974,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 21273
    },
    {
      "epoch": 0.21274,
      "grad_norm": 1.410929257076832,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 21274
    },
    {
      "epoch": 0.21275,
      "grad_norm": 1.2311509749398506,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 21275
    },
    {
      "epoch": 0.21276,
      "grad_norm": 1.4049179781701777,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 21276
    },
    {
      "epoch": 0.21277,
      "grad_norm": 1.099506667330958,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 21277
    },
    {
      "epoch": 0.21278,
      "grad_norm": 1.3875113272449338,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 21278
    },
    {
      "epoch": 0.21279,
      "grad_norm": 1.2558224446512296,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 21279
    },
    {
      "epoch": 0.2128,
      "grad_norm": 1.3544694177374053,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 21280
    },
    {
      "epoch": 0.21281,
      "grad_norm": 1.2628164865838394,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 21281
    },
    {
      "epoch": 0.21282,
      "grad_norm": 1.094287994161794,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 21282
    },
    {
      "epoch": 0.21283,
      "grad_norm": 1.2896996630861157,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 21283
    },
    {
      "epoch": 0.21284,
      "grad_norm": 1.1074798369391314,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 21284
    },
    {
      "epoch": 0.21285,
      "grad_norm": 1.3538423443692602,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 21285
    },
    {
      "epoch": 0.21286,
      "grad_norm": 1.1110113965840402,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 21286
    },
    {
      "epoch": 0.21287,
      "grad_norm": 1.2221320233309492,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 21287
    },
    {
      "epoch": 0.21288,
      "grad_norm": 1.2271627263198994,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 21288
    },
    {
      "epoch": 0.21289,
      "grad_norm": 1.2211200333212375,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 21289
    },
    {
      "epoch": 0.2129,
      "grad_norm": 1.1597470812427897,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 21290
    },
    {
      "epoch": 0.21291,
      "grad_norm": 1.1865759125638362,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 21291
    },
    {
      "epoch": 0.21292,
      "grad_norm": 1.2334931450457307,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 21292
    },
    {
      "epoch": 0.21293,
      "grad_norm": 1.3137351543194178,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 21293
    },
    {
      "epoch": 0.21294,
      "grad_norm": 1.2310978816330074,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 21294
    },
    {
      "epoch": 0.21295,
      "grad_norm": 1.4940754079648613,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 21295
    },
    {
      "epoch": 0.21296,
      "grad_norm": 1.3409897686951235,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 21296
    },
    {
      "epoch": 0.21297,
      "grad_norm": 1.2704738218133316,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 21297
    },
    {
      "epoch": 0.21298,
      "grad_norm": 1.2609362505399053,
      "learning_rate": 0.003,
      "loss": 4.0851,
      "step": 21298
    },
    {
      "epoch": 0.21299,
      "grad_norm": 1.225084252513662,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 21299
    },
    {
      "epoch": 0.213,
      "grad_norm": 1.20988265938044,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 21300
    },
    {
      "epoch": 0.21301,
      "grad_norm": 1.4497084415618406,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21301
    },
    {
      "epoch": 0.21302,
      "grad_norm": 1.193484205546864,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 21302
    },
    {
      "epoch": 0.21303,
      "grad_norm": 1.2345370413341212,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 21303
    },
    {
      "epoch": 0.21304,
      "grad_norm": 1.318776212154148,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 21304
    },
    {
      "epoch": 0.21305,
      "grad_norm": 1.6794035477830318,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 21305
    },
    {
      "epoch": 0.21306,
      "grad_norm": 1.298167289102819,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 21306
    },
    {
      "epoch": 0.21307,
      "grad_norm": 1.1408218010914621,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 21307
    },
    {
      "epoch": 0.21308,
      "grad_norm": 1.4345539439571031,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 21308
    },
    {
      "epoch": 0.21309,
      "grad_norm": 1.08308841421971,
      "learning_rate": 0.003,
      "loss": 4.0041,
      "step": 21309
    },
    {
      "epoch": 0.2131,
      "grad_norm": 1.323940917710682,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 21310
    },
    {
      "epoch": 0.21311,
      "grad_norm": 1.2696573808223026,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 21311
    },
    {
      "epoch": 0.21312,
      "grad_norm": 1.1996852266168259,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 21312
    },
    {
      "epoch": 0.21313,
      "grad_norm": 1.369723073919903,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 21313
    },
    {
      "epoch": 0.21314,
      "grad_norm": 1.28562190993862,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 21314
    },
    {
      "epoch": 0.21315,
      "grad_norm": 1.446931202538823,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 21315
    },
    {
      "epoch": 0.21316,
      "grad_norm": 1.2156284322875552,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 21316
    },
    {
      "epoch": 0.21317,
      "grad_norm": 1.3079028561141957,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 21317
    },
    {
      "epoch": 0.21318,
      "grad_norm": 1.3649474090891291,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 21318
    },
    {
      "epoch": 0.21319,
      "grad_norm": 1.2606745758198419,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 21319
    },
    {
      "epoch": 0.2132,
      "grad_norm": 1.4084878363128626,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 21320
    },
    {
      "epoch": 0.21321,
      "grad_norm": 1.2936532541666024,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 21321
    },
    {
      "epoch": 0.21322,
      "grad_norm": 1.38838107459198,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 21322
    },
    {
      "epoch": 0.21323,
      "grad_norm": 1.1583770792135066,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 21323
    },
    {
      "epoch": 0.21324,
      "grad_norm": 1.2580253757229194,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 21324
    },
    {
      "epoch": 0.21325,
      "grad_norm": 1.2897936310118077,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 21325
    },
    {
      "epoch": 0.21326,
      "grad_norm": 1.3398874636543947,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 21326
    },
    {
      "epoch": 0.21327,
      "grad_norm": 1.2076014768358647,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 21327
    },
    {
      "epoch": 0.21328,
      "grad_norm": 1.2883759373541221,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 21328
    },
    {
      "epoch": 0.21329,
      "grad_norm": 1.3162283800326189,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 21329
    },
    {
      "epoch": 0.2133,
      "grad_norm": 1.250449034147522,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 21330
    },
    {
      "epoch": 0.21331,
      "grad_norm": 1.4278049635796872,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 21331
    },
    {
      "epoch": 0.21332,
      "grad_norm": 1.14298792973109,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 21332
    },
    {
      "epoch": 0.21333,
      "grad_norm": 1.6626489515816607,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 21333
    },
    {
      "epoch": 0.21334,
      "grad_norm": 1.2030828894352494,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 21334
    },
    {
      "epoch": 0.21335,
      "grad_norm": 1.349062438237261,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21335
    },
    {
      "epoch": 0.21336,
      "grad_norm": 1.3795838496859136,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21336
    },
    {
      "epoch": 0.21337,
      "grad_norm": 1.0649483702192992,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21337
    },
    {
      "epoch": 0.21338,
      "grad_norm": 1.508413228100052,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 21338
    },
    {
      "epoch": 0.21339,
      "grad_norm": 1.0765460212141766,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 21339
    },
    {
      "epoch": 0.2134,
      "grad_norm": 1.4888043428112916,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 21340
    },
    {
      "epoch": 0.21341,
      "grad_norm": 0.9995688586010715,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 21341
    },
    {
      "epoch": 0.21342,
      "grad_norm": 1.3972463254192167,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 21342
    },
    {
      "epoch": 0.21343,
      "grad_norm": 1.107999971944543,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 21343
    },
    {
      "epoch": 0.21344,
      "grad_norm": 1.3448387423992543,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 21344
    },
    {
      "epoch": 0.21345,
      "grad_norm": 1.2637795248816244,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 21345
    },
    {
      "epoch": 0.21346,
      "grad_norm": 1.0118101759825742,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 21346
    },
    {
      "epoch": 0.21347,
      "grad_norm": 1.4459521110402487,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 21347
    },
    {
      "epoch": 0.21348,
      "grad_norm": 1.0311863301319009,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 21348
    },
    {
      "epoch": 0.21349,
      "grad_norm": 1.8475944530552786,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 21349
    },
    {
      "epoch": 0.2135,
      "grad_norm": 1.138540427507579,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 21350
    },
    {
      "epoch": 0.21351,
      "grad_norm": 1.3721821472192584,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 21351
    },
    {
      "epoch": 0.21352,
      "grad_norm": 1.4142680422434566,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 21352
    },
    {
      "epoch": 0.21353,
      "grad_norm": 1.3751571157419424,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 21353
    },
    {
      "epoch": 0.21354,
      "grad_norm": 1.1920477240206362,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 21354
    },
    {
      "epoch": 0.21355,
      "grad_norm": 1.3629304224733283,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 21355
    },
    {
      "epoch": 0.21356,
      "grad_norm": 1.2024918478466937,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 21356
    },
    {
      "epoch": 0.21357,
      "grad_norm": 1.1978202577753532,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 21357
    },
    {
      "epoch": 0.21358,
      "grad_norm": 1.1474077755882799,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 21358
    },
    {
      "epoch": 0.21359,
      "grad_norm": 1.3273962868251463,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 21359
    },
    {
      "epoch": 0.2136,
      "grad_norm": 1.2976154120167893,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 21360
    },
    {
      "epoch": 0.21361,
      "grad_norm": 1.8210705771178908,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 21361
    },
    {
      "epoch": 0.21362,
      "grad_norm": 1.0567342453250417,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 21362
    },
    {
      "epoch": 0.21363,
      "grad_norm": 1.5856935209757512,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21363
    },
    {
      "epoch": 0.21364,
      "grad_norm": 1.2636879371466914,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 21364
    },
    {
      "epoch": 0.21365,
      "grad_norm": 1.2705957404981245,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 21365
    },
    {
      "epoch": 0.21366,
      "grad_norm": 1.2541463528391474,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 21366
    },
    {
      "epoch": 0.21367,
      "grad_norm": 1.3865654261817257,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 21367
    },
    {
      "epoch": 0.21368,
      "grad_norm": 1.4439535448527894,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 21368
    },
    {
      "epoch": 0.21369,
      "grad_norm": 1.627448150006948,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 21369
    },
    {
      "epoch": 0.2137,
      "grad_norm": 1.0886093563909551,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 21370
    },
    {
      "epoch": 0.21371,
      "grad_norm": 1.282950003680138,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 21371
    },
    {
      "epoch": 0.21372,
      "grad_norm": 1.312882928122349,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 21372
    },
    {
      "epoch": 0.21373,
      "grad_norm": 1.4587166915301903,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 21373
    },
    {
      "epoch": 0.21374,
      "grad_norm": 1.2093497165791725,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 21374
    },
    {
      "epoch": 0.21375,
      "grad_norm": 1.2743170494067226,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 21375
    },
    {
      "epoch": 0.21376,
      "grad_norm": 1.3201371885098845,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 21376
    },
    {
      "epoch": 0.21377,
      "grad_norm": 1.260209169861331,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 21377
    },
    {
      "epoch": 0.21378,
      "grad_norm": 1.4904707031555913,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 21378
    },
    {
      "epoch": 0.21379,
      "grad_norm": 1.0340463707054346,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 21379
    },
    {
      "epoch": 0.2138,
      "grad_norm": 1.5150284932481612,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 21380
    },
    {
      "epoch": 0.21381,
      "grad_norm": 1.1273990458283891,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 21381
    },
    {
      "epoch": 0.21382,
      "grad_norm": 1.3210726311942573,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 21382
    },
    {
      "epoch": 0.21383,
      "grad_norm": 1.1069027042636328,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 21383
    },
    {
      "epoch": 0.21384,
      "grad_norm": 1.3092370982697676,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 21384
    },
    {
      "epoch": 0.21385,
      "grad_norm": 1.6472511093567705,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 21385
    },
    {
      "epoch": 0.21386,
      "grad_norm": 1.1252433906386323,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 21386
    },
    {
      "epoch": 0.21387,
      "grad_norm": 1.624161635575557,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 21387
    },
    {
      "epoch": 0.21388,
      "grad_norm": 0.9656518628738969,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 21388
    },
    {
      "epoch": 0.21389,
      "grad_norm": 1.51171735229888,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 21389
    },
    {
      "epoch": 0.2139,
      "grad_norm": 1.2277851492776417,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 21390
    },
    {
      "epoch": 0.21391,
      "grad_norm": 1.584747677568358,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 21391
    },
    {
      "epoch": 0.21392,
      "grad_norm": 1.202180154252793,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 21392
    },
    {
      "epoch": 0.21393,
      "grad_norm": 1.1697463543298947,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 21393
    },
    {
      "epoch": 0.21394,
      "grad_norm": 1.4453891958788705,
      "learning_rate": 0.003,
      "loss": 4.0725,
      "step": 21394
    },
    {
      "epoch": 0.21395,
      "grad_norm": 1.0284231831226134,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 21395
    },
    {
      "epoch": 0.21396,
      "grad_norm": 1.3694082143995245,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 21396
    },
    {
      "epoch": 0.21397,
      "grad_norm": 1.3419886484533383,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 21397
    },
    {
      "epoch": 0.21398,
      "grad_norm": 1.1957459993332311,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 21398
    },
    {
      "epoch": 0.21399,
      "grad_norm": 1.2523630190898598,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 21399
    },
    {
      "epoch": 0.214,
      "grad_norm": 1.2590213585149514,
      "learning_rate": 0.003,
      "loss": 4.0787,
      "step": 21400
    },
    {
      "epoch": 0.21401,
      "grad_norm": 1.2158586422954252,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 21401
    },
    {
      "epoch": 0.21402,
      "grad_norm": 1.1927925220634177,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 21402
    },
    {
      "epoch": 0.21403,
      "grad_norm": 1.3230329876727829,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 21403
    },
    {
      "epoch": 0.21404,
      "grad_norm": 1.3948266556870443,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 21404
    },
    {
      "epoch": 0.21405,
      "grad_norm": 1.4600556127552742,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 21405
    },
    {
      "epoch": 0.21406,
      "grad_norm": 1.023712788436025,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 21406
    },
    {
      "epoch": 0.21407,
      "grad_norm": 1.59571712199933,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 21407
    },
    {
      "epoch": 0.21408,
      "grad_norm": 0.9265689920715735,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 21408
    },
    {
      "epoch": 0.21409,
      "grad_norm": 1.2559292429582913,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 21409
    },
    {
      "epoch": 0.2141,
      "grad_norm": 1.356898234285401,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 21410
    },
    {
      "epoch": 0.21411,
      "grad_norm": 1.3822189229917954,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 21411
    },
    {
      "epoch": 0.21412,
      "grad_norm": 1.1456331216536773,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 21412
    },
    {
      "epoch": 0.21413,
      "grad_norm": 1.3884648494073832,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 21413
    },
    {
      "epoch": 0.21414,
      "grad_norm": 1.3388728832041317,
      "learning_rate": 0.003,
      "loss": 4.0125,
      "step": 21414
    },
    {
      "epoch": 0.21415,
      "grad_norm": 1.083806039595083,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 21415
    },
    {
      "epoch": 0.21416,
      "grad_norm": 1.3370142619299101,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 21416
    },
    {
      "epoch": 0.21417,
      "grad_norm": 1.2235260078732255,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 21417
    },
    {
      "epoch": 0.21418,
      "grad_norm": 1.2795875579430278,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 21418
    },
    {
      "epoch": 0.21419,
      "grad_norm": 1.4309625949091171,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 21419
    },
    {
      "epoch": 0.2142,
      "grad_norm": 1.585382629070091,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 21420
    },
    {
      "epoch": 0.21421,
      "grad_norm": 1.006334571201957,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 21421
    },
    {
      "epoch": 0.21422,
      "grad_norm": 1.4230217197820794,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 21422
    },
    {
      "epoch": 0.21423,
      "grad_norm": 1.1473431232363076,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21423
    },
    {
      "epoch": 0.21424,
      "grad_norm": 1.406963978924778,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 21424
    },
    {
      "epoch": 0.21425,
      "grad_norm": 1.2102770648595627,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 21425
    },
    {
      "epoch": 0.21426,
      "grad_norm": 1.362648793885832,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 21426
    },
    {
      "epoch": 0.21427,
      "grad_norm": 1.2221966631120762,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 21427
    },
    {
      "epoch": 0.21428,
      "grad_norm": 1.2148550249578498,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 21428
    },
    {
      "epoch": 0.21429,
      "grad_norm": 1.3770428805474857,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 21429
    },
    {
      "epoch": 0.2143,
      "grad_norm": 1.3735600093893532,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 21430
    },
    {
      "epoch": 0.21431,
      "grad_norm": 1.247480959233962,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 21431
    },
    {
      "epoch": 0.21432,
      "grad_norm": 1.0875437244340893,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 21432
    },
    {
      "epoch": 0.21433,
      "grad_norm": 1.5089941568181857,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21433
    },
    {
      "epoch": 0.21434,
      "grad_norm": 1.0593319177878011,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 21434
    },
    {
      "epoch": 0.21435,
      "grad_norm": 1.4208541942764379,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 21435
    },
    {
      "epoch": 0.21436,
      "grad_norm": 0.9393903080155143,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 21436
    },
    {
      "epoch": 0.21437,
      "grad_norm": 1.591779273895513,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 21437
    },
    {
      "epoch": 0.21438,
      "grad_norm": 1.1622175569073974,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 21438
    },
    {
      "epoch": 0.21439,
      "grad_norm": 1.6971122172312312,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 21439
    },
    {
      "epoch": 0.2144,
      "grad_norm": 1.1780489518286923,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 21440
    },
    {
      "epoch": 0.21441,
      "grad_norm": 1.0365230744754172,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 21441
    },
    {
      "epoch": 0.21442,
      "grad_norm": 1.4104533794855865,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 21442
    },
    {
      "epoch": 0.21443,
      "grad_norm": 1.2555781924185174,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 21443
    },
    {
      "epoch": 0.21444,
      "grad_norm": 1.4190495250147561,
      "learning_rate": 0.003,
      "loss": 4.0903,
      "step": 21444
    },
    {
      "epoch": 0.21445,
      "grad_norm": 1.2594779709007058,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 21445
    },
    {
      "epoch": 0.21446,
      "grad_norm": 1.437264589849899,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 21446
    },
    {
      "epoch": 0.21447,
      "grad_norm": 1.1470295091155245,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 21447
    },
    {
      "epoch": 0.21448,
      "grad_norm": 1.3882002714959472,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 21448
    },
    {
      "epoch": 0.21449,
      "grad_norm": 1.1807616504826879,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 21449
    },
    {
      "epoch": 0.2145,
      "grad_norm": 1.4204086149297122,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 21450
    },
    {
      "epoch": 0.21451,
      "grad_norm": 1.4210985255809614,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 21451
    },
    {
      "epoch": 0.21452,
      "grad_norm": 1.2066524038320814,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 21452
    },
    {
      "epoch": 0.21453,
      "grad_norm": 1.299796867171121,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 21453
    },
    {
      "epoch": 0.21454,
      "grad_norm": 1.2914211157329647,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 21454
    },
    {
      "epoch": 0.21455,
      "grad_norm": 1.1828566623496566,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 21455
    },
    {
      "epoch": 0.21456,
      "grad_norm": 1.485362907689217,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 21456
    },
    {
      "epoch": 0.21457,
      "grad_norm": 1.1259998176997275,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 21457
    },
    {
      "epoch": 0.21458,
      "grad_norm": 1.4734118462883794,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 21458
    },
    {
      "epoch": 0.21459,
      "grad_norm": 0.9283127651333246,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 21459
    },
    {
      "epoch": 0.2146,
      "grad_norm": 1.514546398235819,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 21460
    },
    {
      "epoch": 0.21461,
      "grad_norm": 1.2310494697427181,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 21461
    },
    {
      "epoch": 0.21462,
      "grad_norm": 1.520764891934006,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 21462
    },
    {
      "epoch": 0.21463,
      "grad_norm": 1.1134509205383405,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 21463
    },
    {
      "epoch": 0.21464,
      "grad_norm": 1.2882346731976746,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 21464
    },
    {
      "epoch": 0.21465,
      "grad_norm": 1.244103986944709,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 21465
    },
    {
      "epoch": 0.21466,
      "grad_norm": 1.1535959575311907,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 21466
    },
    {
      "epoch": 0.21467,
      "grad_norm": 1.300012609803945,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 21467
    },
    {
      "epoch": 0.21468,
      "grad_norm": 1.2087191925302314,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 21468
    },
    {
      "epoch": 0.21469,
      "grad_norm": 1.2723417012323934,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 21469
    },
    {
      "epoch": 0.2147,
      "grad_norm": 1.1233641225791804,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 21470
    },
    {
      "epoch": 0.21471,
      "grad_norm": 1.569980926663672,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 21471
    },
    {
      "epoch": 0.21472,
      "grad_norm": 1.1506424120774437,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 21472
    },
    {
      "epoch": 0.21473,
      "grad_norm": 1.484760860744661,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 21473
    },
    {
      "epoch": 0.21474,
      "grad_norm": 1.4899737721885717,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 21474
    },
    {
      "epoch": 0.21475,
      "grad_norm": 1.489509118493849,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 21475
    },
    {
      "epoch": 0.21476,
      "grad_norm": 1.31366767297891,
      "learning_rate": 0.003,
      "loss": 4.0777,
      "step": 21476
    },
    {
      "epoch": 0.21477,
      "grad_norm": 1.2960671083487345,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 21477
    },
    {
      "epoch": 0.21478,
      "grad_norm": 1.2062205712519865,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 21478
    },
    {
      "epoch": 0.21479,
      "grad_norm": 1.370323358808596,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 21479
    },
    {
      "epoch": 0.2148,
      "grad_norm": 1.1825382882281281,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 21480
    },
    {
      "epoch": 0.21481,
      "grad_norm": 1.459331329537895,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 21481
    },
    {
      "epoch": 0.21482,
      "grad_norm": 1.2949174695844587,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 21482
    },
    {
      "epoch": 0.21483,
      "grad_norm": 1.375259940854289,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 21483
    },
    {
      "epoch": 0.21484,
      "grad_norm": 1.2303477301289663,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 21484
    },
    {
      "epoch": 0.21485,
      "grad_norm": 1.4353712203613151,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 21485
    },
    {
      "epoch": 0.21486,
      "grad_norm": 1.0818882035804576,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 21486
    },
    {
      "epoch": 0.21487,
      "grad_norm": 1.400946805712582,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 21487
    },
    {
      "epoch": 0.21488,
      "grad_norm": 1.2237243568913674,
      "learning_rate": 0.003,
      "loss": 4.0856,
      "step": 21488
    },
    {
      "epoch": 0.21489,
      "grad_norm": 1.5907722556953077,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 21489
    },
    {
      "epoch": 0.2149,
      "grad_norm": 1.1326397112931466,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 21490
    },
    {
      "epoch": 0.21491,
      "grad_norm": 1.5141931094356682,
      "learning_rate": 0.003,
      "loss": 4.0953,
      "step": 21491
    },
    {
      "epoch": 0.21492,
      "grad_norm": 1.2468582875027279,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 21492
    },
    {
      "epoch": 0.21493,
      "grad_norm": 1.58904364529496,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 21493
    },
    {
      "epoch": 0.21494,
      "grad_norm": 0.9441584505932659,
      "learning_rate": 0.003,
      "loss": 4.0889,
      "step": 21494
    },
    {
      "epoch": 0.21495,
      "grad_norm": 1.2017863682929744,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 21495
    },
    {
      "epoch": 0.21496,
      "grad_norm": 1.4082893274140686,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 21496
    },
    {
      "epoch": 0.21497,
      "grad_norm": 1.1384979025041544,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 21497
    },
    {
      "epoch": 0.21498,
      "grad_norm": 1.4893106830259322,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 21498
    },
    {
      "epoch": 0.21499,
      "grad_norm": 1.3061178650236966,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 21499
    },
    {
      "epoch": 0.215,
      "grad_norm": 1.1975960147897236,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 21500
    },
    {
      "epoch": 0.21501,
      "grad_norm": 1.378108295364783,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 21501
    },
    {
      "epoch": 0.21502,
      "grad_norm": 1.2509148359913587,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 21502
    },
    {
      "epoch": 0.21503,
      "grad_norm": 1.1785855504452174,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 21503
    },
    {
      "epoch": 0.21504,
      "grad_norm": 1.2981264827377557,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 21504
    },
    {
      "epoch": 0.21505,
      "grad_norm": 1.1593302304472886,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 21505
    },
    {
      "epoch": 0.21506,
      "grad_norm": 1.4530196824357662,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 21506
    },
    {
      "epoch": 0.21507,
      "grad_norm": 1.131644225116241,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 21507
    },
    {
      "epoch": 0.21508,
      "grad_norm": 1.2982821610148942,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 21508
    },
    {
      "epoch": 0.21509,
      "grad_norm": 1.1850714169582515,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 21509
    },
    {
      "epoch": 0.2151,
      "grad_norm": 1.3086252014539759,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 21510
    },
    {
      "epoch": 0.21511,
      "grad_norm": 1.3489796934325005,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 21511
    },
    {
      "epoch": 0.21512,
      "grad_norm": 1.4437118489819842,
      "learning_rate": 0.003,
      "loss": 4.0082,
      "step": 21512
    },
    {
      "epoch": 0.21513,
      "grad_norm": 1.4002119949049092,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 21513
    },
    {
      "epoch": 0.21514,
      "grad_norm": 1.1188591397030128,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 21514
    },
    {
      "epoch": 0.21515,
      "grad_norm": 1.4327792047917798,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 21515
    },
    {
      "epoch": 0.21516,
      "grad_norm": 1.341617127907377,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 21516
    },
    {
      "epoch": 0.21517,
      "grad_norm": 1.164987996259884,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 21517
    },
    {
      "epoch": 0.21518,
      "grad_norm": 1.2281640200108996,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 21518
    },
    {
      "epoch": 0.21519,
      "grad_norm": 1.312899207326584,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 21519
    },
    {
      "epoch": 0.2152,
      "grad_norm": 1.206296180283065,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 21520
    },
    {
      "epoch": 0.21521,
      "grad_norm": 1.459840488704888,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 21521
    },
    {
      "epoch": 0.21522,
      "grad_norm": 0.9606316711795931,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 21522
    },
    {
      "epoch": 0.21523,
      "grad_norm": 1.547110083453148,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 21523
    },
    {
      "epoch": 0.21524,
      "grad_norm": 1.0169027031505786,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 21524
    },
    {
      "epoch": 0.21525,
      "grad_norm": 1.4369074392414622,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 21525
    },
    {
      "epoch": 0.21526,
      "grad_norm": 1.1627215327156861,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 21526
    },
    {
      "epoch": 0.21527,
      "grad_norm": 1.4412975392393965,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 21527
    },
    {
      "epoch": 0.21528,
      "grad_norm": 1.091798184460252,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 21528
    },
    {
      "epoch": 0.21529,
      "grad_norm": 1.4442076048064423,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 21529
    },
    {
      "epoch": 0.2153,
      "grad_norm": 1.326773243090699,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 21530
    },
    {
      "epoch": 0.21531,
      "grad_norm": 1.4728032312949157,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 21531
    },
    {
      "epoch": 0.21532,
      "grad_norm": 1.3554900969417256,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 21532
    },
    {
      "epoch": 0.21533,
      "grad_norm": 1.3109923420859935,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 21533
    },
    {
      "epoch": 0.21534,
      "grad_norm": 1.3344710645017832,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 21534
    },
    {
      "epoch": 0.21535,
      "grad_norm": 1.4446752836141827,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 21535
    },
    {
      "epoch": 0.21536,
      "grad_norm": 1.2802692177491153,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 21536
    },
    {
      "epoch": 0.21537,
      "grad_norm": 1.240002120002768,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 21537
    },
    {
      "epoch": 0.21538,
      "grad_norm": 1.2479876865674657,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 21538
    },
    {
      "epoch": 0.21539,
      "grad_norm": 1.2777882246417078,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 21539
    },
    {
      "epoch": 0.2154,
      "grad_norm": 1.216095670864655,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 21540
    },
    {
      "epoch": 0.21541,
      "grad_norm": 1.2426701966452454,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 21541
    },
    {
      "epoch": 0.21542,
      "grad_norm": 1.6144032344125891,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 21542
    },
    {
      "epoch": 0.21543,
      "grad_norm": 1.2786230481168372,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 21543
    },
    {
      "epoch": 0.21544,
      "grad_norm": 1.3659857976232632,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 21544
    },
    {
      "epoch": 0.21545,
      "grad_norm": 1.3709623322979243,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 21545
    },
    {
      "epoch": 0.21546,
      "grad_norm": 1.1588598521743951,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 21546
    },
    {
      "epoch": 0.21547,
      "grad_norm": 1.3220473388613287,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 21547
    },
    {
      "epoch": 0.21548,
      "grad_norm": 1.2082416254846837,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 21548
    },
    {
      "epoch": 0.21549,
      "grad_norm": 1.2889883211256423,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 21549
    },
    {
      "epoch": 0.2155,
      "grad_norm": 1.1773015288618838,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 21550
    },
    {
      "epoch": 0.21551,
      "grad_norm": 1.3413792697844495,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 21551
    },
    {
      "epoch": 0.21552,
      "grad_norm": 1.2138099463872778,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 21552
    },
    {
      "epoch": 0.21553,
      "grad_norm": 1.322098344705854,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 21553
    },
    {
      "epoch": 0.21554,
      "grad_norm": 1.1972945432618907,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 21554
    },
    {
      "epoch": 0.21555,
      "grad_norm": 1.3546799217413956,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 21555
    },
    {
      "epoch": 0.21556,
      "grad_norm": 1.4130727000380368,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21556
    },
    {
      "epoch": 0.21557,
      "grad_norm": 1.1751027556247067,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 21557
    },
    {
      "epoch": 0.21558,
      "grad_norm": 1.4329701254594025,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 21558
    },
    {
      "epoch": 0.21559,
      "grad_norm": 1.1449424741576872,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 21559
    },
    {
      "epoch": 0.2156,
      "grad_norm": 1.4630223754035958,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 21560
    },
    {
      "epoch": 0.21561,
      "grad_norm": 1.2564142417719932,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 21561
    },
    {
      "epoch": 0.21562,
      "grad_norm": 1.2367252119445342,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 21562
    },
    {
      "epoch": 0.21563,
      "grad_norm": 1.2917252238179042,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 21563
    },
    {
      "epoch": 0.21564,
      "grad_norm": 1.3584215993866915,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 21564
    },
    {
      "epoch": 0.21565,
      "grad_norm": 1.1646880532013897,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 21565
    },
    {
      "epoch": 0.21566,
      "grad_norm": 1.4133766722188805,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 21566
    },
    {
      "epoch": 0.21567,
      "grad_norm": 1.382450781353812,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 21567
    },
    {
      "epoch": 0.21568,
      "grad_norm": 1.0461181073194628,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 21568
    },
    {
      "epoch": 0.21569,
      "grad_norm": 1.4468496782759792,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 21569
    },
    {
      "epoch": 0.2157,
      "grad_norm": 1.0693770762476509,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 21570
    },
    {
      "epoch": 0.21571,
      "grad_norm": 1.3436574081692902,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 21571
    },
    {
      "epoch": 0.21572,
      "grad_norm": 1.1077115140885394,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21572
    },
    {
      "epoch": 0.21573,
      "grad_norm": 1.474968349036726,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 21573
    },
    {
      "epoch": 0.21574,
      "grad_norm": 1.2243913229395103,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 21574
    },
    {
      "epoch": 0.21575,
      "grad_norm": 1.4357631117853142,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21575
    },
    {
      "epoch": 0.21576,
      "grad_norm": 1.143636442416954,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21576
    },
    {
      "epoch": 0.21577,
      "grad_norm": 1.3307684153775725,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 21577
    },
    {
      "epoch": 0.21578,
      "grad_norm": 1.4348354005676,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21578
    },
    {
      "epoch": 0.21579,
      "grad_norm": 1.1869246906858275,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 21579
    },
    {
      "epoch": 0.2158,
      "grad_norm": 1.3228739877660194,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 21580
    },
    {
      "epoch": 0.21581,
      "grad_norm": 1.259811713759345,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 21581
    },
    {
      "epoch": 0.21582,
      "grad_norm": 1.2068431794784156,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 21582
    },
    {
      "epoch": 0.21583,
      "grad_norm": 1.1254360834494639,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 21583
    },
    {
      "epoch": 0.21584,
      "grad_norm": 1.4300677552421914,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 21584
    },
    {
      "epoch": 0.21585,
      "grad_norm": 1.2716059702652318,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 21585
    },
    {
      "epoch": 0.21586,
      "grad_norm": 1.3081695594695006,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 21586
    },
    {
      "epoch": 0.21587,
      "grad_norm": 1.2917306653430283,
      "learning_rate": 0.003,
      "loss": 4.0881,
      "step": 21587
    },
    {
      "epoch": 0.21588,
      "grad_norm": 1.2968082982162343,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 21588
    },
    {
      "epoch": 0.21589,
      "grad_norm": 1.2790088314939978,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 21589
    },
    {
      "epoch": 0.2159,
      "grad_norm": 1.2423031177996033,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 21590
    },
    {
      "epoch": 0.21591,
      "grad_norm": 1.3732270439759997,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 21591
    },
    {
      "epoch": 0.21592,
      "grad_norm": 1.3319770501913595,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 21592
    },
    {
      "epoch": 0.21593,
      "grad_norm": 1.3052330638964627,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 21593
    },
    {
      "epoch": 0.21594,
      "grad_norm": 1.3956728189002865,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 21594
    },
    {
      "epoch": 0.21595,
      "grad_norm": 1.4167772078378211,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 21595
    },
    {
      "epoch": 0.21596,
      "grad_norm": 1.307222877586128,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 21596
    },
    {
      "epoch": 0.21597,
      "grad_norm": 1.2620104264140628,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 21597
    },
    {
      "epoch": 0.21598,
      "grad_norm": 1.4415953331243176,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 21598
    },
    {
      "epoch": 0.21599,
      "grad_norm": 1.1689909013279838,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 21599
    },
    {
      "epoch": 0.216,
      "grad_norm": 1.1752426271453542,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 21600
    },
    {
      "epoch": 0.21601,
      "grad_norm": 1.4373277102072644,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 21601
    },
    {
      "epoch": 0.21602,
      "grad_norm": 1.139326460570499,
      "learning_rate": 0.003,
      "loss": 3.9979,
      "step": 21602
    },
    {
      "epoch": 0.21603,
      "grad_norm": 1.702342005023596,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 21603
    },
    {
      "epoch": 0.21604,
      "grad_norm": 0.9181373709243951,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 21604
    },
    {
      "epoch": 0.21605,
      "grad_norm": 1.4156053304286162,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 21605
    },
    {
      "epoch": 0.21606,
      "grad_norm": 1.299322167838569,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 21606
    },
    {
      "epoch": 0.21607,
      "grad_norm": 1.4494695676496232,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 21607
    },
    {
      "epoch": 0.21608,
      "grad_norm": 1.187093459511788,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 21608
    },
    {
      "epoch": 0.21609,
      "grad_norm": 1.307214047059684,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 21609
    },
    {
      "epoch": 0.2161,
      "grad_norm": 1.305218544830906,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 21610
    },
    {
      "epoch": 0.21611,
      "grad_norm": 1.1245976597135463,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21611
    },
    {
      "epoch": 0.21612,
      "grad_norm": 1.4216159000344961,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 21612
    },
    {
      "epoch": 0.21613,
      "grad_norm": 1.1299949348152787,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 21613
    },
    {
      "epoch": 0.21614,
      "grad_norm": 1.4791347438098315,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 21614
    },
    {
      "epoch": 0.21615,
      "grad_norm": 1.0638591599537033,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21615
    },
    {
      "epoch": 0.21616,
      "grad_norm": 1.3475699073183858,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21616
    },
    {
      "epoch": 0.21617,
      "grad_norm": 1.2316611239869664,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 21617
    },
    {
      "epoch": 0.21618,
      "grad_norm": 1.5065327402318887,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 21618
    },
    {
      "epoch": 0.21619,
      "grad_norm": 1.4096142512965315,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 21619
    },
    {
      "epoch": 0.2162,
      "grad_norm": 1.159454546769651,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 21620
    },
    {
      "epoch": 0.21621,
      "grad_norm": 1.317540642950509,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 21621
    },
    {
      "epoch": 0.21622,
      "grad_norm": 1.1865350298335586,
      "learning_rate": 0.003,
      "loss": 4.0811,
      "step": 21622
    },
    {
      "epoch": 0.21623,
      "grad_norm": 1.3959728152423938,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 21623
    },
    {
      "epoch": 0.21624,
      "grad_norm": 1.0967286789828294,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 21624
    },
    {
      "epoch": 0.21625,
      "grad_norm": 1.4107174860021268,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 21625
    },
    {
      "epoch": 0.21626,
      "grad_norm": 1.43536578196684,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 21626
    },
    {
      "epoch": 0.21627,
      "grad_norm": 1.3778884389546275,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 21627
    },
    {
      "epoch": 0.21628,
      "grad_norm": 1.145262126204291,
      "learning_rate": 0.003,
      "loss": 3.9985,
      "step": 21628
    },
    {
      "epoch": 0.21629,
      "grad_norm": 1.329957496149722,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 21629
    },
    {
      "epoch": 0.2163,
      "grad_norm": 1.2073280019111943,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 21630
    },
    {
      "epoch": 0.21631,
      "grad_norm": 1.4143764551609217,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 21631
    },
    {
      "epoch": 0.21632,
      "grad_norm": 1.2345148189041288,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 21632
    },
    {
      "epoch": 0.21633,
      "grad_norm": 1.2452968459023985,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 21633
    },
    {
      "epoch": 0.21634,
      "grad_norm": 1.4200986048996227,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 21634
    },
    {
      "epoch": 0.21635,
      "grad_norm": 1.3058008842472895,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 21635
    },
    {
      "epoch": 0.21636,
      "grad_norm": 1.1026251764083932,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 21636
    },
    {
      "epoch": 0.21637,
      "grad_norm": 1.3835744114032524,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 21637
    },
    {
      "epoch": 0.21638,
      "grad_norm": 1.1163499998284347,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 21638
    },
    {
      "epoch": 0.21639,
      "grad_norm": 1.7718147706196286,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 21639
    },
    {
      "epoch": 0.2164,
      "grad_norm": 0.9960949690844515,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 21640
    },
    {
      "epoch": 0.21641,
      "grad_norm": 1.598915141672048,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 21641
    },
    {
      "epoch": 0.21642,
      "grad_norm": 1.3501190162195924,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 21642
    },
    {
      "epoch": 0.21643,
      "grad_norm": 1.11708654672925,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21643
    },
    {
      "epoch": 0.21644,
      "grad_norm": 1.1837617744724265,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 21644
    },
    {
      "epoch": 0.21645,
      "grad_norm": 1.3930522195397887,
      "learning_rate": 0.003,
      "loss": 4.0786,
      "step": 21645
    },
    {
      "epoch": 0.21646,
      "grad_norm": 1.1971351458603388,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 21646
    },
    {
      "epoch": 0.21647,
      "grad_norm": 1.195709723980143,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 21647
    },
    {
      "epoch": 0.21648,
      "grad_norm": 1.2392818652695932,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 21648
    },
    {
      "epoch": 0.21649,
      "grad_norm": 1.2836118144686024,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 21649
    },
    {
      "epoch": 0.2165,
      "grad_norm": 1.3915139268510568,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 21650
    },
    {
      "epoch": 0.21651,
      "grad_norm": 1.1292513375476736,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 21651
    },
    {
      "epoch": 0.21652,
      "grad_norm": 1.6038647277746723,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 21652
    },
    {
      "epoch": 0.21653,
      "grad_norm": 0.979494980453447,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 21653
    },
    {
      "epoch": 0.21654,
      "grad_norm": 1.5388938433260417,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 21654
    },
    {
      "epoch": 0.21655,
      "grad_norm": 1.3595512608491056,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 21655
    },
    {
      "epoch": 0.21656,
      "grad_norm": 1.526590089855836,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 21656
    },
    {
      "epoch": 0.21657,
      "grad_norm": 1.0464180350081598,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 21657
    },
    {
      "epoch": 0.21658,
      "grad_norm": 1.4781300459318107,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 21658
    },
    {
      "epoch": 0.21659,
      "grad_norm": 1.0687999139567819,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 21659
    },
    {
      "epoch": 0.2166,
      "grad_norm": 1.5910343276132453,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21660
    },
    {
      "epoch": 0.21661,
      "grad_norm": 1.054474179530512,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 21661
    },
    {
      "epoch": 0.21662,
      "grad_norm": 1.389793991058751,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 21662
    },
    {
      "epoch": 0.21663,
      "grad_norm": 1.098142249375085,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 21663
    },
    {
      "epoch": 0.21664,
      "grad_norm": 1.4766492013725177,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 21664
    },
    {
      "epoch": 0.21665,
      "grad_norm": 1.1644620344863594,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 21665
    },
    {
      "epoch": 0.21666,
      "grad_norm": 1.3802003071818196,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 21666
    },
    {
      "epoch": 0.21667,
      "grad_norm": 1.3349039152220499,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 21667
    },
    {
      "epoch": 0.21668,
      "grad_norm": 1.2044820082090815,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 21668
    },
    {
      "epoch": 0.21669,
      "grad_norm": 1.3231136323465995,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 21669
    },
    {
      "epoch": 0.2167,
      "grad_norm": 1.2623644530106173,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 21670
    },
    {
      "epoch": 0.21671,
      "grad_norm": 1.4624627172783418,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 21671
    },
    {
      "epoch": 0.21672,
      "grad_norm": 1.149299095223078,
      "learning_rate": 0.003,
      "loss": 4.0105,
      "step": 21672
    },
    {
      "epoch": 0.21673,
      "grad_norm": 1.2030043550332916,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 21673
    },
    {
      "epoch": 0.21674,
      "grad_norm": 1.2909773238033704,
      "learning_rate": 0.003,
      "loss": 4.0099,
      "step": 21674
    },
    {
      "epoch": 0.21675,
      "grad_norm": 1.2679498641166909,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 21675
    },
    {
      "epoch": 0.21676,
      "grad_norm": 1.2613544205889584,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 21676
    },
    {
      "epoch": 0.21677,
      "grad_norm": 1.0937616178822396,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 21677
    },
    {
      "epoch": 0.21678,
      "grad_norm": 1.4788878273843822,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 21678
    },
    {
      "epoch": 0.21679,
      "grad_norm": 1.048627147398995,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 21679
    },
    {
      "epoch": 0.2168,
      "grad_norm": 1.437251322030383,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21680
    },
    {
      "epoch": 0.21681,
      "grad_norm": 1.1656455097893734,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 21681
    },
    {
      "epoch": 0.21682,
      "grad_norm": 1.2233444905006519,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21682
    },
    {
      "epoch": 0.21683,
      "grad_norm": 1.1120374871767775,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 21683
    },
    {
      "epoch": 0.21684,
      "grad_norm": 1.5661729770110275,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 21684
    },
    {
      "epoch": 0.21685,
      "grad_norm": 1.4661846422040847,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 21685
    },
    {
      "epoch": 0.21686,
      "grad_norm": 1.4717736535512747,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 21686
    },
    {
      "epoch": 0.21687,
      "grad_norm": 1.1563436294708824,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 21687
    },
    {
      "epoch": 0.21688,
      "grad_norm": 1.118294700225654,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 21688
    },
    {
      "epoch": 0.21689,
      "grad_norm": 1.5012816566404177,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 21689
    },
    {
      "epoch": 0.2169,
      "grad_norm": 1.3687644289169574,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 21690
    },
    {
      "epoch": 0.21691,
      "grad_norm": 1.1850811915893817,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 21691
    },
    {
      "epoch": 0.21692,
      "grad_norm": 1.2438480460026475,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 21692
    },
    {
      "epoch": 0.21693,
      "grad_norm": 1.2771512200175685,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 21693
    },
    {
      "epoch": 0.21694,
      "grad_norm": 1.2909528453072165,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 21694
    },
    {
      "epoch": 0.21695,
      "grad_norm": 1.1863761593004463,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 21695
    },
    {
      "epoch": 0.21696,
      "grad_norm": 1.1697625793188724,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 21696
    },
    {
      "epoch": 0.21697,
      "grad_norm": 1.3451951255757777,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 21697
    },
    {
      "epoch": 0.21698,
      "grad_norm": 1.2960474918538587,
      "learning_rate": 0.003,
      "loss": 3.9967,
      "step": 21698
    },
    {
      "epoch": 0.21699,
      "grad_norm": 1.2345397804210092,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 21699
    },
    {
      "epoch": 0.217,
      "grad_norm": 1.3326197741245571,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 21700
    },
    {
      "epoch": 0.21701,
      "grad_norm": 1.3130926308827144,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 21701
    },
    {
      "epoch": 0.21702,
      "grad_norm": 1.2045128299819818,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21702
    },
    {
      "epoch": 0.21703,
      "grad_norm": 1.2567238761158683,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 21703
    },
    {
      "epoch": 0.21704,
      "grad_norm": 1.1901544425293324,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 21704
    },
    {
      "epoch": 0.21705,
      "grad_norm": 1.2804168037424124,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 21705
    },
    {
      "epoch": 0.21706,
      "grad_norm": 1.3042417045037906,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 21706
    },
    {
      "epoch": 0.21707,
      "grad_norm": 1.462729231419984,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 21707
    },
    {
      "epoch": 0.21708,
      "grad_norm": 1.0520057436004748,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 21708
    },
    {
      "epoch": 0.21709,
      "grad_norm": 1.465433317820797,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 21709
    },
    {
      "epoch": 0.2171,
      "grad_norm": 1.142849578505775,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 21710
    },
    {
      "epoch": 0.21711,
      "grad_norm": 1.872884346632127,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 21711
    },
    {
      "epoch": 0.21712,
      "grad_norm": 1.260572130773345,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 21712
    },
    {
      "epoch": 0.21713,
      "grad_norm": 1.1845132205454723,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 21713
    },
    {
      "epoch": 0.21714,
      "grad_norm": 1.5152055655833143,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 21714
    },
    {
      "epoch": 0.21715,
      "grad_norm": 1.1536665825124806,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 21715
    },
    {
      "epoch": 0.21716,
      "grad_norm": 1.2851235738906504,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 21716
    },
    {
      "epoch": 0.21717,
      "grad_norm": 1.6653586909130602,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21717
    },
    {
      "epoch": 0.21718,
      "grad_norm": 1.1457946051654446,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 21718
    },
    {
      "epoch": 0.21719,
      "grad_norm": 1.4192257902255103,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 21719
    },
    {
      "epoch": 0.2172,
      "grad_norm": 1.2438092755574954,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 21720
    },
    {
      "epoch": 0.21721,
      "grad_norm": 1.32219183146982,
      "learning_rate": 0.003,
      "loss": 4.0672,
      "step": 21721
    },
    {
      "epoch": 0.21722,
      "grad_norm": 1.284226692704832,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 21722
    },
    {
      "epoch": 0.21723,
      "grad_norm": 1.4497404618246834,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 21723
    },
    {
      "epoch": 0.21724,
      "grad_norm": 1.2108837005354196,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 21724
    },
    {
      "epoch": 0.21725,
      "grad_norm": 1.2882882117901246,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 21725
    },
    {
      "epoch": 0.21726,
      "grad_norm": 1.1053404936096287,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 21726
    },
    {
      "epoch": 0.21727,
      "grad_norm": 1.5178973976789325,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 21727
    },
    {
      "epoch": 0.21728,
      "grad_norm": 1.1428639071682594,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 21728
    },
    {
      "epoch": 0.21729,
      "grad_norm": 1.3564810985494395,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 21729
    },
    {
      "epoch": 0.2173,
      "grad_norm": 1.1904550337860347,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 21730
    },
    {
      "epoch": 0.21731,
      "grad_norm": 1.4848382747182223,
      "learning_rate": 0.003,
      "loss": 4.0931,
      "step": 21731
    },
    {
      "epoch": 0.21732,
      "grad_norm": 1.1988120980535053,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 21732
    },
    {
      "epoch": 0.21733,
      "grad_norm": 1.4498829318594544,
      "learning_rate": 0.003,
      "loss": 4.0738,
      "step": 21733
    },
    {
      "epoch": 0.21734,
      "grad_norm": 1.1800732265472247,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 21734
    },
    {
      "epoch": 0.21735,
      "grad_norm": 1.2274318114915934,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 21735
    },
    {
      "epoch": 0.21736,
      "grad_norm": 1.3328993248200016,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 21736
    },
    {
      "epoch": 0.21737,
      "grad_norm": 1.1868578841688844,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 21737
    },
    {
      "epoch": 0.21738,
      "grad_norm": 1.1156789035775732,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21738
    },
    {
      "epoch": 0.21739,
      "grad_norm": 1.5106478469827882,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 21739
    },
    {
      "epoch": 0.2174,
      "grad_norm": 1.1993640107254842,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 21740
    },
    {
      "epoch": 0.21741,
      "grad_norm": 1.6821834633986894,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 21741
    },
    {
      "epoch": 0.21742,
      "grad_norm": 1.1557534760532018,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 21742
    },
    {
      "epoch": 0.21743,
      "grad_norm": 1.3357601424742491,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 21743
    },
    {
      "epoch": 0.21744,
      "grad_norm": 1.263381980222935,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 21744
    },
    {
      "epoch": 0.21745,
      "grad_norm": 1.491219457287799,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 21745
    },
    {
      "epoch": 0.21746,
      "grad_norm": 0.9872060673395434,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 21746
    },
    {
      "epoch": 0.21747,
      "grad_norm": 1.1932658773275,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 21747
    },
    {
      "epoch": 0.21748,
      "grad_norm": 1.333675384596158,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 21748
    },
    {
      "epoch": 0.21749,
      "grad_norm": 1.470193364690084,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 21749
    },
    {
      "epoch": 0.2175,
      "grad_norm": 1.0614461446475574,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 21750
    },
    {
      "epoch": 0.21751,
      "grad_norm": 1.4165992659360809,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 21751
    },
    {
      "epoch": 0.21752,
      "grad_norm": 1.1924084246654956,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 21752
    },
    {
      "epoch": 0.21753,
      "grad_norm": 1.313097715920257,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 21753
    },
    {
      "epoch": 0.21754,
      "grad_norm": 1.270571883977382,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 21754
    },
    {
      "epoch": 0.21755,
      "grad_norm": 1.272599828364782,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 21755
    },
    {
      "epoch": 0.21756,
      "grad_norm": 1.4556803151817337,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 21756
    },
    {
      "epoch": 0.21757,
      "grad_norm": 1.4361716690834565,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 21757
    },
    {
      "epoch": 0.21758,
      "grad_norm": 1.3398933909134998,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 21758
    },
    {
      "epoch": 0.21759,
      "grad_norm": 1.4170590315386171,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 21759
    },
    {
      "epoch": 0.2176,
      "grad_norm": 1.2193683819262564,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 21760
    },
    {
      "epoch": 0.21761,
      "grad_norm": 1.28055552804489,
      "learning_rate": 0.003,
      "loss": 4.0166,
      "step": 21761
    },
    {
      "epoch": 0.21762,
      "grad_norm": 1.2245662664859995,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 21762
    },
    {
      "epoch": 0.21763,
      "grad_norm": 1.4084482449929674,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 21763
    },
    {
      "epoch": 0.21764,
      "grad_norm": 1.1585647833835193,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 21764
    },
    {
      "epoch": 0.21765,
      "grad_norm": 1.2115678362913604,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 21765
    },
    {
      "epoch": 0.21766,
      "grad_norm": 1.157946350652913,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 21766
    },
    {
      "epoch": 0.21767,
      "grad_norm": 1.499516708908824,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 21767
    },
    {
      "epoch": 0.21768,
      "grad_norm": 1.452932108659569,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 21768
    },
    {
      "epoch": 0.21769,
      "grad_norm": 1.1759988580555416,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 21769
    },
    {
      "epoch": 0.2177,
      "grad_norm": 1.4267740862588505,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 21770
    },
    {
      "epoch": 0.21771,
      "grad_norm": 1.1640836432646118,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 21771
    },
    {
      "epoch": 0.21772,
      "grad_norm": 1.3022378991827772,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 21772
    },
    {
      "epoch": 0.21773,
      "grad_norm": 1.3072925323817297,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 21773
    },
    {
      "epoch": 0.21774,
      "grad_norm": 1.2105045238472119,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 21774
    },
    {
      "epoch": 0.21775,
      "grad_norm": 1.5017155364835821,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21775
    },
    {
      "epoch": 0.21776,
      "grad_norm": 1.1197846090932555,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 21776
    },
    {
      "epoch": 0.21777,
      "grad_norm": 1.9095875506733158,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 21777
    },
    {
      "epoch": 0.21778,
      "grad_norm": 0.9170804396826207,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 21778
    },
    {
      "epoch": 0.21779,
      "grad_norm": 1.3377267722755934,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 21779
    },
    {
      "epoch": 0.2178,
      "grad_norm": 1.243052792917148,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 21780
    },
    {
      "epoch": 0.21781,
      "grad_norm": 1.382652263301088,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 21781
    },
    {
      "epoch": 0.21782,
      "grad_norm": 1.1051630228725697,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 21782
    },
    {
      "epoch": 0.21783,
      "grad_norm": 1.1404321590529605,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 21783
    },
    {
      "epoch": 0.21784,
      "grad_norm": 1.301785248799035,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 21784
    },
    {
      "epoch": 0.21785,
      "grad_norm": 1.0912646971413142,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21785
    },
    {
      "epoch": 0.21786,
      "grad_norm": 1.4845960158302227,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 21786
    },
    {
      "epoch": 0.21787,
      "grad_norm": 1.4692435735597091,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 21787
    },
    {
      "epoch": 0.21788,
      "grad_norm": 1.371294947139874,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 21788
    },
    {
      "epoch": 0.21789,
      "grad_norm": 1.340268924973976,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 21789
    },
    {
      "epoch": 0.2179,
      "grad_norm": 1.3030649934143335,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 21790
    },
    {
      "epoch": 0.21791,
      "grad_norm": 1.1655853202551025,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 21791
    },
    {
      "epoch": 0.21792,
      "grad_norm": 1.417186448806512,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 21792
    },
    {
      "epoch": 0.21793,
      "grad_norm": 1.200047988513548,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 21793
    },
    {
      "epoch": 0.21794,
      "grad_norm": 1.3164823798491363,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 21794
    },
    {
      "epoch": 0.21795,
      "grad_norm": 1.3345678999834834,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 21795
    },
    {
      "epoch": 0.21796,
      "grad_norm": 1.2137007865353142,
      "learning_rate": 0.003,
      "loss": 3.9997,
      "step": 21796
    },
    {
      "epoch": 0.21797,
      "grad_norm": 1.0818115182812484,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 21797
    },
    {
      "epoch": 0.21798,
      "grad_norm": 1.3995812927115454,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 21798
    },
    {
      "epoch": 0.21799,
      "grad_norm": 1.0887305321876026,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 21799
    },
    {
      "epoch": 0.218,
      "grad_norm": 1.696440647290099,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 21800
    },
    {
      "epoch": 0.21801,
      "grad_norm": 1.2730282446637662,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 21801
    },
    {
      "epoch": 0.21802,
      "grad_norm": 1.45578713110437,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 21802
    },
    {
      "epoch": 0.21803,
      "grad_norm": 1.446971481281067,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 21803
    },
    {
      "epoch": 0.21804,
      "grad_norm": 0.965512232012288,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 21804
    },
    {
      "epoch": 0.21805,
      "grad_norm": 1.628209411214888,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 21805
    },
    {
      "epoch": 0.21806,
      "grad_norm": 1.1474627984919128,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21806
    },
    {
      "epoch": 0.21807,
      "grad_norm": 1.196026867362031,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 21807
    },
    {
      "epoch": 0.21808,
      "grad_norm": 1.2443965979525626,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 21808
    },
    {
      "epoch": 0.21809,
      "grad_norm": 1.5809297041238688,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 21809
    },
    {
      "epoch": 0.2181,
      "grad_norm": 1.0872077699541336,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 21810
    },
    {
      "epoch": 0.21811,
      "grad_norm": 1.6236580541857166,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 21811
    },
    {
      "epoch": 0.21812,
      "grad_norm": 1.4540955870785828,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 21812
    },
    {
      "epoch": 0.21813,
      "grad_norm": 1.1880065957311807,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 21813
    },
    {
      "epoch": 0.21814,
      "grad_norm": 1.2187188051396427,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 21814
    },
    {
      "epoch": 0.21815,
      "grad_norm": 1.399988740367689,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 21815
    },
    {
      "epoch": 0.21816,
      "grad_norm": 1.3127841473145994,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 21816
    },
    {
      "epoch": 0.21817,
      "grad_norm": 1.2047107452225148,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 21817
    },
    {
      "epoch": 0.21818,
      "grad_norm": 1.3808457862570687,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21818
    },
    {
      "epoch": 0.21819,
      "grad_norm": 1.1588561724439312,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 21819
    },
    {
      "epoch": 0.2182,
      "grad_norm": 1.4001257307576287,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 21820
    },
    {
      "epoch": 0.21821,
      "grad_norm": 1.02993503751475,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 21821
    },
    {
      "epoch": 0.21822,
      "grad_norm": 1.4803856847048495,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 21822
    },
    {
      "epoch": 0.21823,
      "grad_norm": 1.1216293175586112,
      "learning_rate": 0.003,
      "loss": 4.0768,
      "step": 21823
    },
    {
      "epoch": 0.21824,
      "grad_norm": 1.5049522871633374,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 21824
    },
    {
      "epoch": 0.21825,
      "grad_norm": 1.0412979291171327,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 21825
    },
    {
      "epoch": 0.21826,
      "grad_norm": 1.4578299600424,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21826
    },
    {
      "epoch": 0.21827,
      "grad_norm": 1.1201321192220774,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 21827
    },
    {
      "epoch": 0.21828,
      "grad_norm": 1.4657247929765522,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 21828
    },
    {
      "epoch": 0.21829,
      "grad_norm": 1.3090297312739307,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21829
    },
    {
      "epoch": 0.2183,
      "grad_norm": 1.3781285015558673,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 21830
    },
    {
      "epoch": 0.21831,
      "grad_norm": 1.21686698334116,
      "learning_rate": 0.003,
      "loss": 4.0616,
      "step": 21831
    },
    {
      "epoch": 0.21832,
      "grad_norm": 1.2566016580753985,
      "learning_rate": 0.003,
      "loss": 4.0148,
      "step": 21832
    },
    {
      "epoch": 0.21833,
      "grad_norm": 1.4487463646569787,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 21833
    },
    {
      "epoch": 0.21834,
      "grad_norm": 1.0677199207991201,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 21834
    },
    {
      "epoch": 0.21835,
      "grad_norm": 1.6613710385839113,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 21835
    },
    {
      "epoch": 0.21836,
      "grad_norm": 1.097299801532908,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 21836
    },
    {
      "epoch": 0.21837,
      "grad_norm": 1.495664002050768,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 21837
    },
    {
      "epoch": 0.21838,
      "grad_norm": 1.1129084471167503,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 21838
    },
    {
      "epoch": 0.21839,
      "grad_norm": 1.5530672756532087,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 21839
    },
    {
      "epoch": 0.2184,
      "grad_norm": 1.1848727302751756,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 21840
    },
    {
      "epoch": 0.21841,
      "grad_norm": 1.340491877693265,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 21841
    },
    {
      "epoch": 0.21842,
      "grad_norm": 1.3901836643028558,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 21842
    },
    {
      "epoch": 0.21843,
      "grad_norm": 1.0581420731535256,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 21843
    },
    {
      "epoch": 0.21844,
      "grad_norm": 1.4694580602654814,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 21844
    },
    {
      "epoch": 0.21845,
      "grad_norm": 1.2246254850083362,
      "learning_rate": 0.003,
      "loss": 4.0791,
      "step": 21845
    },
    {
      "epoch": 0.21846,
      "grad_norm": 1.1792676147967918,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 21846
    },
    {
      "epoch": 0.21847,
      "grad_norm": 1.2479741589582791,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 21847
    },
    {
      "epoch": 0.21848,
      "grad_norm": 1.2744055254108633,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 21848
    },
    {
      "epoch": 0.21849,
      "grad_norm": 1.6129612207504957,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 21849
    },
    {
      "epoch": 0.2185,
      "grad_norm": 1.3219532561820633,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 21850
    },
    {
      "epoch": 0.21851,
      "grad_norm": 1.3018424802157766,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 21851
    },
    {
      "epoch": 0.21852,
      "grad_norm": 1.3628580496989582,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 21852
    },
    {
      "epoch": 0.21853,
      "grad_norm": 1.1376207302177839,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 21853
    },
    {
      "epoch": 0.21854,
      "grad_norm": 1.2694024402813655,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 21854
    },
    {
      "epoch": 0.21855,
      "grad_norm": 1.3893402012442544,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 21855
    },
    {
      "epoch": 0.21856,
      "grad_norm": 1.197721354828964,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 21856
    },
    {
      "epoch": 0.21857,
      "grad_norm": 1.3381212107701923,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 21857
    },
    {
      "epoch": 0.21858,
      "grad_norm": 0.9806807309955483,
      "learning_rate": 0.003,
      "loss": 4.0094,
      "step": 21858
    },
    {
      "epoch": 0.21859,
      "grad_norm": 1.56375667778978,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 21859
    },
    {
      "epoch": 0.2186,
      "grad_norm": 1.3261575869782218,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 21860
    },
    {
      "epoch": 0.21861,
      "grad_norm": 1.261235160635816,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 21861
    },
    {
      "epoch": 0.21862,
      "grad_norm": 1.1608136640008393,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 21862
    },
    {
      "epoch": 0.21863,
      "grad_norm": 1.4585535655625999,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 21863
    },
    {
      "epoch": 0.21864,
      "grad_norm": 1.2657939021710913,
      "learning_rate": 0.003,
      "loss": 4.0041,
      "step": 21864
    },
    {
      "epoch": 0.21865,
      "grad_norm": 1.1623417582456166,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 21865
    },
    {
      "epoch": 0.21866,
      "grad_norm": 1.3911128896185476,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 21866
    },
    {
      "epoch": 0.21867,
      "grad_norm": 1.1625706876939281,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 21867
    },
    {
      "epoch": 0.21868,
      "grad_norm": 1.3387012015290805,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 21868
    },
    {
      "epoch": 0.21869,
      "grad_norm": 1.314110378401501,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 21869
    },
    {
      "epoch": 0.2187,
      "grad_norm": 0.9999991750378531,
      "learning_rate": 0.003,
      "loss": 4.0158,
      "step": 21870
    },
    {
      "epoch": 0.21871,
      "grad_norm": 1.5012812845497798,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 21871
    },
    {
      "epoch": 0.21872,
      "grad_norm": 1.2033947428833383,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 21872
    },
    {
      "epoch": 0.21873,
      "grad_norm": 1.1355665356272322,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 21873
    },
    {
      "epoch": 0.21874,
      "grad_norm": 1.3781499258525007,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 21874
    },
    {
      "epoch": 0.21875,
      "grad_norm": 1.1076317553859993,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 21875
    },
    {
      "epoch": 0.21876,
      "grad_norm": 1.312420119942699,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 21876
    },
    {
      "epoch": 0.21877,
      "grad_norm": 1.260117273022279,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 21877
    },
    {
      "epoch": 0.21878,
      "grad_norm": 1.2736011384829615,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 21878
    },
    {
      "epoch": 0.21879,
      "grad_norm": 1.6563759600413785,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 21879
    },
    {
      "epoch": 0.2188,
      "grad_norm": 1.214488166570031,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 21880
    },
    {
      "epoch": 0.21881,
      "grad_norm": 1.7308811759401588,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 21881
    },
    {
      "epoch": 0.21882,
      "grad_norm": 1.2657519703370974,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 21882
    },
    {
      "epoch": 0.21883,
      "grad_norm": 1.2021609269771085,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 21883
    },
    {
      "epoch": 0.21884,
      "grad_norm": 1.5947524084221563,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 21884
    },
    {
      "epoch": 0.21885,
      "grad_norm": 1.1360520615617342,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 21885
    },
    {
      "epoch": 0.21886,
      "grad_norm": 1.7329244709863258,
      "learning_rate": 0.003,
      "loss": 4.1144,
      "step": 21886
    },
    {
      "epoch": 0.21887,
      "grad_norm": 0.8703951983084132,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 21887
    },
    {
      "epoch": 0.21888,
      "grad_norm": 1.243980851234002,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 21888
    },
    {
      "epoch": 0.21889,
      "grad_norm": 1.278766372745449,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 21889
    },
    {
      "epoch": 0.2189,
      "grad_norm": 1.6718724133847218,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 21890
    },
    {
      "epoch": 0.21891,
      "grad_norm": 1.145674094050216,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 21891
    },
    {
      "epoch": 0.21892,
      "grad_norm": 1.1523355795045476,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 21892
    },
    {
      "epoch": 0.21893,
      "grad_norm": 1.2014596209351205,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 21893
    },
    {
      "epoch": 0.21894,
      "grad_norm": 1.3958011540508617,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 21894
    },
    {
      "epoch": 0.21895,
      "grad_norm": 1.2393398588122093,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 21895
    },
    {
      "epoch": 0.21896,
      "grad_norm": 1.3347735006817876,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 21896
    },
    {
      "epoch": 0.21897,
      "grad_norm": 1.1324565067433832,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 21897
    },
    {
      "epoch": 0.21898,
      "grad_norm": 1.2039209931932169,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 21898
    },
    {
      "epoch": 0.21899,
      "grad_norm": 1.221239275920937,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 21899
    },
    {
      "epoch": 0.219,
      "grad_norm": 1.3846023565342906,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 21900
    },
    {
      "epoch": 0.21901,
      "grad_norm": 1.2097233432992271,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21901
    },
    {
      "epoch": 0.21902,
      "grad_norm": 1.5307952520411225,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 21902
    },
    {
      "epoch": 0.21903,
      "grad_norm": 1.1426380684974333,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 21903
    },
    {
      "epoch": 0.21904,
      "grad_norm": 1.4425273570195094,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 21904
    },
    {
      "epoch": 0.21905,
      "grad_norm": 1.2135500938696013,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 21905
    },
    {
      "epoch": 0.21906,
      "grad_norm": 1.4791938122224157,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 21906
    },
    {
      "epoch": 0.21907,
      "grad_norm": 1.2385415384800407,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 21907
    },
    {
      "epoch": 0.21908,
      "grad_norm": 1.2204377059545255,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 21908
    },
    {
      "epoch": 0.21909,
      "grad_norm": 1.4729856305254043,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 21909
    },
    {
      "epoch": 0.2191,
      "grad_norm": 1.1580053596916617,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 21910
    },
    {
      "epoch": 0.21911,
      "grad_norm": 1.4685645178871252,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 21911
    },
    {
      "epoch": 0.21912,
      "grad_norm": 1.1270720819698852,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 21912
    },
    {
      "epoch": 0.21913,
      "grad_norm": 1.4187154875858043,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 21913
    },
    {
      "epoch": 0.21914,
      "grad_norm": 1.110683647538591,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 21914
    },
    {
      "epoch": 0.21915,
      "grad_norm": 1.547151058553336,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 21915
    },
    {
      "epoch": 0.21916,
      "grad_norm": 1.1328535661260921,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 21916
    },
    {
      "epoch": 0.21917,
      "grad_norm": 1.372936148828246,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 21917
    },
    {
      "epoch": 0.21918,
      "grad_norm": 1.4214768619666374,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 21918
    },
    {
      "epoch": 0.21919,
      "grad_norm": 1.3312621969706127,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 21919
    },
    {
      "epoch": 0.2192,
      "grad_norm": 1.245775162625903,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 21920
    },
    {
      "epoch": 0.21921,
      "grad_norm": 1.3279234469523127,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 21921
    },
    {
      "epoch": 0.21922,
      "grad_norm": 1.4554362869485136,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 21922
    },
    {
      "epoch": 0.21923,
      "grad_norm": 1.2809881640912781,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 21923
    },
    {
      "epoch": 0.21924,
      "grad_norm": 1.354197059303451,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 21924
    },
    {
      "epoch": 0.21925,
      "grad_norm": 1.2019574280100456,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 21925
    },
    {
      "epoch": 0.21926,
      "grad_norm": 1.1380669860661248,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 21926
    },
    {
      "epoch": 0.21927,
      "grad_norm": 1.4336369214880056,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 21927
    },
    {
      "epoch": 0.21928,
      "grad_norm": 1.1654398339180754,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 21928
    },
    {
      "epoch": 0.21929,
      "grad_norm": 1.4196392347750213,
      "learning_rate": 0.003,
      "loss": 4.0845,
      "step": 21929
    },
    {
      "epoch": 0.2193,
      "grad_norm": 1.0921325701490219,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 21930
    },
    {
      "epoch": 0.21931,
      "grad_norm": 1.4207894374679884,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 21931
    },
    {
      "epoch": 0.21932,
      "grad_norm": 1.2890895537088862,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 21932
    },
    {
      "epoch": 0.21933,
      "grad_norm": 1.7586636158569737,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 21933
    },
    {
      "epoch": 0.21934,
      "grad_norm": 1.1272703711037795,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 21934
    },
    {
      "epoch": 0.21935,
      "grad_norm": 1.2564020111487622,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 21935
    },
    {
      "epoch": 0.21936,
      "grad_norm": 1.1981853982583015,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 21936
    },
    {
      "epoch": 0.21937,
      "grad_norm": 1.2212148482772398,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 21937
    },
    {
      "epoch": 0.21938,
      "grad_norm": 1.190855324309062,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 21938
    },
    {
      "epoch": 0.21939,
      "grad_norm": 1.4013183516912304,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 21939
    },
    {
      "epoch": 0.2194,
      "grad_norm": 1.134892224515106,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 21940
    },
    {
      "epoch": 0.21941,
      "grad_norm": 1.301908871170717,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 21941
    },
    {
      "epoch": 0.21942,
      "grad_norm": 1.1932679515891222,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 21942
    },
    {
      "epoch": 0.21943,
      "grad_norm": 1.3558064809702928,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 21943
    },
    {
      "epoch": 0.21944,
      "grad_norm": 1.21968860972223,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 21944
    },
    {
      "epoch": 0.21945,
      "grad_norm": 1.3174656195034329,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 21945
    },
    {
      "epoch": 0.21946,
      "grad_norm": 1.3446320833830796,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 21946
    },
    {
      "epoch": 0.21947,
      "grad_norm": 1.6524268096393475,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 21947
    },
    {
      "epoch": 0.21948,
      "grad_norm": 1.2762365175000143,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 21948
    },
    {
      "epoch": 0.21949,
      "grad_norm": 1.4106233865194429,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 21949
    },
    {
      "epoch": 0.2195,
      "grad_norm": 1.2292659867037987,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 21950
    },
    {
      "epoch": 0.21951,
      "grad_norm": 1.5629695436942541,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 21951
    },
    {
      "epoch": 0.21952,
      "grad_norm": 1.0618016722660668,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 21952
    },
    {
      "epoch": 0.21953,
      "grad_norm": 1.426553492728306,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 21953
    },
    {
      "epoch": 0.21954,
      "grad_norm": 1.2761199006119757,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 21954
    },
    {
      "epoch": 0.21955,
      "grad_norm": 1.2847758400514797,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 21955
    },
    {
      "epoch": 0.21956,
      "grad_norm": 1.2110449349287116,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 21956
    },
    {
      "epoch": 0.21957,
      "grad_norm": 1.30524349065345,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 21957
    },
    {
      "epoch": 0.21958,
      "grad_norm": 1.0488535033113748,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 21958
    },
    {
      "epoch": 0.21959,
      "grad_norm": 1.3720199191394824,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 21959
    },
    {
      "epoch": 0.2196,
      "grad_norm": 1.1758659575453692,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 21960
    },
    {
      "epoch": 0.21961,
      "grad_norm": 1.3792286043631028,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 21961
    },
    {
      "epoch": 0.21962,
      "grad_norm": 1.2129583578478145,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 21962
    },
    {
      "epoch": 0.21963,
      "grad_norm": 1.4881039884514828,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 21963
    },
    {
      "epoch": 0.21964,
      "grad_norm": 1.2358647782456937,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 21964
    },
    {
      "epoch": 0.21965,
      "grad_norm": 1.884750601200753,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 21965
    },
    {
      "epoch": 0.21966,
      "grad_norm": 1.115206786447901,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 21966
    },
    {
      "epoch": 0.21967,
      "grad_norm": 1.1835173732623296,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 21967
    },
    {
      "epoch": 0.21968,
      "grad_norm": 1.2952447568803054,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 21968
    },
    {
      "epoch": 0.21969,
      "grad_norm": 1.1173047032920922,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 21969
    },
    {
      "epoch": 0.2197,
      "grad_norm": 1.376174741074243,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 21970
    },
    {
      "epoch": 0.21971,
      "grad_norm": 1.2395593887738778,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 21971
    },
    {
      "epoch": 0.21972,
      "grad_norm": 1.2924060549475613,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 21972
    },
    {
      "epoch": 0.21973,
      "grad_norm": 1.3933521899632437,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 21973
    },
    {
      "epoch": 0.21974,
      "grad_norm": 1.4489330442733404,
      "learning_rate": 0.003,
      "loss": 4.0874,
      "step": 21974
    },
    {
      "epoch": 0.21975,
      "grad_norm": 1.3402591915023505,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 21975
    },
    {
      "epoch": 0.21976,
      "grad_norm": 1.1361412490524445,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 21976
    },
    {
      "epoch": 0.21977,
      "grad_norm": 1.3929749248907122,
      "learning_rate": 0.003,
      "loss": 4.0782,
      "step": 21977
    },
    {
      "epoch": 0.21978,
      "grad_norm": 1.3121856380564516,
      "learning_rate": 0.003,
      "loss": 4.0825,
      "step": 21978
    },
    {
      "epoch": 0.21979,
      "grad_norm": 1.2598021636776788,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 21979
    },
    {
      "epoch": 0.2198,
      "grad_norm": 1.3883881259052413,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 21980
    },
    {
      "epoch": 0.21981,
      "grad_norm": 1.3650733820778755,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 21981
    },
    {
      "epoch": 0.21982,
      "grad_norm": 1.2185180588385176,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 21982
    },
    {
      "epoch": 0.21983,
      "grad_norm": 1.405136322415539,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 21983
    },
    {
      "epoch": 0.21984,
      "grad_norm": 1.1392482188167596,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 21984
    },
    {
      "epoch": 0.21985,
      "grad_norm": 1.515030162640994,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 21985
    },
    {
      "epoch": 0.21986,
      "grad_norm": 1.1278694570469598,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 21986
    },
    {
      "epoch": 0.21987,
      "grad_norm": 1.5710847577934575,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 21987
    },
    {
      "epoch": 0.21988,
      "grad_norm": 1.7399635521600438,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 21988
    },
    {
      "epoch": 0.21989,
      "grad_norm": 1.4419622166426336,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 21989
    },
    {
      "epoch": 0.2199,
      "grad_norm": 1.1323052650451653,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 21990
    },
    {
      "epoch": 0.21991,
      "grad_norm": 1.3395754195874743,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 21991
    },
    {
      "epoch": 0.21992,
      "grad_norm": 1.105614324419437,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 21992
    },
    {
      "epoch": 0.21993,
      "grad_norm": 1.3279786482364344,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 21993
    },
    {
      "epoch": 0.21994,
      "grad_norm": 1.1997867243597922,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 21994
    },
    {
      "epoch": 0.21995,
      "grad_norm": 1.2714493569781429,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 21995
    },
    {
      "epoch": 0.21996,
      "grad_norm": 1.159293005396773,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 21996
    },
    {
      "epoch": 0.21997,
      "grad_norm": 1.282598504075124,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 21997
    },
    {
      "epoch": 0.21998,
      "grad_norm": 1.1958611546236209,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 21998
    },
    {
      "epoch": 0.21999,
      "grad_norm": 1.3805033783662493,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 21999
    },
    {
      "epoch": 0.22,
      "grad_norm": 1.2006099392204475,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 22000
    },
    {
      "epoch": 0.22001,
      "grad_norm": 1.6004264636382097,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 22001
    },
    {
      "epoch": 0.22002,
      "grad_norm": 1.1289168964241154,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 22002
    },
    {
      "epoch": 0.22003,
      "grad_norm": 1.310963994707133,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 22003
    },
    {
      "epoch": 0.22004,
      "grad_norm": 1.1871691746716844,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 22004
    },
    {
      "epoch": 0.22005,
      "grad_norm": 1.3996480373055058,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 22005
    },
    {
      "epoch": 0.22006,
      "grad_norm": 1.4427349936785299,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 22006
    },
    {
      "epoch": 0.22007,
      "grad_norm": 1.03229091907169,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 22007
    },
    {
      "epoch": 0.22008,
      "grad_norm": 1.979030657273191,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 22008
    },
    {
      "epoch": 0.22009,
      "grad_norm": 1.0712747011990373,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 22009
    },
    {
      "epoch": 0.2201,
      "grad_norm": 1.220404774144469,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 22010
    },
    {
      "epoch": 0.22011,
      "grad_norm": 1.3574732714764237,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 22011
    },
    {
      "epoch": 0.22012,
      "grad_norm": 1.2587755650236263,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 22012
    },
    {
      "epoch": 0.22013,
      "grad_norm": 1.3788069975716672,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 22013
    },
    {
      "epoch": 0.22014,
      "grad_norm": 1.3649281869718972,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 22014
    },
    {
      "epoch": 0.22015,
      "grad_norm": 1.3188502903880825,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 22015
    },
    {
      "epoch": 0.22016,
      "grad_norm": 1.2532977210053056,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 22016
    },
    {
      "epoch": 0.22017,
      "grad_norm": 1.2454509901370068,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 22017
    },
    {
      "epoch": 0.22018,
      "grad_norm": 1.2219080505179116,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 22018
    },
    {
      "epoch": 0.22019,
      "grad_norm": 1.227557250799576,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 22019
    },
    {
      "epoch": 0.2202,
      "grad_norm": 1.4217461690897921,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22020
    },
    {
      "epoch": 0.22021,
      "grad_norm": 1.1745684753740826,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 22021
    },
    {
      "epoch": 0.22022,
      "grad_norm": 1.3316959051174804,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 22022
    },
    {
      "epoch": 0.22023,
      "grad_norm": 1.132020313014203,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 22023
    },
    {
      "epoch": 0.22024,
      "grad_norm": 1.391354676802887,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 22024
    },
    {
      "epoch": 0.22025,
      "grad_norm": 1.2009920679822612,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 22025
    },
    {
      "epoch": 0.22026,
      "grad_norm": 1.3050770476118223,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 22026
    },
    {
      "epoch": 0.22027,
      "grad_norm": 1.2710427521381245,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 22027
    },
    {
      "epoch": 0.22028,
      "grad_norm": 1.512835456341759,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 22028
    },
    {
      "epoch": 0.22029,
      "grad_norm": 1.0541873228922884,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 22029
    },
    {
      "epoch": 0.2203,
      "grad_norm": 1.5742314943388278,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 22030
    },
    {
      "epoch": 0.22031,
      "grad_norm": 1.28883727437431,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 22031
    },
    {
      "epoch": 0.22032,
      "grad_norm": 1.3461851868328158,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 22032
    },
    {
      "epoch": 0.22033,
      "grad_norm": 1.334434768183276,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 22033
    },
    {
      "epoch": 0.22034,
      "grad_norm": 1.1809934324254372,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 22034
    },
    {
      "epoch": 0.22035,
      "grad_norm": 1.3568723623825492,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 22035
    },
    {
      "epoch": 0.22036,
      "grad_norm": 1.1667312162928247,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 22036
    },
    {
      "epoch": 0.22037,
      "grad_norm": 1.3838957074239497,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 22037
    },
    {
      "epoch": 0.22038,
      "grad_norm": 1.064283880214727,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 22038
    },
    {
      "epoch": 0.22039,
      "grad_norm": 1.5433932879794727,
      "learning_rate": 0.003,
      "loss": 4.088,
      "step": 22039
    },
    {
      "epoch": 0.2204,
      "grad_norm": 1.0299755896576255,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 22040
    },
    {
      "epoch": 0.22041,
      "grad_norm": 1.3830542724373722,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 22041
    },
    {
      "epoch": 0.22042,
      "grad_norm": 1.3030790728249382,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 22042
    },
    {
      "epoch": 0.22043,
      "grad_norm": 1.1550599226862925,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 22043
    },
    {
      "epoch": 0.22044,
      "grad_norm": 1.3085847492254559,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 22044
    },
    {
      "epoch": 0.22045,
      "grad_norm": 1.1182409119330847,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 22045
    },
    {
      "epoch": 0.22046,
      "grad_norm": 1.5194677321955776,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 22046
    },
    {
      "epoch": 0.22047,
      "grad_norm": 1.2218773568374732,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 22047
    },
    {
      "epoch": 0.22048,
      "grad_norm": 1.576138031865719,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 22048
    },
    {
      "epoch": 0.22049,
      "grad_norm": 1.239135079172457,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 22049
    },
    {
      "epoch": 0.2205,
      "grad_norm": 1.1697429723915767,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 22050
    },
    {
      "epoch": 0.22051,
      "grad_norm": 1.2253034134469498,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 22051
    },
    {
      "epoch": 0.22052,
      "grad_norm": 1.249720705142228,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 22052
    },
    {
      "epoch": 0.22053,
      "grad_norm": 1.3785068123116304,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 22053
    },
    {
      "epoch": 0.22054,
      "grad_norm": 0.9985964205797072,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 22054
    },
    {
      "epoch": 0.22055,
      "grad_norm": 1.289746726837481,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 22055
    },
    {
      "epoch": 0.22056,
      "grad_norm": 1.2888530434697942,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 22056
    },
    {
      "epoch": 0.22057,
      "grad_norm": 1.4008643616024095,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 22057
    },
    {
      "epoch": 0.22058,
      "grad_norm": 1.3198347378582163,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 22058
    },
    {
      "epoch": 0.22059,
      "grad_norm": 1.184716181331145,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 22059
    },
    {
      "epoch": 0.2206,
      "grad_norm": 1.2195267100300138,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 22060
    },
    {
      "epoch": 0.22061,
      "grad_norm": 1.3366811902149836,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 22061
    },
    {
      "epoch": 0.22062,
      "grad_norm": 1.3207975326265124,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 22062
    },
    {
      "epoch": 0.22063,
      "grad_norm": 1.542595951567761,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 22063
    },
    {
      "epoch": 0.22064,
      "grad_norm": 1.059672367589958,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 22064
    },
    {
      "epoch": 0.22065,
      "grad_norm": 1.3609537408360166,
      "learning_rate": 0.003,
      "loss": 4.0058,
      "step": 22065
    },
    {
      "epoch": 0.22066,
      "grad_norm": 1.2922115423020513,
      "learning_rate": 0.003,
      "loss": 4.0149,
      "step": 22066
    },
    {
      "epoch": 0.22067,
      "grad_norm": 1.3724490104352296,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 22067
    },
    {
      "epoch": 0.22068,
      "grad_norm": 1.2304597950909055,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 22068
    },
    {
      "epoch": 0.22069,
      "grad_norm": 1.4060815997573863,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22069
    },
    {
      "epoch": 0.2207,
      "grad_norm": 1.3168133631060361,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 22070
    },
    {
      "epoch": 0.22071,
      "grad_norm": 1.1000232768231184,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 22071
    },
    {
      "epoch": 0.22072,
      "grad_norm": 1.2985061789221102,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 22072
    },
    {
      "epoch": 0.22073,
      "grad_norm": 1.3343167037257264,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 22073
    },
    {
      "epoch": 0.22074,
      "grad_norm": 1.5818526276304599,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22074
    },
    {
      "epoch": 0.22075,
      "grad_norm": 1.2370323629411177,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 22075
    },
    {
      "epoch": 0.22076,
      "grad_norm": 1.2236372247133998,
      "learning_rate": 0.003,
      "loss": 4.0795,
      "step": 22076
    },
    {
      "epoch": 0.22077,
      "grad_norm": 1.4676775252690224,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 22077
    },
    {
      "epoch": 0.22078,
      "grad_norm": 1.2644907515701838,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 22078
    },
    {
      "epoch": 0.22079,
      "grad_norm": 1.2902745386762855,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 22079
    },
    {
      "epoch": 0.2208,
      "grad_norm": 1.3074531077802405,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 22080
    },
    {
      "epoch": 0.22081,
      "grad_norm": 1.324892085960681,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 22081
    },
    {
      "epoch": 0.22082,
      "grad_norm": 1.227464840408412,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22082
    },
    {
      "epoch": 0.22083,
      "grad_norm": 1.4387552546135856,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 22083
    },
    {
      "epoch": 0.22084,
      "grad_norm": 1.0736599163266853,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 22084
    },
    {
      "epoch": 0.22085,
      "grad_norm": 1.4733321761073723,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 22085
    },
    {
      "epoch": 0.22086,
      "grad_norm": 0.9524379310571377,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 22086
    },
    {
      "epoch": 0.22087,
      "grad_norm": 1.3604123053335961,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 22087
    },
    {
      "epoch": 0.22088,
      "grad_norm": 1.229120424668029,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22088
    },
    {
      "epoch": 0.22089,
      "grad_norm": 1.0602883349987202,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 22089
    },
    {
      "epoch": 0.2209,
      "grad_norm": 1.3339922136285414,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 22090
    },
    {
      "epoch": 0.22091,
      "grad_norm": 1.1983700160262636,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 22091
    },
    {
      "epoch": 0.22092,
      "grad_norm": 1.4439405413795647,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 22092
    },
    {
      "epoch": 0.22093,
      "grad_norm": 1.2562344082226966,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 22093
    },
    {
      "epoch": 0.22094,
      "grad_norm": 1.3410934863378028,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 22094
    },
    {
      "epoch": 0.22095,
      "grad_norm": 1.5791392824791675,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 22095
    },
    {
      "epoch": 0.22096,
      "grad_norm": 1.2348082012269177,
      "learning_rate": 0.003,
      "loss": 4.0819,
      "step": 22096
    },
    {
      "epoch": 0.22097,
      "grad_norm": 1.2668458739879667,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 22097
    },
    {
      "epoch": 0.22098,
      "grad_norm": 1.33665490855949,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 22098
    },
    {
      "epoch": 0.22099,
      "grad_norm": 1.3129218902648512,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 22099
    },
    {
      "epoch": 0.221,
      "grad_norm": 1.1421527114272347,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 22100
    },
    {
      "epoch": 0.22101,
      "grad_norm": 1.293725981566844,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 22101
    },
    {
      "epoch": 0.22102,
      "grad_norm": 1.1726341143874381,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 22102
    },
    {
      "epoch": 0.22103,
      "grad_norm": 1.362723647667269,
      "learning_rate": 0.003,
      "loss": 4.0695,
      "step": 22103
    },
    {
      "epoch": 0.22104,
      "grad_norm": 1.2387148849942935,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 22104
    },
    {
      "epoch": 0.22105,
      "grad_norm": 1.4540185131777992,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 22105
    },
    {
      "epoch": 0.22106,
      "grad_norm": 1.271059598551409,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22106
    },
    {
      "epoch": 0.22107,
      "grad_norm": 1.2978366936481063,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 22107
    },
    {
      "epoch": 0.22108,
      "grad_norm": 1.178265137297338,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 22108
    },
    {
      "epoch": 0.22109,
      "grad_norm": 1.3938851889024702,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 22109
    },
    {
      "epoch": 0.2211,
      "grad_norm": 1.2962696073975482,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 22110
    },
    {
      "epoch": 0.22111,
      "grad_norm": 1.1512034284164956,
      "learning_rate": 0.003,
      "loss": 4.009,
      "step": 22111
    },
    {
      "epoch": 0.22112,
      "grad_norm": 1.3157831848230688,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 22112
    },
    {
      "epoch": 0.22113,
      "grad_norm": 1.327099346972767,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 22113
    },
    {
      "epoch": 0.22114,
      "grad_norm": 1.418072769481291,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22114
    },
    {
      "epoch": 0.22115,
      "grad_norm": 1.120536080965957,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 22115
    },
    {
      "epoch": 0.22116,
      "grad_norm": 1.401330009769329,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 22116
    },
    {
      "epoch": 0.22117,
      "grad_norm": 1.1719044404065446,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 22117
    },
    {
      "epoch": 0.22118,
      "grad_norm": 1.3289814894533063,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 22118
    },
    {
      "epoch": 0.22119,
      "grad_norm": 1.2830149264163095,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 22119
    },
    {
      "epoch": 0.2212,
      "grad_norm": 1.2561297299636018,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 22120
    },
    {
      "epoch": 0.22121,
      "grad_norm": 1.26843521352646,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 22121
    },
    {
      "epoch": 0.22122,
      "grad_norm": 1.4501851741246976,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 22122
    },
    {
      "epoch": 0.22123,
      "grad_norm": 1.067518429602658,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 22123
    },
    {
      "epoch": 0.22124,
      "grad_norm": 1.2479888401514971,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 22124
    },
    {
      "epoch": 0.22125,
      "grad_norm": 1.3969267245385515,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 22125
    },
    {
      "epoch": 0.22126,
      "grad_norm": 1.2327623436297128,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 22126
    },
    {
      "epoch": 0.22127,
      "grad_norm": 1.5941374199152674,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 22127
    },
    {
      "epoch": 0.22128,
      "grad_norm": 1.2497619135028593,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 22128
    },
    {
      "epoch": 0.22129,
      "grad_norm": 1.3817912621007702,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 22129
    },
    {
      "epoch": 0.2213,
      "grad_norm": 1.228494123558605,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 22130
    },
    {
      "epoch": 0.22131,
      "grad_norm": 1.3183425194046234,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 22131
    },
    {
      "epoch": 0.22132,
      "grad_norm": 1.4042877045462423,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22132
    },
    {
      "epoch": 0.22133,
      "grad_norm": 1.2713861340476686,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 22133
    },
    {
      "epoch": 0.22134,
      "grad_norm": 1.264998720335586,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 22134
    },
    {
      "epoch": 0.22135,
      "grad_norm": 1.0702543077013797,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 22135
    },
    {
      "epoch": 0.22136,
      "grad_norm": 1.2781884544972073,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 22136
    },
    {
      "epoch": 0.22137,
      "grad_norm": 1.2852709215884592,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 22137
    },
    {
      "epoch": 0.22138,
      "grad_norm": 1.1005312711969961,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 22138
    },
    {
      "epoch": 0.22139,
      "grad_norm": 1.4759152117594718,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 22139
    },
    {
      "epoch": 0.2214,
      "grad_norm": 1.1324323721310299,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22140
    },
    {
      "epoch": 0.22141,
      "grad_norm": 1.6236663750577598,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 22141
    },
    {
      "epoch": 0.22142,
      "grad_norm": 1.1052368982921688,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 22142
    },
    {
      "epoch": 0.22143,
      "grad_norm": 1.5905769657683608,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 22143
    },
    {
      "epoch": 0.22144,
      "grad_norm": 1.290551305451846,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 22144
    },
    {
      "epoch": 0.22145,
      "grad_norm": 1.6117881526943743,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 22145
    },
    {
      "epoch": 0.22146,
      "grad_norm": 1.1898382046669767,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22146
    },
    {
      "epoch": 0.22147,
      "grad_norm": 1.2688902828105881,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 22147
    },
    {
      "epoch": 0.22148,
      "grad_norm": 1.4496171386489354,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22148
    },
    {
      "epoch": 0.22149,
      "grad_norm": 1.2586881822922122,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 22149
    },
    {
      "epoch": 0.2215,
      "grad_norm": 1.2137604380996958,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 22150
    },
    {
      "epoch": 0.22151,
      "grad_norm": 1.3586147786780414,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 22151
    },
    {
      "epoch": 0.22152,
      "grad_norm": 1.3470812827794485,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 22152
    },
    {
      "epoch": 0.22153,
      "grad_norm": 1.111077917048213,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 22153
    },
    {
      "epoch": 0.22154,
      "grad_norm": 1.2526395333727327,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 22154
    },
    {
      "epoch": 0.22155,
      "grad_norm": 1.2096794324801892,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 22155
    },
    {
      "epoch": 0.22156,
      "grad_norm": 1.4582326550309401,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 22156
    },
    {
      "epoch": 0.22157,
      "grad_norm": 1.409000980007499,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 22157
    },
    {
      "epoch": 0.22158,
      "grad_norm": 1.3329337922666435,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 22158
    },
    {
      "epoch": 0.22159,
      "grad_norm": 1.365377698047997,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 22159
    },
    {
      "epoch": 0.2216,
      "grad_norm": 1.1750227017032777,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 22160
    },
    {
      "epoch": 0.22161,
      "grad_norm": 1.5052042818096398,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 22161
    },
    {
      "epoch": 0.22162,
      "grad_norm": 1.2857515138723257,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22162
    },
    {
      "epoch": 0.22163,
      "grad_norm": 1.317674462309269,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 22163
    },
    {
      "epoch": 0.22164,
      "grad_norm": 1.4494465627418465,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 22164
    },
    {
      "epoch": 0.22165,
      "grad_norm": 1.1890524232637567,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 22165
    },
    {
      "epoch": 0.22166,
      "grad_norm": 1.245053588698817,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 22166
    },
    {
      "epoch": 0.22167,
      "grad_norm": 1.2127224222811872,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 22167
    },
    {
      "epoch": 0.22168,
      "grad_norm": 1.3495825122136302,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 22168
    },
    {
      "epoch": 0.22169,
      "grad_norm": 1.3198287182724973,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 22169
    },
    {
      "epoch": 0.2217,
      "grad_norm": 1.1676461875714192,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 22170
    },
    {
      "epoch": 0.22171,
      "grad_norm": 1.3226166710355396,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 22171
    },
    {
      "epoch": 0.22172,
      "grad_norm": 1.2403691143544944,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 22172
    },
    {
      "epoch": 0.22173,
      "grad_norm": 1.2596956337110792,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 22173
    },
    {
      "epoch": 0.22174,
      "grad_norm": 1.2764309524916242,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 22174
    },
    {
      "epoch": 0.22175,
      "grad_norm": 1.48595336118084,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 22175
    },
    {
      "epoch": 0.22176,
      "grad_norm": 1.2256372106366105,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 22176
    },
    {
      "epoch": 0.22177,
      "grad_norm": 1.3040329225333143,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 22177
    },
    {
      "epoch": 0.22178,
      "grad_norm": 1.3292123033342786,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 22178
    },
    {
      "epoch": 0.22179,
      "grad_norm": 1.2179206276720262,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 22179
    },
    {
      "epoch": 0.2218,
      "grad_norm": 1.1196439625291523,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 22180
    },
    {
      "epoch": 0.22181,
      "grad_norm": 1.4174064312799082,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 22181
    },
    {
      "epoch": 0.22182,
      "grad_norm": 1.4953810749362755,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 22182
    },
    {
      "epoch": 0.22183,
      "grad_norm": 1.2424138147622161,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 22183
    },
    {
      "epoch": 0.22184,
      "grad_norm": 1.203758898472911,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 22184
    },
    {
      "epoch": 0.22185,
      "grad_norm": 1.2753021228370445,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 22185
    },
    {
      "epoch": 0.22186,
      "grad_norm": 1.5446446901416815,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 22186
    },
    {
      "epoch": 0.22187,
      "grad_norm": 1.286948814251965,
      "learning_rate": 0.003,
      "loss": 4.0177,
      "step": 22187
    },
    {
      "epoch": 0.22188,
      "grad_norm": 1.3190321978728676,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 22188
    },
    {
      "epoch": 0.22189,
      "grad_norm": 1.382468196040536,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 22189
    },
    {
      "epoch": 0.2219,
      "grad_norm": 1.1252654706006864,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 22190
    },
    {
      "epoch": 0.22191,
      "grad_norm": 1.3471660412782678,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 22191
    },
    {
      "epoch": 0.22192,
      "grad_norm": 1.0705751076007068,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 22192
    },
    {
      "epoch": 0.22193,
      "grad_norm": 1.6493932185604472,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 22193
    },
    {
      "epoch": 0.22194,
      "grad_norm": 1.1736238809221848,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 22194
    },
    {
      "epoch": 0.22195,
      "grad_norm": 1.2157897684409515,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 22195
    },
    {
      "epoch": 0.22196,
      "grad_norm": 1.2794623068668234,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 22196
    },
    {
      "epoch": 0.22197,
      "grad_norm": 1.3300454281223202,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 22197
    },
    {
      "epoch": 0.22198,
      "grad_norm": 1.3339497746025875,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 22198
    },
    {
      "epoch": 0.22199,
      "grad_norm": 1.0741702760335805,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 22199
    },
    {
      "epoch": 0.222,
      "grad_norm": 1.3166946481347264,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 22200
    },
    {
      "epoch": 0.22201,
      "grad_norm": 1.3751429508002253,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 22201
    },
    {
      "epoch": 0.22202,
      "grad_norm": 1.3557418090919628,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 22202
    },
    {
      "epoch": 0.22203,
      "grad_norm": 1.2601051449173593,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 22203
    },
    {
      "epoch": 0.22204,
      "grad_norm": 1.3010076869110623,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 22204
    },
    {
      "epoch": 0.22205,
      "grad_norm": 1.2402110537802256,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 22205
    },
    {
      "epoch": 0.22206,
      "grad_norm": 1.2812950776919276,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 22206
    },
    {
      "epoch": 0.22207,
      "grad_norm": 1.387955277094656,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 22207
    },
    {
      "epoch": 0.22208,
      "grad_norm": 1.267722103524495,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 22208
    },
    {
      "epoch": 0.22209,
      "grad_norm": 1.3558599220332246,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 22209
    },
    {
      "epoch": 0.2221,
      "grad_norm": 1.4674635859547762,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22210
    },
    {
      "epoch": 0.22211,
      "grad_norm": 1.2803201729098241,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 22211
    },
    {
      "epoch": 0.22212,
      "grad_norm": 1.2872499174108878,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 22212
    },
    {
      "epoch": 0.22213,
      "grad_norm": 1.534661195185381,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 22213
    },
    {
      "epoch": 0.22214,
      "grad_norm": 1.206148335199384,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 22214
    },
    {
      "epoch": 0.22215,
      "grad_norm": 1.4653518529019343,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 22215
    },
    {
      "epoch": 0.22216,
      "grad_norm": 1.3758519702839136,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 22216
    },
    {
      "epoch": 0.22217,
      "grad_norm": 0.9965818671762745,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 22217
    },
    {
      "epoch": 0.22218,
      "grad_norm": 1.4390991131991475,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 22218
    },
    {
      "epoch": 0.22219,
      "grad_norm": 1.0430885363107065,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 22219
    },
    {
      "epoch": 0.2222,
      "grad_norm": 1.6980517978677148,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 22220
    },
    {
      "epoch": 0.22221,
      "grad_norm": 1.016124124849197,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 22221
    },
    {
      "epoch": 0.22222,
      "grad_norm": 1.4373105670424555,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 22222
    },
    {
      "epoch": 0.22223,
      "grad_norm": 1.2350703992552048,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 22223
    },
    {
      "epoch": 0.22224,
      "grad_norm": 1.1116514156313353,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 22224
    },
    {
      "epoch": 0.22225,
      "grad_norm": 1.1848877751606608,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 22225
    },
    {
      "epoch": 0.22226,
      "grad_norm": 1.2034576921297468,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 22226
    },
    {
      "epoch": 0.22227,
      "grad_norm": 1.4162655732776661,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 22227
    },
    {
      "epoch": 0.22228,
      "grad_norm": 1.133867009897121,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22228
    },
    {
      "epoch": 0.22229,
      "grad_norm": 1.584053996594265,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 22229
    },
    {
      "epoch": 0.2223,
      "grad_norm": 1.293766270217981,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 22230
    },
    {
      "epoch": 0.22231,
      "grad_norm": 1.325285564953847,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 22231
    },
    {
      "epoch": 0.22232,
      "grad_norm": 1.2219685925488346,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 22232
    },
    {
      "epoch": 0.22233,
      "grad_norm": 1.2736953396915642,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 22233
    },
    {
      "epoch": 0.22234,
      "grad_norm": 1.4948282289064123,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 22234
    },
    {
      "epoch": 0.22235,
      "grad_norm": 1.4841260398697191,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 22235
    },
    {
      "epoch": 0.22236,
      "grad_norm": 1.3580554324399594,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22236
    },
    {
      "epoch": 0.22237,
      "grad_norm": 1.2282339725710454,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22237
    },
    {
      "epoch": 0.22238,
      "grad_norm": 1.3470326964917405,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 22238
    },
    {
      "epoch": 0.22239,
      "grad_norm": 1.4522168393080932,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22239
    },
    {
      "epoch": 0.2224,
      "grad_norm": 1.1399714328058317,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22240
    },
    {
      "epoch": 0.22241,
      "grad_norm": 1.4846788031412537,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 22241
    },
    {
      "epoch": 0.22242,
      "grad_norm": 1.050301085182913,
      "learning_rate": 0.003,
      "loss": 4.0218,
      "step": 22242
    },
    {
      "epoch": 0.22243,
      "grad_norm": 1.5609731667227993,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 22243
    },
    {
      "epoch": 0.22244,
      "grad_norm": 1.1309386771706527,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 22244
    },
    {
      "epoch": 0.22245,
      "grad_norm": 1.6179429538297576,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 22245
    },
    {
      "epoch": 0.22246,
      "grad_norm": 1.2934334521502595,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 22246
    },
    {
      "epoch": 0.22247,
      "grad_norm": 1.4519038796624146,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 22247
    },
    {
      "epoch": 0.22248,
      "grad_norm": 1.1833006425804269,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 22248
    },
    {
      "epoch": 0.22249,
      "grad_norm": 1.3246829441844563,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 22249
    },
    {
      "epoch": 0.2225,
      "grad_norm": 1.0861351098106586,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 22250
    },
    {
      "epoch": 0.22251,
      "grad_norm": 1.877304766376981,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 22251
    },
    {
      "epoch": 0.22252,
      "grad_norm": 0.9286239406522893,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 22252
    },
    {
      "epoch": 0.22253,
      "grad_norm": 1.4307041713117088,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 22253
    },
    {
      "epoch": 0.22254,
      "grad_norm": 1.435639142730211,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 22254
    },
    {
      "epoch": 0.22255,
      "grad_norm": 1.5054286514642126,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 22255
    },
    {
      "epoch": 0.22256,
      "grad_norm": 1.0504286982391589,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 22256
    },
    {
      "epoch": 0.22257,
      "grad_norm": 1.2221700419293187,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 22257
    },
    {
      "epoch": 0.22258,
      "grad_norm": 1.5719172508623824,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 22258
    },
    {
      "epoch": 0.22259,
      "grad_norm": 1.0656455147623263,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 22259
    },
    {
      "epoch": 0.2226,
      "grad_norm": 1.6173941270830197,
      "learning_rate": 0.003,
      "loss": 4.0622,
      "step": 22260
    },
    {
      "epoch": 0.22261,
      "grad_norm": 1.0519273454191997,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 22261
    },
    {
      "epoch": 0.22262,
      "grad_norm": 1.5023187322203926,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 22262
    },
    {
      "epoch": 0.22263,
      "grad_norm": 0.9452941415252466,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 22263
    },
    {
      "epoch": 0.22264,
      "grad_norm": 1.2330828125061868,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 22264
    },
    {
      "epoch": 0.22265,
      "grad_norm": 1.2745966699823645,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 22265
    },
    {
      "epoch": 0.22266,
      "grad_norm": 1.2830923093107793,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 22266
    },
    {
      "epoch": 0.22267,
      "grad_norm": 1.1763273594841448,
      "learning_rate": 0.003,
      "loss": 4.0234,
      "step": 22267
    },
    {
      "epoch": 0.22268,
      "grad_norm": 1.2877466306358871,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 22268
    },
    {
      "epoch": 0.22269,
      "grad_norm": 1.464868480327683,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 22269
    },
    {
      "epoch": 0.2227,
      "grad_norm": 1.0312615050265248,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 22270
    },
    {
      "epoch": 0.22271,
      "grad_norm": 1.4502149804268774,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 22271
    },
    {
      "epoch": 0.22272,
      "grad_norm": 1.155633312870541,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 22272
    },
    {
      "epoch": 0.22273,
      "grad_norm": 1.4255298658899878,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 22273
    },
    {
      "epoch": 0.22274,
      "grad_norm": 1.4504068707265383,
      "learning_rate": 0.003,
      "loss": 4.0818,
      "step": 22274
    },
    {
      "epoch": 0.22275,
      "grad_norm": 1.3851491233174398,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 22275
    },
    {
      "epoch": 0.22276,
      "grad_norm": 1.2922839019261136,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 22276
    },
    {
      "epoch": 0.22277,
      "grad_norm": 1.4410317816557514,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 22277
    },
    {
      "epoch": 0.22278,
      "grad_norm": 1.336970797140703,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 22278
    },
    {
      "epoch": 0.22279,
      "grad_norm": 1.2350932710515,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 22279
    },
    {
      "epoch": 0.2228,
      "grad_norm": 1.0855331358867362,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 22280
    },
    {
      "epoch": 0.22281,
      "grad_norm": 1.452992882821028,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 22281
    },
    {
      "epoch": 0.22282,
      "grad_norm": 1.2090584854632997,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 22282
    },
    {
      "epoch": 0.22283,
      "grad_norm": 1.3079075769214261,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 22283
    },
    {
      "epoch": 0.22284,
      "grad_norm": 1.2317660787996143,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 22284
    },
    {
      "epoch": 0.22285,
      "grad_norm": 1.2386508563305105,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 22285
    },
    {
      "epoch": 0.22286,
      "grad_norm": 1.2384648024035614,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 22286
    },
    {
      "epoch": 0.22287,
      "grad_norm": 1.7077043579283722,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 22287
    },
    {
      "epoch": 0.22288,
      "grad_norm": 1.1340765158806927,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 22288
    },
    {
      "epoch": 0.22289,
      "grad_norm": 1.2469906059671239,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 22289
    },
    {
      "epoch": 0.2229,
      "grad_norm": 1.2846156754206628,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 22290
    },
    {
      "epoch": 0.22291,
      "grad_norm": 1.0916856529044876,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 22291
    },
    {
      "epoch": 0.22292,
      "grad_norm": 1.6297288285047618,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 22292
    },
    {
      "epoch": 0.22293,
      "grad_norm": 1.0844638564461677,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 22293
    },
    {
      "epoch": 0.22294,
      "grad_norm": 1.359361726134664,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 22294
    },
    {
      "epoch": 0.22295,
      "grad_norm": 1.249457508093861,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 22295
    },
    {
      "epoch": 0.22296,
      "grad_norm": 1.4601642774573411,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 22296
    },
    {
      "epoch": 0.22297,
      "grad_norm": 1.2309937735568728,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 22297
    },
    {
      "epoch": 0.22298,
      "grad_norm": 1.1975980985484633,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 22298
    },
    {
      "epoch": 0.22299,
      "grad_norm": 1.2023968725377303,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 22299
    },
    {
      "epoch": 0.223,
      "grad_norm": 1.276569396931763,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 22300
    },
    {
      "epoch": 0.22301,
      "grad_norm": 1.5279585850236914,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 22301
    },
    {
      "epoch": 0.22302,
      "grad_norm": 1.2117977118371657,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 22302
    },
    {
      "epoch": 0.22303,
      "grad_norm": 1.2715957643290374,
      "learning_rate": 0.003,
      "loss": 3.9883,
      "step": 22303
    },
    {
      "epoch": 0.22304,
      "grad_norm": 1.1552053751556521,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 22304
    },
    {
      "epoch": 0.22305,
      "grad_norm": 1.578301985762087,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22305
    },
    {
      "epoch": 0.22306,
      "grad_norm": 1.1582304903209346,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 22306
    },
    {
      "epoch": 0.22307,
      "grad_norm": 1.2956039149251017,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 22307
    },
    {
      "epoch": 0.22308,
      "grad_norm": 1.2236909258652096,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 22308
    },
    {
      "epoch": 0.22309,
      "grad_norm": 1.2852634680259556,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 22309
    },
    {
      "epoch": 0.2231,
      "grad_norm": 1.0868540714500894,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 22310
    },
    {
      "epoch": 0.22311,
      "grad_norm": 1.182212080244588,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 22311
    },
    {
      "epoch": 0.22312,
      "grad_norm": 1.3121510630486868,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 22312
    },
    {
      "epoch": 0.22313,
      "grad_norm": 1.0167860947785579,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 22313
    },
    {
      "epoch": 0.22314,
      "grad_norm": 1.4314189082804962,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 22314
    },
    {
      "epoch": 0.22315,
      "grad_norm": 1.0938782114272059,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 22315
    },
    {
      "epoch": 0.22316,
      "grad_norm": 1.4573906816415934,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 22316
    },
    {
      "epoch": 0.22317,
      "grad_norm": 1.3304554290767985,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 22317
    },
    {
      "epoch": 0.22318,
      "grad_norm": 1.4129225666875442,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 22318
    },
    {
      "epoch": 0.22319,
      "grad_norm": 1.6290619628830438,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 22319
    },
    {
      "epoch": 0.2232,
      "grad_norm": 1.2383722469772172,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 22320
    },
    {
      "epoch": 0.22321,
      "grad_norm": 1.3661791730130857,
      "learning_rate": 0.003,
      "loss": 4.0697,
      "step": 22321
    },
    {
      "epoch": 0.22322,
      "grad_norm": 1.168435559990707,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 22322
    },
    {
      "epoch": 0.22323,
      "grad_norm": 1.412346641501458,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 22323
    },
    {
      "epoch": 0.22324,
      "grad_norm": 1.23847662842489,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 22324
    },
    {
      "epoch": 0.22325,
      "grad_norm": 1.13232533705196,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 22325
    },
    {
      "epoch": 0.22326,
      "grad_norm": 1.0809228457746634,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 22326
    },
    {
      "epoch": 0.22327,
      "grad_norm": 1.2593200103397935,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 22327
    },
    {
      "epoch": 0.22328,
      "grad_norm": 1.1832379244327795,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 22328
    },
    {
      "epoch": 0.22329,
      "grad_norm": 1.3634986343403839,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 22329
    },
    {
      "epoch": 0.2233,
      "grad_norm": 1.3001802022990911,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 22330
    },
    {
      "epoch": 0.22331,
      "grad_norm": 1.4709011222083932,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 22331
    },
    {
      "epoch": 0.22332,
      "grad_norm": 1.4709580366309234,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 22332
    },
    {
      "epoch": 0.22333,
      "grad_norm": 1.1163882905309481,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 22333
    },
    {
      "epoch": 0.22334,
      "grad_norm": 1.4893043270331274,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 22334
    },
    {
      "epoch": 0.22335,
      "grad_norm": 0.9839768616454792,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 22335
    },
    {
      "epoch": 0.22336,
      "grad_norm": 1.5790218564132446,
      "learning_rate": 0.003,
      "loss": 4.0757,
      "step": 22336
    },
    {
      "epoch": 0.22337,
      "grad_norm": 1.1386007221622676,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 22337
    },
    {
      "epoch": 0.22338,
      "grad_norm": 1.4932045660837323,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 22338
    },
    {
      "epoch": 0.22339,
      "grad_norm": 1.2818113626498306,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 22339
    },
    {
      "epoch": 0.2234,
      "grad_norm": 1.3015829509065822,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 22340
    },
    {
      "epoch": 0.22341,
      "grad_norm": 1.0940565336446755,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 22341
    },
    {
      "epoch": 0.22342,
      "grad_norm": 1.4679803696931557,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 22342
    },
    {
      "epoch": 0.22343,
      "grad_norm": 1.3606075206541697,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 22343
    },
    {
      "epoch": 0.22344,
      "grad_norm": 1.2753880982610875,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 22344
    },
    {
      "epoch": 0.22345,
      "grad_norm": 1.1863030220797137,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 22345
    },
    {
      "epoch": 0.22346,
      "grad_norm": 1.3796537921896148,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 22346
    },
    {
      "epoch": 0.22347,
      "grad_norm": 1.3850505212625757,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 22347
    },
    {
      "epoch": 0.22348,
      "grad_norm": 1.1212723584802724,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 22348
    },
    {
      "epoch": 0.22349,
      "grad_norm": 1.4953190800799308,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 22349
    },
    {
      "epoch": 0.2235,
      "grad_norm": 1.2159937041121087,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 22350
    },
    {
      "epoch": 0.22351,
      "grad_norm": 1.3013062519303582,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 22351
    },
    {
      "epoch": 0.22352,
      "grad_norm": 1.0749860878580382,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 22352
    },
    {
      "epoch": 0.22353,
      "grad_norm": 1.4021783584701677,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 22353
    },
    {
      "epoch": 0.22354,
      "grad_norm": 1.3528154375174712,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 22354
    },
    {
      "epoch": 0.22355,
      "grad_norm": 1.3334532021261907,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 22355
    },
    {
      "epoch": 0.22356,
      "grad_norm": 1.4016203227488941,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 22356
    },
    {
      "epoch": 0.22357,
      "grad_norm": 1.1047671489127466,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 22357
    },
    {
      "epoch": 0.22358,
      "grad_norm": 1.2573296160077105,
      "learning_rate": 0.003,
      "loss": 4.0844,
      "step": 22358
    },
    {
      "epoch": 0.22359,
      "grad_norm": 1.4888231359476305,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 22359
    },
    {
      "epoch": 0.2236,
      "grad_norm": 1.2463718734796545,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 22360
    },
    {
      "epoch": 0.22361,
      "grad_norm": 1.404590659680424,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 22361
    },
    {
      "epoch": 0.22362,
      "grad_norm": 1.2813413033547938,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22362
    },
    {
      "epoch": 0.22363,
      "grad_norm": 1.4883101044999885,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 22363
    },
    {
      "epoch": 0.22364,
      "grad_norm": 1.1796491598106018,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 22364
    },
    {
      "epoch": 0.22365,
      "grad_norm": 1.478516703458727,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 22365
    },
    {
      "epoch": 0.22366,
      "grad_norm": 1.0224712330143668,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 22366
    },
    {
      "epoch": 0.22367,
      "grad_norm": 1.3751666660674458,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 22367
    },
    {
      "epoch": 0.22368,
      "grad_norm": 1.135934042305452,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 22368
    },
    {
      "epoch": 0.22369,
      "grad_norm": 1.522880405573611,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 22369
    },
    {
      "epoch": 0.2237,
      "grad_norm": 1.3445328112617103,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 22370
    },
    {
      "epoch": 0.22371,
      "grad_norm": 1.418514430795147,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 22371
    },
    {
      "epoch": 0.22372,
      "grad_norm": 1.2207692837978814,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 22372
    },
    {
      "epoch": 0.22373,
      "grad_norm": 1.2769624870617733,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 22373
    },
    {
      "epoch": 0.22374,
      "grad_norm": 1.0708031813127818,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 22374
    },
    {
      "epoch": 0.22375,
      "grad_norm": 1.3730281568778038,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 22375
    },
    {
      "epoch": 0.22376,
      "grad_norm": 1.0477212952218362,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 22376
    },
    {
      "epoch": 0.22377,
      "grad_norm": 1.7332241105349706,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 22377
    },
    {
      "epoch": 0.22378,
      "grad_norm": 1.0521585596074527,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22378
    },
    {
      "epoch": 0.22379,
      "grad_norm": 1.490163928963922,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 22379
    },
    {
      "epoch": 0.2238,
      "grad_norm": 1.1353874945847433,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 22380
    },
    {
      "epoch": 0.22381,
      "grad_norm": 1.3622848418232558,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 22381
    },
    {
      "epoch": 0.22382,
      "grad_norm": 1.4320302775215485,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 22382
    },
    {
      "epoch": 0.22383,
      "grad_norm": 1.3346976943076352,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 22383
    },
    {
      "epoch": 0.22384,
      "grad_norm": 1.3432305941947116,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 22384
    },
    {
      "epoch": 0.22385,
      "grad_norm": 1.116094994647951,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 22385
    },
    {
      "epoch": 0.22386,
      "grad_norm": 1.369139885417438,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 22386
    },
    {
      "epoch": 0.22387,
      "grad_norm": 1.2171968643952806,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 22387
    },
    {
      "epoch": 0.22388,
      "grad_norm": 1.5021210991763598,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 22388
    },
    {
      "epoch": 0.22389,
      "grad_norm": 0.9165710159106715,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 22389
    },
    {
      "epoch": 0.2239,
      "grad_norm": 1.3538788722876058,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 22390
    },
    {
      "epoch": 0.22391,
      "grad_norm": 1.2920509847169448,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 22391
    },
    {
      "epoch": 0.22392,
      "grad_norm": 1.2823088339124191,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 22392
    },
    {
      "epoch": 0.22393,
      "grad_norm": 1.1256445035900986,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 22393
    },
    {
      "epoch": 0.22394,
      "grad_norm": 1.491436558545818,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 22394
    },
    {
      "epoch": 0.22395,
      "grad_norm": 1.3985742371270176,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 22395
    },
    {
      "epoch": 0.22396,
      "grad_norm": 1.146971564066309,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 22396
    },
    {
      "epoch": 0.22397,
      "grad_norm": 1.2971131257252893,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 22397
    },
    {
      "epoch": 0.22398,
      "grad_norm": 1.3626163416376342,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 22398
    },
    {
      "epoch": 0.22399,
      "grad_norm": 1.1601142495852304,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 22399
    },
    {
      "epoch": 0.224,
      "grad_norm": 1.4788745770494174,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 22400
    },
    {
      "epoch": 0.22401,
      "grad_norm": 1.115497940758771,
      "learning_rate": 0.003,
      "loss": 4.0857,
      "step": 22401
    },
    {
      "epoch": 0.22402,
      "grad_norm": 1.3175398356922252,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 22402
    },
    {
      "epoch": 0.22403,
      "grad_norm": 1.3867457091875925,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 22403
    },
    {
      "epoch": 0.22404,
      "grad_norm": 1.2188372464295825,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 22404
    },
    {
      "epoch": 0.22405,
      "grad_norm": 1.6136079424523424,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 22405
    },
    {
      "epoch": 0.22406,
      "grad_norm": 1.2840510951647703,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 22406
    },
    {
      "epoch": 0.22407,
      "grad_norm": 1.1922017910090996,
      "learning_rate": 0.003,
      "loss": 4.0233,
      "step": 22407
    },
    {
      "epoch": 0.22408,
      "grad_norm": 1.2513253776576456,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 22408
    },
    {
      "epoch": 0.22409,
      "grad_norm": 0.9935214065796066,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 22409
    },
    {
      "epoch": 0.2241,
      "grad_norm": 1.311400695830719,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 22410
    },
    {
      "epoch": 0.22411,
      "grad_norm": 1.176252463303334,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 22411
    },
    {
      "epoch": 0.22412,
      "grad_norm": 1.2844675406192732,
      "learning_rate": 0.003,
      "loss": 4.0287,
      "step": 22412
    },
    {
      "epoch": 0.22413,
      "grad_norm": 1.226472730943966,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 22413
    },
    {
      "epoch": 0.22414,
      "grad_norm": 1.4417331936987128,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 22414
    },
    {
      "epoch": 0.22415,
      "grad_norm": 1.4405129959311636,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 22415
    },
    {
      "epoch": 0.22416,
      "grad_norm": 1.2116370472850526,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 22416
    },
    {
      "epoch": 0.22417,
      "grad_norm": 1.467541063276362,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 22417
    },
    {
      "epoch": 0.22418,
      "grad_norm": 1.4105104108935589,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 22418
    },
    {
      "epoch": 0.22419,
      "grad_norm": 1.1739765243612241,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22419
    },
    {
      "epoch": 0.2242,
      "grad_norm": 1.4885789058986547,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 22420
    },
    {
      "epoch": 0.22421,
      "grad_norm": 1.037911003556224,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 22421
    },
    {
      "epoch": 0.22422,
      "grad_norm": 1.3592019525985986,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 22422
    },
    {
      "epoch": 0.22423,
      "grad_norm": 1.3884009919816935,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 22423
    },
    {
      "epoch": 0.22424,
      "grad_norm": 1.518517321208513,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 22424
    },
    {
      "epoch": 0.22425,
      "grad_norm": 1.1687833287461964,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 22425
    },
    {
      "epoch": 0.22426,
      "grad_norm": 1.365890176260934,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 22426
    },
    {
      "epoch": 0.22427,
      "grad_norm": 1.133197490558877,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 22427
    },
    {
      "epoch": 0.22428,
      "grad_norm": 1.5122539036707174,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 22428
    },
    {
      "epoch": 0.22429,
      "grad_norm": 1.0362710842338592,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 22429
    },
    {
      "epoch": 0.2243,
      "grad_norm": 1.584004024651577,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 22430
    },
    {
      "epoch": 0.22431,
      "grad_norm": 1.1423398695142792,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 22431
    },
    {
      "epoch": 0.22432,
      "grad_norm": 1.3993501986507162,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 22432
    },
    {
      "epoch": 0.22433,
      "grad_norm": 1.114000224834582,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 22433
    },
    {
      "epoch": 0.22434,
      "grad_norm": 1.318832448588106,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 22434
    },
    {
      "epoch": 0.22435,
      "grad_norm": 1.1834170443200742,
      "learning_rate": 0.003,
      "loss": 4.0772,
      "step": 22435
    },
    {
      "epoch": 0.22436,
      "grad_norm": 1.2640764447417074,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 22436
    },
    {
      "epoch": 0.22437,
      "grad_norm": 1.377324457859247,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 22437
    },
    {
      "epoch": 0.22438,
      "grad_norm": 1.158898418135193,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 22438
    },
    {
      "epoch": 0.22439,
      "grad_norm": 1.3749488508273955,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 22439
    },
    {
      "epoch": 0.2244,
      "grad_norm": 1.322779772421797,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 22440
    },
    {
      "epoch": 0.22441,
      "grad_norm": 1.2278097001260544,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 22441
    },
    {
      "epoch": 0.22442,
      "grad_norm": 1.2261115415486579,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 22442
    },
    {
      "epoch": 0.22443,
      "grad_norm": 1.4746898038594523,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 22443
    },
    {
      "epoch": 0.22444,
      "grad_norm": 1.3500624604968665,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 22444
    },
    {
      "epoch": 0.22445,
      "grad_norm": 1.3502637523670458,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 22445
    },
    {
      "epoch": 0.22446,
      "grad_norm": 1.2719783765555996,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 22446
    },
    {
      "epoch": 0.22447,
      "grad_norm": 1.3869575617555128,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 22447
    },
    {
      "epoch": 0.22448,
      "grad_norm": 1.225287527657583,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 22448
    },
    {
      "epoch": 0.22449,
      "grad_norm": 1.3169393455999998,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 22449
    },
    {
      "epoch": 0.2245,
      "grad_norm": 1.4657992689735533,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 22450
    },
    {
      "epoch": 0.22451,
      "grad_norm": 1.4944982135719134,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 22451
    },
    {
      "epoch": 0.22452,
      "grad_norm": 1.1613291719364822,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 22452
    },
    {
      "epoch": 0.22453,
      "grad_norm": 1.5470304026709796,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 22453
    },
    {
      "epoch": 0.22454,
      "grad_norm": 1.0105540330389218,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 22454
    },
    {
      "epoch": 0.22455,
      "grad_norm": 1.4572158881289734,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 22455
    },
    {
      "epoch": 0.22456,
      "grad_norm": 1.0906508336760523,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 22456
    },
    {
      "epoch": 0.22457,
      "grad_norm": 1.7741978830452934,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 22457
    },
    {
      "epoch": 0.22458,
      "grad_norm": 1.2198289454157725,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 22458
    },
    {
      "epoch": 0.22459,
      "grad_norm": 1.4733642872878125,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 22459
    },
    {
      "epoch": 0.2246,
      "grad_norm": 1.32696670690676,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 22460
    },
    {
      "epoch": 0.22461,
      "grad_norm": 1.1886104756898297,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 22461
    },
    {
      "epoch": 0.22462,
      "grad_norm": 1.2191357719141387,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 22462
    },
    {
      "epoch": 0.22463,
      "grad_norm": 1.4188923527117434,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 22463
    },
    {
      "epoch": 0.22464,
      "grad_norm": 1.2805984183571502,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 22464
    },
    {
      "epoch": 0.22465,
      "grad_norm": 1.5753496965318274,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 22465
    },
    {
      "epoch": 0.22466,
      "grad_norm": 1.179880806487591,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 22466
    },
    {
      "epoch": 0.22467,
      "grad_norm": 1.3485754220347155,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 22467
    },
    {
      "epoch": 0.22468,
      "grad_norm": 1.2708371386466257,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 22468
    },
    {
      "epoch": 0.22469,
      "grad_norm": 1.1944117439714106,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 22469
    },
    {
      "epoch": 0.2247,
      "grad_norm": 1.2766852560716921,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 22470
    },
    {
      "epoch": 0.22471,
      "grad_norm": 1.7035429052101791,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 22471
    },
    {
      "epoch": 0.22472,
      "grad_norm": 1.0915261183000333,
      "learning_rate": 0.003,
      "loss": 4.0835,
      "step": 22472
    },
    {
      "epoch": 0.22473,
      "grad_norm": 1.4883615868165714,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 22473
    },
    {
      "epoch": 0.22474,
      "grad_norm": 1.0578550955517174,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 22474
    },
    {
      "epoch": 0.22475,
      "grad_norm": 1.2875892117294654,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22475
    },
    {
      "epoch": 0.22476,
      "grad_norm": 1.2169411525097316,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 22476
    },
    {
      "epoch": 0.22477,
      "grad_norm": 1.4483348325305532,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 22477
    },
    {
      "epoch": 0.22478,
      "grad_norm": 1.2229456460115564,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 22478
    },
    {
      "epoch": 0.22479,
      "grad_norm": 1.1979696729402194,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 22479
    },
    {
      "epoch": 0.2248,
      "grad_norm": 1.3704086152133736,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 22480
    },
    {
      "epoch": 0.22481,
      "grad_norm": 1.2069377418342675,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 22481
    },
    {
      "epoch": 0.22482,
      "grad_norm": 1.3379856851864262,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 22482
    },
    {
      "epoch": 0.22483,
      "grad_norm": 1.1664769506298684,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22483
    },
    {
      "epoch": 0.22484,
      "grad_norm": 1.5041251557210558,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 22484
    },
    {
      "epoch": 0.22485,
      "grad_norm": 1.0180817941224296,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 22485
    },
    {
      "epoch": 0.22486,
      "grad_norm": 1.6643621587920203,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 22486
    },
    {
      "epoch": 0.22487,
      "grad_norm": 1.1126847126724073,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 22487
    },
    {
      "epoch": 0.22488,
      "grad_norm": 1.3776327714744627,
      "learning_rate": 0.003,
      "loss": 4.0771,
      "step": 22488
    },
    {
      "epoch": 0.22489,
      "grad_norm": 1.434849507538738,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 22489
    },
    {
      "epoch": 0.2249,
      "grad_norm": 1.3887483141725754,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 22490
    },
    {
      "epoch": 0.22491,
      "grad_norm": 1.1684750215251973,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 22491
    },
    {
      "epoch": 0.22492,
      "grad_norm": 1.1722986701929685,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 22492
    },
    {
      "epoch": 0.22493,
      "grad_norm": 1.4917275414576998,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 22493
    },
    {
      "epoch": 0.22494,
      "grad_norm": 1.3392680617555675,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 22494
    },
    {
      "epoch": 0.22495,
      "grad_norm": 1.3363684423707247,
      "learning_rate": 0.003,
      "loss": 4.0732,
      "step": 22495
    },
    {
      "epoch": 0.22496,
      "grad_norm": 1.1204296554549116,
      "learning_rate": 0.003,
      "loss": 4.0806,
      "step": 22496
    },
    {
      "epoch": 0.22497,
      "grad_norm": 1.4715952734942133,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 22497
    },
    {
      "epoch": 0.22498,
      "grad_norm": 1.1286796848027008,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 22498
    },
    {
      "epoch": 0.22499,
      "grad_norm": 1.372096531276422,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 22499
    },
    {
      "epoch": 0.225,
      "grad_norm": 1.1486641500096484,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 22500
    },
    {
      "epoch": 0.22501,
      "grad_norm": 1.3723721393832513,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 22501
    },
    {
      "epoch": 0.22502,
      "grad_norm": 1.3046545766839284,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 22502
    },
    {
      "epoch": 0.22503,
      "grad_norm": 1.3284179722379172,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 22503
    },
    {
      "epoch": 0.22504,
      "grad_norm": 1.1119344957399873,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 22504
    },
    {
      "epoch": 0.22505,
      "grad_norm": 1.578456863799798,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 22505
    },
    {
      "epoch": 0.22506,
      "grad_norm": 1.2077848169170577,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 22506
    },
    {
      "epoch": 0.22507,
      "grad_norm": 1.2051758062685873,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 22507
    },
    {
      "epoch": 0.22508,
      "grad_norm": 1.280602849013952,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 22508
    },
    {
      "epoch": 0.22509,
      "grad_norm": 1.352851133581328,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 22509
    },
    {
      "epoch": 0.2251,
      "grad_norm": 1.0192495616969712,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 22510
    },
    {
      "epoch": 0.22511,
      "grad_norm": 1.5177906883465204,
      "learning_rate": 0.003,
      "loss": 4.0276,
      "step": 22511
    },
    {
      "epoch": 0.22512,
      "grad_norm": 1.1457818258301886,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 22512
    },
    {
      "epoch": 0.22513,
      "grad_norm": 1.3491140413787326,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 22513
    },
    {
      "epoch": 0.22514,
      "grad_norm": 0.99994817126937,
      "learning_rate": 0.003,
      "loss": 4.0088,
      "step": 22514
    },
    {
      "epoch": 0.22515,
      "grad_norm": 1.414576842473423,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 22515
    },
    {
      "epoch": 0.22516,
      "grad_norm": 1.0583718157620126,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 22516
    },
    {
      "epoch": 0.22517,
      "grad_norm": 1.51180811624445,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 22517
    },
    {
      "epoch": 0.22518,
      "grad_norm": 1.0395624435698443,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 22518
    },
    {
      "epoch": 0.22519,
      "grad_norm": 1.4144988736342254,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 22519
    },
    {
      "epoch": 0.2252,
      "grad_norm": 1.4898113967313333,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 22520
    },
    {
      "epoch": 0.22521,
      "grad_norm": 1.2624273689852192,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 22521
    },
    {
      "epoch": 0.22522,
      "grad_norm": 1.4067702398160626,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 22522
    },
    {
      "epoch": 0.22523,
      "grad_norm": 1.1773782057120372,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 22523
    },
    {
      "epoch": 0.22524,
      "grad_norm": 1.3744929248938713,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 22524
    },
    {
      "epoch": 0.22525,
      "grad_norm": 1.036773671963765,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 22525
    },
    {
      "epoch": 0.22526,
      "grad_norm": 1.3639080767800937,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 22526
    },
    {
      "epoch": 0.22527,
      "grad_norm": 1.252612508165742,
      "learning_rate": 0.003,
      "loss": 4.0676,
      "step": 22527
    },
    {
      "epoch": 0.22528,
      "grad_norm": 1.4278657114127173,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 22528
    },
    {
      "epoch": 0.22529,
      "grad_norm": 0.9999350331633271,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 22529
    },
    {
      "epoch": 0.2253,
      "grad_norm": 1.2817891196627766,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 22530
    },
    {
      "epoch": 0.22531,
      "grad_norm": 1.4696249469244116,
      "learning_rate": 0.003,
      "loss": 4.0751,
      "step": 22531
    },
    {
      "epoch": 0.22532,
      "grad_norm": 1.302138195690749,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 22532
    },
    {
      "epoch": 0.22533,
      "grad_norm": 1.3981459768298665,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 22533
    },
    {
      "epoch": 0.22534,
      "grad_norm": 1.3194697626184033,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 22534
    },
    {
      "epoch": 0.22535,
      "grad_norm": 1.2430679840444745,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 22535
    },
    {
      "epoch": 0.22536,
      "grad_norm": 1.2846080870555665,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 22536
    },
    {
      "epoch": 0.22537,
      "grad_norm": 1.1769136551752915,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 22537
    },
    {
      "epoch": 0.22538,
      "grad_norm": 1.2321372670348028,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 22538
    },
    {
      "epoch": 0.22539,
      "grad_norm": 1.5042893783921505,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 22539
    },
    {
      "epoch": 0.2254,
      "grad_norm": 1.124989172031265,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 22540
    },
    {
      "epoch": 0.22541,
      "grad_norm": 1.5375434287648648,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 22541
    },
    {
      "epoch": 0.22542,
      "grad_norm": 1.1460432617510374,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 22542
    },
    {
      "epoch": 0.22543,
      "grad_norm": 1.229328340830702,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 22543
    },
    {
      "epoch": 0.22544,
      "grad_norm": 1.2005061825366072,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 22544
    },
    {
      "epoch": 0.22545,
      "grad_norm": 1.2614278890985622,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 22545
    },
    {
      "epoch": 0.22546,
      "grad_norm": 1.3068986510878424,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 22546
    },
    {
      "epoch": 0.22547,
      "grad_norm": 1.3270799932059623,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 22547
    },
    {
      "epoch": 0.22548,
      "grad_norm": 1.6015301694475044,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22548
    },
    {
      "epoch": 0.22549,
      "grad_norm": 0.9898988981525878,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 22549
    },
    {
      "epoch": 0.2255,
      "grad_norm": 1.5398743726609867,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 22550
    },
    {
      "epoch": 0.22551,
      "grad_norm": 1.0979781519615899,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 22551
    },
    {
      "epoch": 0.22552,
      "grad_norm": 1.2099441771834618,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 22552
    },
    {
      "epoch": 0.22553,
      "grad_norm": 1.3489680531585568,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 22553
    },
    {
      "epoch": 0.22554,
      "grad_norm": 1.324302858070193,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 22554
    },
    {
      "epoch": 0.22555,
      "grad_norm": 1.2575044614820337,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 22555
    },
    {
      "epoch": 0.22556,
      "grad_norm": 1.2626124900806674,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 22556
    },
    {
      "epoch": 0.22557,
      "grad_norm": 1.2154527676652886,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 22557
    },
    {
      "epoch": 0.22558,
      "grad_norm": 1.5566588781201214,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 22558
    },
    {
      "epoch": 0.22559,
      "grad_norm": 1.2210561493024892,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 22559
    },
    {
      "epoch": 0.2256,
      "grad_norm": 1.4062844461091686,
      "learning_rate": 0.003,
      "loss": 4.0764,
      "step": 22560
    },
    {
      "epoch": 0.22561,
      "grad_norm": 1.1072881788800877,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 22561
    },
    {
      "epoch": 0.22562,
      "grad_norm": 1.697164349991573,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 22562
    },
    {
      "epoch": 0.22563,
      "grad_norm": 1.0823401616660748,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 22563
    },
    {
      "epoch": 0.22564,
      "grad_norm": 1.4578122990061382,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 22564
    },
    {
      "epoch": 0.22565,
      "grad_norm": 1.0274068168740182,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 22565
    },
    {
      "epoch": 0.22566,
      "grad_norm": 1.3415441629728502,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 22566
    },
    {
      "epoch": 0.22567,
      "grad_norm": 1.1806346198089603,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 22567
    },
    {
      "epoch": 0.22568,
      "grad_norm": 1.4665297214099582,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 22568
    },
    {
      "epoch": 0.22569,
      "grad_norm": 1.0734620731968774,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 22569
    },
    {
      "epoch": 0.2257,
      "grad_norm": 1.5077966401944736,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 22570
    },
    {
      "epoch": 0.22571,
      "grad_norm": 1.2047552478719217,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 22571
    },
    {
      "epoch": 0.22572,
      "grad_norm": 1.525018125012932,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 22572
    },
    {
      "epoch": 0.22573,
      "grad_norm": 1.0652457912081579,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 22573
    },
    {
      "epoch": 0.22574,
      "grad_norm": 1.2114860422416958,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 22574
    },
    {
      "epoch": 0.22575,
      "grad_norm": 1.3818053420657728,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 22575
    },
    {
      "epoch": 0.22576,
      "grad_norm": 1.0639394818485324,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 22576
    },
    {
      "epoch": 0.22577,
      "grad_norm": 1.3868765056606982,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 22577
    },
    {
      "epoch": 0.22578,
      "grad_norm": 1.520924933073174,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 22578
    },
    {
      "epoch": 0.22579,
      "grad_norm": 1.26000523465386,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22579
    },
    {
      "epoch": 0.2258,
      "grad_norm": 1.3881665919508095,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 22580
    },
    {
      "epoch": 0.22581,
      "grad_norm": 1.2315326602596626,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 22581
    },
    {
      "epoch": 0.22582,
      "grad_norm": 1.1901480425516753,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 22582
    },
    {
      "epoch": 0.22583,
      "grad_norm": 1.130467926058565,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 22583
    },
    {
      "epoch": 0.22584,
      "grad_norm": 1.406528403899872,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 22584
    },
    {
      "epoch": 0.22585,
      "grad_norm": 1.542422584892502,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 22585
    },
    {
      "epoch": 0.22586,
      "grad_norm": 1.593864830777442,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 22586
    },
    {
      "epoch": 0.22587,
      "grad_norm": 1.3854399345300927,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 22587
    },
    {
      "epoch": 0.22588,
      "grad_norm": 1.2856110491107158,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 22588
    },
    {
      "epoch": 0.22589,
      "grad_norm": 1.2276983551534677,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 22589
    },
    {
      "epoch": 0.2259,
      "grad_norm": 1.4277051343967955,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 22590
    },
    {
      "epoch": 0.22591,
      "grad_norm": 1.1407240152746587,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22591
    },
    {
      "epoch": 0.22592,
      "grad_norm": 1.2619386720652488,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 22592
    },
    {
      "epoch": 0.22593,
      "grad_norm": 1.313989982492832,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 22593
    },
    {
      "epoch": 0.22594,
      "grad_norm": 1.1822401340355067,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 22594
    },
    {
      "epoch": 0.22595,
      "grad_norm": 1.278760915465666,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 22595
    },
    {
      "epoch": 0.22596,
      "grad_norm": 1.2367738374208836,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 22596
    },
    {
      "epoch": 0.22597,
      "grad_norm": 1.300515051860066,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 22597
    },
    {
      "epoch": 0.22598,
      "grad_norm": 1.2797126858848549,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 22598
    },
    {
      "epoch": 0.22599,
      "grad_norm": 1.308836644674331,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 22599
    },
    {
      "epoch": 0.226,
      "grad_norm": 1.1671181400459543,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 22600
    },
    {
      "epoch": 0.22601,
      "grad_norm": 1.3042428869441687,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 22601
    },
    {
      "epoch": 0.22602,
      "grad_norm": 1.2135366414762496,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 22602
    },
    {
      "epoch": 0.22603,
      "grad_norm": 1.26134826915533,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 22603
    },
    {
      "epoch": 0.22604,
      "grad_norm": 1.2354811227773834,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22604
    },
    {
      "epoch": 0.22605,
      "grad_norm": 1.2536625176725607,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 22605
    },
    {
      "epoch": 0.22606,
      "grad_norm": 1.313000819827062,
      "learning_rate": 0.003,
      "loss": 4.0727,
      "step": 22606
    },
    {
      "epoch": 0.22607,
      "grad_norm": 1.4713753017199727,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 22607
    },
    {
      "epoch": 0.22608,
      "grad_norm": 1.3520838679471667,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 22608
    },
    {
      "epoch": 0.22609,
      "grad_norm": 1.5455726355290877,
      "learning_rate": 0.003,
      "loss": 4.0822,
      "step": 22609
    },
    {
      "epoch": 0.2261,
      "grad_norm": 1.2182019153494124,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 22610
    },
    {
      "epoch": 0.22611,
      "grad_norm": 1.3561505169364962,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 22611
    },
    {
      "epoch": 0.22612,
      "grad_norm": 1.3133920050258785,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 22612
    },
    {
      "epoch": 0.22613,
      "grad_norm": 1.312449719643275,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 22613
    },
    {
      "epoch": 0.22614,
      "grad_norm": 1.074657696380918,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 22614
    },
    {
      "epoch": 0.22615,
      "grad_norm": 1.442241840495215,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 22615
    },
    {
      "epoch": 0.22616,
      "grad_norm": 1.0601450853962522,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 22616
    },
    {
      "epoch": 0.22617,
      "grad_norm": 1.5620336776709003,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 22617
    },
    {
      "epoch": 0.22618,
      "grad_norm": 0.9689079372536944,
      "learning_rate": 0.003,
      "loss": 4.0728,
      "step": 22618
    },
    {
      "epoch": 0.22619,
      "grad_norm": 1.6808687314107054,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 22619
    },
    {
      "epoch": 0.2262,
      "grad_norm": 1.1640894040723138,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 22620
    },
    {
      "epoch": 0.22621,
      "grad_norm": 1.344668278352046,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 22621
    },
    {
      "epoch": 0.22622,
      "grad_norm": 1.44299896409508,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 22622
    },
    {
      "epoch": 0.22623,
      "grad_norm": 1.3721375096943011,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 22623
    },
    {
      "epoch": 0.22624,
      "grad_norm": 1.1522684824387952,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 22624
    },
    {
      "epoch": 0.22625,
      "grad_norm": 1.2260741731087532,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 22625
    },
    {
      "epoch": 0.22626,
      "grad_norm": 1.3070977190046338,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 22626
    },
    {
      "epoch": 0.22627,
      "grad_norm": 1.2589188365447337,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 22627
    },
    {
      "epoch": 0.22628,
      "grad_norm": 1.3944764788183477,
      "learning_rate": 0.003,
      "loss": 4.0769,
      "step": 22628
    },
    {
      "epoch": 0.22629,
      "grad_norm": 1.1662320467262188,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 22629
    },
    {
      "epoch": 0.2263,
      "grad_norm": 1.3279237232083825,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 22630
    },
    {
      "epoch": 0.22631,
      "grad_norm": 1.4282357337112004,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 22631
    },
    {
      "epoch": 0.22632,
      "grad_norm": 1.2661735843623207,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 22632
    },
    {
      "epoch": 0.22633,
      "grad_norm": 1.6257038340349381,
      "learning_rate": 0.003,
      "loss": 4.0783,
      "step": 22633
    },
    {
      "epoch": 0.22634,
      "grad_norm": 0.9648899165313575,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 22634
    },
    {
      "epoch": 0.22635,
      "grad_norm": 1.371901664366491,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 22635
    },
    {
      "epoch": 0.22636,
      "grad_norm": 1.0438516546415813,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 22636
    },
    {
      "epoch": 0.22637,
      "grad_norm": 1.4310376589392284,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 22637
    },
    {
      "epoch": 0.22638,
      "grad_norm": 1.1567361263821145,
      "learning_rate": 0.003,
      "loss": 4.0666,
      "step": 22638
    },
    {
      "epoch": 0.22639,
      "grad_norm": 1.2405414390662812,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 22639
    },
    {
      "epoch": 0.2264,
      "grad_norm": 1.3410653625276123,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 22640
    },
    {
      "epoch": 0.22641,
      "grad_norm": 1.4188255889704002,
      "learning_rate": 0.003,
      "loss": 4.0608,
      "step": 22641
    },
    {
      "epoch": 0.22642,
      "grad_norm": 1.3474829338391447,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 22642
    },
    {
      "epoch": 0.22643,
      "grad_norm": 1.2841709957180494,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 22643
    },
    {
      "epoch": 0.22644,
      "grad_norm": 1.2520534260697076,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 22644
    },
    {
      "epoch": 0.22645,
      "grad_norm": 1.5517767716720081,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 22645
    },
    {
      "epoch": 0.22646,
      "grad_norm": 1.0330104694980737,
      "learning_rate": 0.003,
      "loss": 4.076,
      "step": 22646
    },
    {
      "epoch": 0.22647,
      "grad_norm": 1.4845952140019196,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 22647
    },
    {
      "epoch": 0.22648,
      "grad_norm": 1.1610074054596695,
      "learning_rate": 0.003,
      "loss": 4.084,
      "step": 22648
    },
    {
      "epoch": 0.22649,
      "grad_norm": 1.3770728628981728,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 22649
    },
    {
      "epoch": 0.2265,
      "grad_norm": 1.1936327852921174,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 22650
    },
    {
      "epoch": 0.22651,
      "grad_norm": 1.331939898988897,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 22651
    },
    {
      "epoch": 0.22652,
      "grad_norm": 1.1178861395283695,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 22652
    },
    {
      "epoch": 0.22653,
      "grad_norm": 1.354185330857788,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 22653
    },
    {
      "epoch": 0.22654,
      "grad_norm": 1.4125164056890323,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 22654
    },
    {
      "epoch": 0.22655,
      "grad_norm": 1.232991593856581,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 22655
    },
    {
      "epoch": 0.22656,
      "grad_norm": 1.5464579689092532,
      "learning_rate": 0.003,
      "loss": 4.0376,
      "step": 22656
    },
    {
      "epoch": 0.22657,
      "grad_norm": 1.0228004325978513,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 22657
    },
    {
      "epoch": 0.22658,
      "grad_norm": 1.216542862857828,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 22658
    },
    {
      "epoch": 0.22659,
      "grad_norm": 1.3763995609191408,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 22659
    },
    {
      "epoch": 0.2266,
      "grad_norm": 1.6924043104114337,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 22660
    },
    {
      "epoch": 0.22661,
      "grad_norm": 1.11683907313906,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 22661
    },
    {
      "epoch": 0.22662,
      "grad_norm": 1.6378062181657183,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22662
    },
    {
      "epoch": 0.22663,
      "grad_norm": 1.1164816037937753,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 22663
    },
    {
      "epoch": 0.22664,
      "grad_norm": 1.5921206462792123,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 22664
    },
    {
      "epoch": 0.22665,
      "grad_norm": 0.9708473646983161,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 22665
    },
    {
      "epoch": 0.22666,
      "grad_norm": 1.3820591197284526,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 22666
    },
    {
      "epoch": 0.22667,
      "grad_norm": 1.109630020127074,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 22667
    },
    {
      "epoch": 0.22668,
      "grad_norm": 1.3925172522796756,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 22668
    },
    {
      "epoch": 0.22669,
      "grad_norm": 1.3126031415495094,
      "learning_rate": 0.003,
      "loss": 4.0327,
      "step": 22669
    },
    {
      "epoch": 0.2267,
      "grad_norm": 1.148292833387036,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 22670
    },
    {
      "epoch": 0.22671,
      "grad_norm": 1.370358115085091,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 22671
    },
    {
      "epoch": 0.22672,
      "grad_norm": 1.3196396087006252,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 22672
    },
    {
      "epoch": 0.22673,
      "grad_norm": 1.518693881349011,
      "learning_rate": 0.003,
      "loss": 4.0119,
      "step": 22673
    },
    {
      "epoch": 0.22674,
      "grad_norm": 1.2770842397458222,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 22674
    },
    {
      "epoch": 0.22675,
      "grad_norm": 1.4607556447767738,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 22675
    },
    {
      "epoch": 0.22676,
      "grad_norm": 1.1421132126999296,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 22676
    },
    {
      "epoch": 0.22677,
      "grad_norm": 1.324177575118353,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 22677
    },
    {
      "epoch": 0.22678,
      "grad_norm": 1.0253559359484354,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 22678
    },
    {
      "epoch": 0.22679,
      "grad_norm": 1.5325664911066361,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 22679
    },
    {
      "epoch": 0.2268,
      "grad_norm": 1.0671308318897104,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 22680
    },
    {
      "epoch": 0.22681,
      "grad_norm": 1.6768360576583528,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 22681
    },
    {
      "epoch": 0.22682,
      "grad_norm": 1.125262484743771,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 22682
    },
    {
      "epoch": 0.22683,
      "grad_norm": 1.3855513862543525,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 22683
    },
    {
      "epoch": 0.22684,
      "grad_norm": 1.235809625591013,
      "learning_rate": 0.003,
      "loss": 4.0624,
      "step": 22684
    },
    {
      "epoch": 0.22685,
      "grad_norm": 1.2529885237322402,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 22685
    },
    {
      "epoch": 0.22686,
      "grad_norm": 1.3013013941449454,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 22686
    },
    {
      "epoch": 0.22687,
      "grad_norm": 1.2866448254940237,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 22687
    },
    {
      "epoch": 0.22688,
      "grad_norm": 1.3333273010739528,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 22688
    },
    {
      "epoch": 0.22689,
      "grad_norm": 1.3697817826252374,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 22689
    },
    {
      "epoch": 0.2269,
      "grad_norm": 1.1749598944282096,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 22690
    },
    {
      "epoch": 0.22691,
      "grad_norm": 1.3354900475986349,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 22691
    },
    {
      "epoch": 0.22692,
      "grad_norm": 1.2378429742426724,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 22692
    },
    {
      "epoch": 0.22693,
      "grad_norm": 1.4046528514027574,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 22693
    },
    {
      "epoch": 0.22694,
      "grad_norm": 1.104016045879578,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 22694
    },
    {
      "epoch": 0.22695,
      "grad_norm": 1.5853447545804227,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 22695
    },
    {
      "epoch": 0.22696,
      "grad_norm": 1.2017974352948775,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 22696
    },
    {
      "epoch": 0.22697,
      "grad_norm": 1.2639960734509235,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 22697
    },
    {
      "epoch": 0.22698,
      "grad_norm": 1.3598055873871733,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 22698
    },
    {
      "epoch": 0.22699,
      "grad_norm": 1.2155778228686982,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 22699
    },
    {
      "epoch": 0.227,
      "grad_norm": 1.5096492497099996,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 22700
    },
    {
      "epoch": 0.22701,
      "grad_norm": 1.1438661949627844,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 22701
    },
    {
      "epoch": 0.22702,
      "grad_norm": 1.4522348604048556,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 22702
    },
    {
      "epoch": 0.22703,
      "grad_norm": 1.0230340496577157,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 22703
    },
    {
      "epoch": 0.22704,
      "grad_norm": 1.4613033441247205,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 22704
    },
    {
      "epoch": 0.22705,
      "grad_norm": 1.063308297536425,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 22705
    },
    {
      "epoch": 0.22706,
      "grad_norm": 1.5973493003176626,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22706
    },
    {
      "epoch": 0.22707,
      "grad_norm": 1.1719072994723225,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 22707
    },
    {
      "epoch": 0.22708,
      "grad_norm": 1.3647035739638114,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 22708
    },
    {
      "epoch": 0.22709,
      "grad_norm": 1.21264256468456,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 22709
    },
    {
      "epoch": 0.2271,
      "grad_norm": 1.4957409118180294,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 22710
    },
    {
      "epoch": 0.22711,
      "grad_norm": 1.3074504903035473,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 22711
    },
    {
      "epoch": 0.22712,
      "grad_norm": 1.2299409912407897,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 22712
    },
    {
      "epoch": 0.22713,
      "grad_norm": 1.4756516454197908,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 22713
    },
    {
      "epoch": 0.22714,
      "grad_norm": 1.2180637235702896,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 22714
    },
    {
      "epoch": 0.22715,
      "grad_norm": 1.3062867497873833,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 22715
    },
    {
      "epoch": 0.22716,
      "grad_norm": 1.1861936183326716,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 22716
    },
    {
      "epoch": 0.22717,
      "grad_norm": 1.2698126370628935,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 22717
    },
    {
      "epoch": 0.22718,
      "grad_norm": 1.142857025678499,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 22718
    },
    {
      "epoch": 0.22719,
      "grad_norm": 1.2838286118946398,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 22719
    },
    {
      "epoch": 0.2272,
      "grad_norm": 1.3379258913071415,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 22720
    },
    {
      "epoch": 0.22721,
      "grad_norm": 1.1583924755668984,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22721
    },
    {
      "epoch": 0.22722,
      "grad_norm": 1.338776068657845,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 22722
    },
    {
      "epoch": 0.22723,
      "grad_norm": 1.193967684373097,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 22723
    },
    {
      "epoch": 0.22724,
      "grad_norm": 1.2453421739638189,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 22724
    },
    {
      "epoch": 0.22725,
      "grad_norm": 1.3277986491013223,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 22725
    },
    {
      "epoch": 0.22726,
      "grad_norm": 1.1606456992588032,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 22726
    },
    {
      "epoch": 0.22727,
      "grad_norm": 1.6014790251768964,
      "learning_rate": 0.003,
      "loss": 4.0679,
      "step": 22727
    },
    {
      "epoch": 0.22728,
      "grad_norm": 1.1772438646256707,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 22728
    },
    {
      "epoch": 0.22729,
      "grad_norm": 1.5753193741401947,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22729
    },
    {
      "epoch": 0.2273,
      "grad_norm": 1.2244001581798778,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 22730
    },
    {
      "epoch": 0.22731,
      "grad_norm": 1.3049762139181278,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 22731
    },
    {
      "epoch": 0.22732,
      "grad_norm": 1.4257523619725556,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 22732
    },
    {
      "epoch": 0.22733,
      "grad_norm": 1.1300033013606043,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 22733
    },
    {
      "epoch": 0.22734,
      "grad_norm": 1.2789000183009511,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 22734
    },
    {
      "epoch": 0.22735,
      "grad_norm": 1.3787011030907839,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 22735
    },
    {
      "epoch": 0.22736,
      "grad_norm": 1.1534785050906549,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 22736
    },
    {
      "epoch": 0.22737,
      "grad_norm": 1.2556409246801243,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 22737
    },
    {
      "epoch": 0.22738,
      "grad_norm": 1.5023911495167528,
      "learning_rate": 0.003,
      "loss": 4.0797,
      "step": 22738
    },
    {
      "epoch": 0.22739,
      "grad_norm": 1.089950690583449,
      "learning_rate": 0.003,
      "loss": 4.0776,
      "step": 22739
    },
    {
      "epoch": 0.2274,
      "grad_norm": 1.388094751486672,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 22740
    },
    {
      "epoch": 0.22741,
      "grad_norm": 1.1041767662188406,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 22741
    },
    {
      "epoch": 0.22742,
      "grad_norm": 1.577536409684799,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 22742
    },
    {
      "epoch": 0.22743,
      "grad_norm": 1.5105192793850055,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 22743
    },
    {
      "epoch": 0.22744,
      "grad_norm": 1.3115780040631255,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 22744
    },
    {
      "epoch": 0.22745,
      "grad_norm": 1.4678226325592918,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 22745
    },
    {
      "epoch": 0.22746,
      "grad_norm": 1.2946841486547571,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 22746
    },
    {
      "epoch": 0.22747,
      "grad_norm": 1.0972080533745272,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 22747
    },
    {
      "epoch": 0.22748,
      "grad_norm": 1.4098976880591747,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 22748
    },
    {
      "epoch": 0.22749,
      "grad_norm": 0.9706114850723467,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 22749
    },
    {
      "epoch": 0.2275,
      "grad_norm": 1.7768992997687312,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 22750
    },
    {
      "epoch": 0.22751,
      "grad_norm": 1.5976243640323782,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 22751
    },
    {
      "epoch": 0.22752,
      "grad_norm": 1.208176486669619,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 22752
    },
    {
      "epoch": 0.22753,
      "grad_norm": 1.3220546996996818,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 22753
    },
    {
      "epoch": 0.22754,
      "grad_norm": 1.2117167761290428,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 22754
    },
    {
      "epoch": 0.22755,
      "grad_norm": 1.5439747829457355,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 22755
    },
    {
      "epoch": 0.22756,
      "grad_norm": 1.2509292818980267,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 22756
    },
    {
      "epoch": 0.22757,
      "grad_norm": 1.353529833415662,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 22757
    },
    {
      "epoch": 0.22758,
      "grad_norm": 1.22119780866865,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 22758
    },
    {
      "epoch": 0.22759,
      "grad_norm": 1.2846534764837474,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 22759
    },
    {
      "epoch": 0.2276,
      "grad_norm": 1.2337042940346146,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 22760
    },
    {
      "epoch": 0.22761,
      "grad_norm": 1.334364310105172,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 22761
    },
    {
      "epoch": 0.22762,
      "grad_norm": 1.0545672102456172,
      "learning_rate": 0.003,
      "loss": 4.0788,
      "step": 22762
    },
    {
      "epoch": 0.22763,
      "grad_norm": 1.4889896620876106,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 22763
    },
    {
      "epoch": 0.22764,
      "grad_norm": 1.1729072021046347,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 22764
    },
    {
      "epoch": 0.22765,
      "grad_norm": 1.205257120595343,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 22765
    },
    {
      "epoch": 0.22766,
      "grad_norm": 1.1970006365925647,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 22766
    },
    {
      "epoch": 0.22767,
      "grad_norm": 1.3730117095039738,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 22767
    },
    {
      "epoch": 0.22768,
      "grad_norm": 1.0673400445379233,
      "learning_rate": 0.003,
      "loss": 4.0805,
      "step": 22768
    },
    {
      "epoch": 0.22769,
      "grad_norm": 1.389501589959973,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 22769
    },
    {
      "epoch": 0.2277,
      "grad_norm": 1.1542861521096173,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 22770
    },
    {
      "epoch": 0.22771,
      "grad_norm": 1.189489063108176,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 22771
    },
    {
      "epoch": 0.22772,
      "grad_norm": 1.2319560086110348,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 22772
    },
    {
      "epoch": 0.22773,
      "grad_norm": 1.3255860520775213,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 22773
    },
    {
      "epoch": 0.22774,
      "grad_norm": 1.6902334263287309,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 22774
    },
    {
      "epoch": 0.22775,
      "grad_norm": 1.3989969471055916,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 22775
    },
    {
      "epoch": 0.22776,
      "grad_norm": 1.2443381445096287,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 22776
    },
    {
      "epoch": 0.22777,
      "grad_norm": 1.4293361992662825,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 22777
    },
    {
      "epoch": 0.22778,
      "grad_norm": 1.3550414229713295,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 22778
    },
    {
      "epoch": 0.22779,
      "grad_norm": 1.3182152091949482,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 22779
    },
    {
      "epoch": 0.2278,
      "grad_norm": 1.2091175791261757,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 22780
    },
    {
      "epoch": 0.22781,
      "grad_norm": 1.115419144054797,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 22781
    },
    {
      "epoch": 0.22782,
      "grad_norm": 1.3232712279784384,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 22782
    },
    {
      "epoch": 0.22783,
      "grad_norm": 1.2005645416277266,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 22783
    },
    {
      "epoch": 0.22784,
      "grad_norm": 1.5097870070482657,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 22784
    },
    {
      "epoch": 0.22785,
      "grad_norm": 1.0688476025622304,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 22785
    },
    {
      "epoch": 0.22786,
      "grad_norm": 1.3204882262420305,
      "learning_rate": 0.003,
      "loss": 4.0056,
      "step": 22786
    },
    {
      "epoch": 0.22787,
      "grad_norm": 1.013031325356019,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 22787
    },
    {
      "epoch": 0.22788,
      "grad_norm": 1.3314224675818025,
      "learning_rate": 0.003,
      "loss": 4.0683,
      "step": 22788
    },
    {
      "epoch": 0.22789,
      "grad_norm": 1.0705568142322244,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 22789
    },
    {
      "epoch": 0.2279,
      "grad_norm": 1.2883963183099956,
      "learning_rate": 0.003,
      "loss": 4.071,
      "step": 22790
    },
    {
      "epoch": 0.22791,
      "grad_norm": 1.3430730702231144,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 22791
    },
    {
      "epoch": 0.22792,
      "grad_norm": 1.2845662137609606,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 22792
    },
    {
      "epoch": 0.22793,
      "grad_norm": 1.121581724185981,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 22793
    },
    {
      "epoch": 0.22794,
      "grad_norm": 1.4068842948849558,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 22794
    },
    {
      "epoch": 0.22795,
      "grad_norm": 1.3629735629978736,
      "learning_rate": 0.003,
      "loss": 4.0993,
      "step": 22795
    },
    {
      "epoch": 0.22796,
      "grad_norm": 1.4727742275118698,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 22796
    },
    {
      "epoch": 0.22797,
      "grad_norm": 1.5091489239273752,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 22797
    },
    {
      "epoch": 0.22798,
      "grad_norm": 1.257883528943881,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 22798
    },
    {
      "epoch": 0.22799,
      "grad_norm": 1.3645306525309961,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22799
    },
    {
      "epoch": 0.228,
      "grad_norm": 1.2680722205796182,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 22800
    },
    {
      "epoch": 0.22801,
      "grad_norm": 1.3520412484618418,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 22801
    },
    {
      "epoch": 0.22802,
      "grad_norm": 1.1886074017749466,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 22802
    },
    {
      "epoch": 0.22803,
      "grad_norm": 1.1935355561367111,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 22803
    },
    {
      "epoch": 0.22804,
      "grad_norm": 1.3201940689028522,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 22804
    },
    {
      "epoch": 0.22805,
      "grad_norm": 1.0934454414120258,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 22805
    },
    {
      "epoch": 0.22806,
      "grad_norm": 1.3482588962313506,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 22806
    },
    {
      "epoch": 0.22807,
      "grad_norm": 1.1237577331274458,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 22807
    },
    {
      "epoch": 0.22808,
      "grad_norm": 1.50413009597263,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 22808
    },
    {
      "epoch": 0.22809,
      "grad_norm": 1.0720239819102442,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 22809
    },
    {
      "epoch": 0.2281,
      "grad_norm": 1.34377944310639,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 22810
    },
    {
      "epoch": 0.22811,
      "grad_norm": 1.2586971636561433,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 22811
    },
    {
      "epoch": 0.22812,
      "grad_norm": 1.326930680078535,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 22812
    },
    {
      "epoch": 0.22813,
      "grad_norm": 1.2083646576642904,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 22813
    },
    {
      "epoch": 0.22814,
      "grad_norm": 1.5251985459951989,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 22814
    },
    {
      "epoch": 0.22815,
      "grad_norm": 1.0479209292349305,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 22815
    },
    {
      "epoch": 0.22816,
      "grad_norm": 1.5954414927716987,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 22816
    },
    {
      "epoch": 0.22817,
      "grad_norm": 1.0961782039927528,
      "learning_rate": 0.003,
      "loss": 4.0165,
      "step": 22817
    },
    {
      "epoch": 0.22818,
      "grad_norm": 1.401675335635331,
      "learning_rate": 0.003,
      "loss": 4.0342,
      "step": 22818
    },
    {
      "epoch": 0.22819,
      "grad_norm": 1.2230643459470691,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 22819
    },
    {
      "epoch": 0.2282,
      "grad_norm": 1.3305995274495828,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 22820
    },
    {
      "epoch": 0.22821,
      "grad_norm": 1.2484291763090232,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 22821
    },
    {
      "epoch": 0.22822,
      "grad_norm": 1.3922284995542147,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22822
    },
    {
      "epoch": 0.22823,
      "grad_norm": 1.331782803765551,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 22823
    },
    {
      "epoch": 0.22824,
      "grad_norm": 1.2936263790312528,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 22824
    },
    {
      "epoch": 0.22825,
      "grad_norm": 1.2238490062297172,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 22825
    },
    {
      "epoch": 0.22826,
      "grad_norm": 1.3873058960375435,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 22826
    },
    {
      "epoch": 0.22827,
      "grad_norm": 1.2621561196069349,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 22827
    },
    {
      "epoch": 0.22828,
      "grad_norm": 1.3191413240495282,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 22828
    },
    {
      "epoch": 0.22829,
      "grad_norm": 1.1449900765963297,
      "learning_rate": 0.003,
      "loss": 4.0181,
      "step": 22829
    },
    {
      "epoch": 0.2283,
      "grad_norm": 1.5723864078810423,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 22830
    },
    {
      "epoch": 0.22831,
      "grad_norm": 0.9270222188318371,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 22831
    },
    {
      "epoch": 0.22832,
      "grad_norm": 1.59614219616322,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 22832
    },
    {
      "epoch": 0.22833,
      "grad_norm": 1.0525211230538725,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22833
    },
    {
      "epoch": 0.22834,
      "grad_norm": 1.3192374056002631,
      "learning_rate": 0.003,
      "loss": 4.0704,
      "step": 22834
    },
    {
      "epoch": 0.22835,
      "grad_norm": 1.194476809455396,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 22835
    },
    {
      "epoch": 0.22836,
      "grad_norm": 1.3874118940182827,
      "learning_rate": 0.003,
      "loss": 4.0836,
      "step": 22836
    },
    {
      "epoch": 0.22837,
      "grad_norm": 1.3744848109449395,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 22837
    },
    {
      "epoch": 0.22838,
      "grad_norm": 1.3423859284378152,
      "learning_rate": 0.003,
      "loss": 4.096,
      "step": 22838
    },
    {
      "epoch": 0.22839,
      "grad_norm": 1.198426965913589,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 22839
    },
    {
      "epoch": 0.2284,
      "grad_norm": 1.493836846331223,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 22840
    },
    {
      "epoch": 0.22841,
      "grad_norm": 1.3312138491836896,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 22841
    },
    {
      "epoch": 0.22842,
      "grad_norm": 1.5003509037800031,
      "learning_rate": 0.003,
      "loss": 4.0212,
      "step": 22842
    },
    {
      "epoch": 0.22843,
      "grad_norm": 1.205974901084229,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 22843
    },
    {
      "epoch": 0.22844,
      "grad_norm": 1.3298920680682018,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 22844
    },
    {
      "epoch": 0.22845,
      "grad_norm": 1.3421983269805815,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 22845
    },
    {
      "epoch": 0.22846,
      "grad_norm": 1.166847014841375,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 22846
    },
    {
      "epoch": 0.22847,
      "grad_norm": 1.4362377752411226,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 22847
    },
    {
      "epoch": 0.22848,
      "grad_norm": 1.299435089518182,
      "learning_rate": 0.003,
      "loss": 4.0793,
      "step": 22848
    },
    {
      "epoch": 0.22849,
      "grad_norm": 1.3610712132367684,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 22849
    },
    {
      "epoch": 0.2285,
      "grad_norm": 1.2032297939998098,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 22850
    },
    {
      "epoch": 0.22851,
      "grad_norm": 1.3579332227391925,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 22851
    },
    {
      "epoch": 0.22852,
      "grad_norm": 1.0683078712159948,
      "learning_rate": 0.003,
      "loss": 4.0108,
      "step": 22852
    },
    {
      "epoch": 0.22853,
      "grad_norm": 1.6043031294033705,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 22853
    },
    {
      "epoch": 0.22854,
      "grad_norm": 1.1035714288783613,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 22854
    },
    {
      "epoch": 0.22855,
      "grad_norm": 1.46243142753709,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 22855
    },
    {
      "epoch": 0.22856,
      "grad_norm": 1.1288347218629557,
      "learning_rate": 0.003,
      "loss": 4.0804,
      "step": 22856
    },
    {
      "epoch": 0.22857,
      "grad_norm": 1.4360025524383433,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 22857
    },
    {
      "epoch": 0.22858,
      "grad_norm": 1.177066293382833,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 22858
    },
    {
      "epoch": 0.22859,
      "grad_norm": 1.6349447769258982,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 22859
    },
    {
      "epoch": 0.2286,
      "grad_norm": 1.0161831975431206,
      "learning_rate": 0.003,
      "loss": 4.0761,
      "step": 22860
    },
    {
      "epoch": 0.22861,
      "grad_norm": 1.4289237473634253,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 22861
    },
    {
      "epoch": 0.22862,
      "grad_norm": 1.1343028599970997,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 22862
    },
    {
      "epoch": 0.22863,
      "grad_norm": 1.3729139119942362,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 22863
    },
    {
      "epoch": 0.22864,
      "grad_norm": 1.0333333687581276,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 22864
    },
    {
      "epoch": 0.22865,
      "grad_norm": 1.3602636566405157,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 22865
    },
    {
      "epoch": 0.22866,
      "grad_norm": 1.2408380139459476,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 22866
    },
    {
      "epoch": 0.22867,
      "grad_norm": 1.432421938149444,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 22867
    },
    {
      "epoch": 0.22868,
      "grad_norm": 1.2288204950857995,
      "learning_rate": 0.003,
      "loss": 4.0912,
      "step": 22868
    },
    {
      "epoch": 0.22869,
      "grad_norm": 1.2689135070146418,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 22869
    },
    {
      "epoch": 0.2287,
      "grad_norm": 1.3511059785670072,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 22870
    },
    {
      "epoch": 0.22871,
      "grad_norm": 1.1712551823346788,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 22871
    },
    {
      "epoch": 0.22872,
      "grad_norm": 1.29458357449702,
      "learning_rate": 0.003,
      "loss": 4.0225,
      "step": 22872
    },
    {
      "epoch": 0.22873,
      "grad_norm": 1.4486140513751453,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 22873
    },
    {
      "epoch": 0.22874,
      "grad_norm": 1.3324991638906907,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 22874
    },
    {
      "epoch": 0.22875,
      "grad_norm": 1.4379664962729155,
      "learning_rate": 0.003,
      "loss": 4.0147,
      "step": 22875
    },
    {
      "epoch": 0.22876,
      "grad_norm": 1.4621484333769805,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 22876
    },
    {
      "epoch": 0.22877,
      "grad_norm": 1.3807068471846857,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22877
    },
    {
      "epoch": 0.22878,
      "grad_norm": 1.1550550968192956,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22878
    },
    {
      "epoch": 0.22879,
      "grad_norm": 1.2105770412043309,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 22879
    },
    {
      "epoch": 0.2288,
      "grad_norm": 1.25293275091284,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 22880
    },
    {
      "epoch": 0.22881,
      "grad_norm": 1.1599415415362224,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 22881
    },
    {
      "epoch": 0.22882,
      "grad_norm": 1.356880953365146,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 22882
    },
    {
      "epoch": 0.22883,
      "grad_norm": 1.3682136629109296,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 22883
    },
    {
      "epoch": 0.22884,
      "grad_norm": 1.5085045762272722,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 22884
    },
    {
      "epoch": 0.22885,
      "grad_norm": 1.1662355438875212,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 22885
    },
    {
      "epoch": 0.22886,
      "grad_norm": 1.3460202999737598,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 22886
    },
    {
      "epoch": 0.22887,
      "grad_norm": 1.2062139329278307,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 22887
    },
    {
      "epoch": 0.22888,
      "grad_norm": 1.3084987947299225,
      "learning_rate": 0.003,
      "loss": 4.0099,
      "step": 22888
    },
    {
      "epoch": 0.22889,
      "grad_norm": 1.2595171984947928,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 22889
    },
    {
      "epoch": 0.2289,
      "grad_norm": 1.15468884380117,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 22890
    },
    {
      "epoch": 0.22891,
      "grad_norm": 1.2945936313729483,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 22891
    },
    {
      "epoch": 0.22892,
      "grad_norm": 1.4032639666308364,
      "learning_rate": 0.003,
      "loss": 4.0156,
      "step": 22892
    },
    {
      "epoch": 0.22893,
      "grad_norm": 1.1528761000140313,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 22893
    },
    {
      "epoch": 0.22894,
      "grad_norm": 1.2795750717064307,
      "learning_rate": 0.003,
      "loss": 4.0739,
      "step": 22894
    },
    {
      "epoch": 0.22895,
      "grad_norm": 1.1631033517975322,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 22895
    },
    {
      "epoch": 0.22896,
      "grad_norm": 1.347506369499531,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 22896
    },
    {
      "epoch": 0.22897,
      "grad_norm": 1.1553451292295511,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 22897
    },
    {
      "epoch": 0.22898,
      "grad_norm": 1.5172033899346415,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 22898
    },
    {
      "epoch": 0.22899,
      "grad_norm": 1.4964559666410877,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 22899
    },
    {
      "epoch": 0.229,
      "grad_norm": 1.0558874002499534,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 22900
    },
    {
      "epoch": 0.22901,
      "grad_norm": 1.5504407578681763,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 22901
    },
    {
      "epoch": 0.22902,
      "grad_norm": 1.323277562639125,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 22902
    },
    {
      "epoch": 0.22903,
      "grad_norm": 1.3913718127575616,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 22903
    },
    {
      "epoch": 0.22904,
      "grad_norm": 1.285094747518903,
      "learning_rate": 0.003,
      "loss": 4.0087,
      "step": 22904
    },
    {
      "epoch": 0.22905,
      "grad_norm": 1.2167559944301476,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 22905
    },
    {
      "epoch": 0.22906,
      "grad_norm": 1.234721589474369,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22906
    },
    {
      "epoch": 0.22907,
      "grad_norm": 1.177360279709588,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 22907
    },
    {
      "epoch": 0.22908,
      "grad_norm": 1.3467052007901217,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 22908
    },
    {
      "epoch": 0.22909,
      "grad_norm": 1.2610553027562679,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 22909
    },
    {
      "epoch": 0.2291,
      "grad_norm": 1.2236670577041506,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 22910
    },
    {
      "epoch": 0.22911,
      "grad_norm": 1.455089190712894,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 22911
    },
    {
      "epoch": 0.22912,
      "grad_norm": 1.2685255647901654,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 22912
    },
    {
      "epoch": 0.22913,
      "grad_norm": 1.3934264149350137,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 22913
    },
    {
      "epoch": 0.22914,
      "grad_norm": 1.21597673338576,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 22914
    },
    {
      "epoch": 0.22915,
      "grad_norm": 1.3134165822709327,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 22915
    },
    {
      "epoch": 0.22916,
      "grad_norm": 1.2909098050528915,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 22916
    },
    {
      "epoch": 0.22917,
      "grad_norm": 1.4851169396437347,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 22917
    },
    {
      "epoch": 0.22918,
      "grad_norm": 1.2788011360923555,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 22918
    },
    {
      "epoch": 0.22919,
      "grad_norm": 1.3221037668694506,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 22919
    },
    {
      "epoch": 0.2292,
      "grad_norm": 1.5433161913020008,
      "learning_rate": 0.003,
      "loss": 4.075,
      "step": 22920
    },
    {
      "epoch": 0.22921,
      "grad_norm": 1.2422243655526495,
      "learning_rate": 0.003,
      "loss": 4.0513,
      "step": 22921
    },
    {
      "epoch": 0.22922,
      "grad_norm": 1.2065210526072263,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 22922
    },
    {
      "epoch": 0.22923,
      "grad_norm": 1.1431078244331845,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 22923
    },
    {
      "epoch": 0.22924,
      "grad_norm": 1.397505579264044,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 22924
    },
    {
      "epoch": 0.22925,
      "grad_norm": 1.1318338746097767,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 22925
    },
    {
      "epoch": 0.22926,
      "grad_norm": 1.5262692206953534,
      "learning_rate": 0.003,
      "loss": 4.0773,
      "step": 22926
    },
    {
      "epoch": 0.22927,
      "grad_norm": 1.2262615429393733,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 22927
    },
    {
      "epoch": 0.22928,
      "grad_norm": 1.389586082414331,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 22928
    },
    {
      "epoch": 0.22929,
      "grad_norm": 1.2247846332805663,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 22929
    },
    {
      "epoch": 0.2293,
      "grad_norm": 1.3678563895752596,
      "learning_rate": 0.003,
      "loss": 4.0152,
      "step": 22930
    },
    {
      "epoch": 0.22931,
      "grad_norm": 1.3002273140429303,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 22931
    },
    {
      "epoch": 0.22932,
      "grad_norm": 1.1659909609621704,
      "learning_rate": 0.003,
      "loss": 4.0052,
      "step": 22932
    },
    {
      "epoch": 0.22933,
      "grad_norm": 1.3198104999793905,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 22933
    },
    {
      "epoch": 0.22934,
      "grad_norm": 1.2667220713990213,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 22934
    },
    {
      "epoch": 0.22935,
      "grad_norm": 1.3144717008615145,
      "learning_rate": 0.003,
      "loss": 4.0102,
      "step": 22935
    },
    {
      "epoch": 0.22936,
      "grad_norm": 1.2291184178785146,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 22936
    },
    {
      "epoch": 0.22937,
      "grad_norm": 1.64357979360127,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 22937
    },
    {
      "epoch": 0.22938,
      "grad_norm": 0.9299019781222478,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 22938
    },
    {
      "epoch": 0.22939,
      "grad_norm": 1.4757190447923763,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 22939
    },
    {
      "epoch": 0.2294,
      "grad_norm": 1.3135447530156399,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 22940
    },
    {
      "epoch": 0.22941,
      "grad_norm": 1.4439066531604614,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 22941
    },
    {
      "epoch": 0.22942,
      "grad_norm": 1.261143192288765,
      "learning_rate": 0.003,
      "loss": 4.0036,
      "step": 22942
    },
    {
      "epoch": 0.22943,
      "grad_norm": 1.295165090004534,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 22943
    },
    {
      "epoch": 0.22944,
      "grad_norm": 1.1359597889951656,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 22944
    },
    {
      "epoch": 0.22945,
      "grad_norm": 1.3768127618117638,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 22945
    },
    {
      "epoch": 0.22946,
      "grad_norm": 1.3472858451524485,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 22946
    },
    {
      "epoch": 0.22947,
      "grad_norm": 1.0278995294613298,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 22947
    },
    {
      "epoch": 0.22948,
      "grad_norm": 1.5433280328381243,
      "learning_rate": 0.003,
      "loss": 4.079,
      "step": 22948
    },
    {
      "epoch": 0.22949,
      "grad_norm": 1.1117553909390965,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 22949
    },
    {
      "epoch": 0.2295,
      "grad_norm": 1.4920059526550298,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 22950
    },
    {
      "epoch": 0.22951,
      "grad_norm": 0.993928301649757,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 22951
    },
    {
      "epoch": 0.22952,
      "grad_norm": 1.4335446024072818,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 22952
    },
    {
      "epoch": 0.22953,
      "grad_norm": 1.2702029848287875,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 22953
    },
    {
      "epoch": 0.22954,
      "grad_norm": 1.422156469107183,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 22954
    },
    {
      "epoch": 0.22955,
      "grad_norm": 1.5017688053877793,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 22955
    },
    {
      "epoch": 0.22956,
      "grad_norm": 1.1832689528429465,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 22956
    },
    {
      "epoch": 0.22957,
      "grad_norm": 1.3202163592733576,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 22957
    },
    {
      "epoch": 0.22958,
      "grad_norm": 1.2773921373579953,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 22958
    },
    {
      "epoch": 0.22959,
      "grad_norm": 1.375776974065839,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 22959
    },
    {
      "epoch": 0.2296,
      "grad_norm": 1.173674651647164,
      "learning_rate": 0.003,
      "loss": 3.9901,
      "step": 22960
    },
    {
      "epoch": 0.22961,
      "grad_norm": 1.137614899206587,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 22961
    },
    {
      "epoch": 0.22962,
      "grad_norm": 1.2713892856399858,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 22962
    },
    {
      "epoch": 0.22963,
      "grad_norm": 1.1695322673610669,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 22963
    },
    {
      "epoch": 0.22964,
      "grad_norm": 1.4291921316022251,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 22964
    },
    {
      "epoch": 0.22965,
      "grad_norm": 1.2511351489355886,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 22965
    },
    {
      "epoch": 0.22966,
      "grad_norm": 1.577778526342389,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 22966
    },
    {
      "epoch": 0.22967,
      "grad_norm": 1.3302290332583357,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 22967
    },
    {
      "epoch": 0.22968,
      "grad_norm": 1.0985676142568965,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 22968
    },
    {
      "epoch": 0.22969,
      "grad_norm": 1.256983007767673,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 22969
    },
    {
      "epoch": 0.2297,
      "grad_norm": 1.4345333632950201,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 22970
    },
    {
      "epoch": 0.22971,
      "grad_norm": 1.400421469249285,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 22971
    },
    {
      "epoch": 0.22972,
      "grad_norm": 1.1891576143755764,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 22972
    },
    {
      "epoch": 0.22973,
      "grad_norm": 1.1912768620700784,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 22973
    },
    {
      "epoch": 0.22974,
      "grad_norm": 1.3218740906006485,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 22974
    },
    {
      "epoch": 0.22975,
      "grad_norm": 1.4549893013901607,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 22975
    },
    {
      "epoch": 0.22976,
      "grad_norm": 1.2714160560273011,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 22976
    },
    {
      "epoch": 0.22977,
      "grad_norm": 1.4377271393819862,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 22977
    },
    {
      "epoch": 0.22978,
      "grad_norm": 1.2509815525636199,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 22978
    },
    {
      "epoch": 0.22979,
      "grad_norm": 1.1503119946235718,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 22979
    },
    {
      "epoch": 0.2298,
      "grad_norm": 1.4611575540202049,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 22980
    },
    {
      "epoch": 0.22981,
      "grad_norm": 0.9394057189840224,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 22981
    },
    {
      "epoch": 0.22982,
      "grad_norm": 1.3346211156308416,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 22982
    },
    {
      "epoch": 0.22983,
      "grad_norm": 1.2155765157826248,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 22983
    },
    {
      "epoch": 0.22984,
      "grad_norm": 1.5584576073336336,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 22984
    },
    {
      "epoch": 0.22985,
      "grad_norm": 1.0796233892676972,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 22985
    },
    {
      "epoch": 0.22986,
      "grad_norm": 1.4850812765520647,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 22986
    },
    {
      "epoch": 0.22987,
      "grad_norm": 1.444019742132719,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 22987
    },
    {
      "epoch": 0.22988,
      "grad_norm": 1.2808928261785606,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 22988
    },
    {
      "epoch": 0.22989,
      "grad_norm": 1.2188613157029755,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 22989
    },
    {
      "epoch": 0.2299,
      "grad_norm": 1.3536175566759292,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 22990
    },
    {
      "epoch": 0.22991,
      "grad_norm": 1.4217767897760307,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 22991
    },
    {
      "epoch": 0.22992,
      "grad_norm": 1.589064135663114,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 22992
    },
    {
      "epoch": 0.22993,
      "grad_norm": 1.205479423377943,
      "learning_rate": 0.003,
      "loss": 4.0872,
      "step": 22993
    },
    {
      "epoch": 0.22994,
      "grad_norm": 1.3154851765166542,
      "learning_rate": 0.003,
      "loss": 4.0766,
      "step": 22994
    },
    {
      "epoch": 0.22995,
      "grad_norm": 1.3085659744248501,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 22995
    },
    {
      "epoch": 0.22996,
      "grad_norm": 1.0965907825938541,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 22996
    },
    {
      "epoch": 0.22997,
      "grad_norm": 1.4478601107402107,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 22997
    },
    {
      "epoch": 0.22998,
      "grad_norm": 1.116756667172281,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 22998
    },
    {
      "epoch": 0.22999,
      "grad_norm": 1.4968404198900556,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 22999
    },
    {
      "epoch": 0.23,
      "grad_norm": 1.2554377085216648,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 23000
    },
    {
      "epoch": 0.23001,
      "grad_norm": 1.3627960256708616,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 23001
    },
    {
      "epoch": 0.23002,
      "grad_norm": 1.3359068277671018,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 23002
    },
    {
      "epoch": 0.23003,
      "grad_norm": 1.2518805679187248,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 23003
    },
    {
      "epoch": 0.23004,
      "grad_norm": 1.4332295048459291,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 23004
    },
    {
      "epoch": 0.23005,
      "grad_norm": 1.0469876339429016,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 23005
    },
    {
      "epoch": 0.23006,
      "grad_norm": 1.3750879167506647,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 23006
    },
    {
      "epoch": 0.23007,
      "grad_norm": 1.102596208160135,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 23007
    },
    {
      "epoch": 0.23008,
      "grad_norm": 1.4818339892917813,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 23008
    },
    {
      "epoch": 0.23009,
      "grad_norm": 1.2458263803707696,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 23009
    },
    {
      "epoch": 0.2301,
      "grad_norm": 1.2125471959171752,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 23010
    },
    {
      "epoch": 0.23011,
      "grad_norm": 1.3658388882001644,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 23011
    },
    {
      "epoch": 0.23012,
      "grad_norm": 1.353890222092544,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 23012
    },
    {
      "epoch": 0.23013,
      "grad_norm": 1.6489701885843449,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 23013
    },
    {
      "epoch": 0.23014,
      "grad_norm": 1.088813020669272,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 23014
    },
    {
      "epoch": 0.23015,
      "grad_norm": 1.4024324704957338,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 23015
    },
    {
      "epoch": 0.23016,
      "grad_norm": 1.2345054187348519,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 23016
    },
    {
      "epoch": 0.23017,
      "grad_norm": 1.5140625233767058,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 23017
    },
    {
      "epoch": 0.23018,
      "grad_norm": 1.0472359944970655,
      "learning_rate": 0.003,
      "loss": 4.0197,
      "step": 23018
    },
    {
      "epoch": 0.23019,
      "grad_norm": 1.5599569042159376,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 23019
    },
    {
      "epoch": 0.2302,
      "grad_norm": 1.1192254538437625,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 23020
    },
    {
      "epoch": 0.23021,
      "grad_norm": 1.672551497850161,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 23021
    },
    {
      "epoch": 0.23022,
      "grad_norm": 1.3578940341893924,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 23022
    },
    {
      "epoch": 0.23023,
      "grad_norm": 1.0490870663434972,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23023
    },
    {
      "epoch": 0.23024,
      "grad_norm": 1.4833999301909206,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 23024
    },
    {
      "epoch": 0.23025,
      "grad_norm": 1.1904474999043988,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 23025
    },
    {
      "epoch": 0.23026,
      "grad_norm": 1.22093948956686,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 23026
    },
    {
      "epoch": 0.23027,
      "grad_norm": 1.283267201391806,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 23027
    },
    {
      "epoch": 0.23028,
      "grad_norm": 1.3487053628796137,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 23028
    },
    {
      "epoch": 0.23029,
      "grad_norm": 1.3663018286440518,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 23029
    },
    {
      "epoch": 0.2303,
      "grad_norm": 1.3141607999694707,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 23030
    },
    {
      "epoch": 0.23031,
      "grad_norm": 1.3381526802135097,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 23031
    },
    {
      "epoch": 0.23032,
      "grad_norm": 1.147102598297343,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 23032
    },
    {
      "epoch": 0.23033,
      "grad_norm": 1.1537468439259586,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 23033
    },
    {
      "epoch": 0.23034,
      "grad_norm": 1.2070762700690738,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 23034
    },
    {
      "epoch": 0.23035,
      "grad_norm": 1.3833601060265663,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 23035
    },
    {
      "epoch": 0.23036,
      "grad_norm": 1.4867932786299043,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 23036
    },
    {
      "epoch": 0.23037,
      "grad_norm": 1.0908046132988751,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 23037
    },
    {
      "epoch": 0.23038,
      "grad_norm": 1.2936273598672328,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 23038
    },
    {
      "epoch": 0.23039,
      "grad_norm": 1.3801343216314799,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 23039
    },
    {
      "epoch": 0.2304,
      "grad_norm": 1.1626614428690953,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 23040
    },
    {
      "epoch": 0.23041,
      "grad_norm": 1.3698695749482124,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 23041
    },
    {
      "epoch": 0.23042,
      "grad_norm": 1.2378153196003894,
      "learning_rate": 0.003,
      "loss": 3.9983,
      "step": 23042
    },
    {
      "epoch": 0.23043,
      "grad_norm": 1.577881438326901,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 23043
    },
    {
      "epoch": 0.23044,
      "grad_norm": 1.2460208256319625,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 23044
    },
    {
      "epoch": 0.23045,
      "grad_norm": 1.4455431353142316,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 23045
    },
    {
      "epoch": 0.23046,
      "grad_norm": 1.2795392871288738,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 23046
    },
    {
      "epoch": 0.23047,
      "grad_norm": 1.3033283084011695,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 23047
    },
    {
      "epoch": 0.23048,
      "grad_norm": 1.3811992126023218,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 23048
    },
    {
      "epoch": 0.23049,
      "grad_norm": 1.1930207159835433,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 23049
    },
    {
      "epoch": 0.2305,
      "grad_norm": 1.4585760788983548,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 23050
    },
    {
      "epoch": 0.23051,
      "grad_norm": 1.358262142646777,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23051
    },
    {
      "epoch": 0.23052,
      "grad_norm": 1.3419042260630287,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 23052
    },
    {
      "epoch": 0.23053,
      "grad_norm": 1.4121503378881262,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 23053
    },
    {
      "epoch": 0.23054,
      "grad_norm": 1.1599489122641409,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 23054
    },
    {
      "epoch": 0.23055,
      "grad_norm": 1.2760740138601372,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 23055
    },
    {
      "epoch": 0.23056,
      "grad_norm": 1.096737656844803,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 23056
    },
    {
      "epoch": 0.23057,
      "grad_norm": 1.583486108065741,
      "learning_rate": 0.003,
      "loss": 4.0814,
      "step": 23057
    },
    {
      "epoch": 0.23058,
      "grad_norm": 1.1276380647128694,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 23058
    },
    {
      "epoch": 0.23059,
      "grad_norm": 1.5101198500495259,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 23059
    },
    {
      "epoch": 0.2306,
      "grad_norm": 1.0406442245597265,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 23060
    },
    {
      "epoch": 0.23061,
      "grad_norm": 1.6043601362782616,
      "learning_rate": 0.003,
      "loss": 4.0873,
      "step": 23061
    },
    {
      "epoch": 0.23062,
      "grad_norm": 1.1131308412584864,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 23062
    },
    {
      "epoch": 0.23063,
      "grad_norm": 1.3691056020948003,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 23063
    },
    {
      "epoch": 0.23064,
      "grad_norm": 1.1932208340525927,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 23064
    },
    {
      "epoch": 0.23065,
      "grad_norm": 1.4098137332654574,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 23065
    },
    {
      "epoch": 0.23066,
      "grad_norm": 1.3104140321873772,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 23066
    },
    {
      "epoch": 0.23067,
      "grad_norm": 1.3614850118300021,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23067
    },
    {
      "epoch": 0.23068,
      "grad_norm": 1.2275383517188325,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 23068
    },
    {
      "epoch": 0.23069,
      "grad_norm": 1.5518594230598315,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 23069
    },
    {
      "epoch": 0.2307,
      "grad_norm": 1.1123928582963447,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 23070
    },
    {
      "epoch": 0.23071,
      "grad_norm": 1.372531640792637,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 23071
    },
    {
      "epoch": 0.23072,
      "grad_norm": 1.0690552375552709,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 23072
    },
    {
      "epoch": 0.23073,
      "grad_norm": 1.5236579195250115,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 23073
    },
    {
      "epoch": 0.23074,
      "grad_norm": 1.221992758364506,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 23074
    },
    {
      "epoch": 0.23075,
      "grad_norm": 1.483652994064702,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 23075
    },
    {
      "epoch": 0.23076,
      "grad_norm": 1.2949621355925267,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 23076
    },
    {
      "epoch": 0.23077,
      "grad_norm": 1.220737057512629,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 23077
    },
    {
      "epoch": 0.23078,
      "grad_norm": 1.283862303259275,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 23078
    },
    {
      "epoch": 0.23079,
      "grad_norm": 1.2600780269031073,
      "learning_rate": 0.003,
      "loss": 4.0124,
      "step": 23079
    },
    {
      "epoch": 0.2308,
      "grad_norm": 1.3530737646299207,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 23080
    },
    {
      "epoch": 0.23081,
      "grad_norm": 1.253509029242236,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 23081
    },
    {
      "epoch": 0.23082,
      "grad_norm": 1.1352375655895686,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 23082
    },
    {
      "epoch": 0.23083,
      "grad_norm": 1.4336850022395355,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 23083
    },
    {
      "epoch": 0.23084,
      "grad_norm": 1.3638809663352245,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 23084
    },
    {
      "epoch": 0.23085,
      "grad_norm": 1.207880423206292,
      "learning_rate": 0.003,
      "loss": 4.0187,
      "step": 23085
    },
    {
      "epoch": 0.23086,
      "grad_norm": 1.1490890793397543,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 23086
    },
    {
      "epoch": 0.23087,
      "grad_norm": 1.5840436288048507,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 23087
    },
    {
      "epoch": 0.23088,
      "grad_norm": 1.0133310027332907,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 23088
    },
    {
      "epoch": 0.23089,
      "grad_norm": 1.4733120744342365,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 23089
    },
    {
      "epoch": 0.2309,
      "grad_norm": 1.0767299521882396,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 23090
    },
    {
      "epoch": 0.23091,
      "grad_norm": 1.4570100154875747,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 23091
    },
    {
      "epoch": 0.23092,
      "grad_norm": 1.0391219016099587,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 23092
    },
    {
      "epoch": 0.23093,
      "grad_norm": 1.4364810847065508,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 23093
    },
    {
      "epoch": 0.23094,
      "grad_norm": 1.3610556377897853,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 23094
    },
    {
      "epoch": 0.23095,
      "grad_norm": 1.3988893696193503,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 23095
    },
    {
      "epoch": 0.23096,
      "grad_norm": 1.1029080686008839,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 23096
    },
    {
      "epoch": 0.23097,
      "grad_norm": 1.4107123479155623,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 23097
    },
    {
      "epoch": 0.23098,
      "grad_norm": 1.343328213577928,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 23098
    },
    {
      "epoch": 0.23099,
      "grad_norm": 1.3654904195614725,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 23099
    },
    {
      "epoch": 0.231,
      "grad_norm": 1.3704007991239546,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 23100
    },
    {
      "epoch": 0.23101,
      "grad_norm": 1.1214932744145265,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 23101
    },
    {
      "epoch": 0.23102,
      "grad_norm": 1.1804347778162774,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 23102
    },
    {
      "epoch": 0.23103,
      "grad_norm": 1.4653494874052122,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 23103
    },
    {
      "epoch": 0.23104,
      "grad_norm": 1.202233869245019,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 23104
    },
    {
      "epoch": 0.23105,
      "grad_norm": 1.2266739737655035,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 23105
    },
    {
      "epoch": 0.23106,
      "grad_norm": 1.0978365678608377,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 23106
    },
    {
      "epoch": 0.23107,
      "grad_norm": 1.5587540284358783,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 23107
    },
    {
      "epoch": 0.23108,
      "grad_norm": 1.0250454122787522,
      "learning_rate": 0.003,
      "loss": 4.0,
      "step": 23108
    },
    {
      "epoch": 0.23109,
      "grad_norm": 1.400043931350464,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 23109
    },
    {
      "epoch": 0.2311,
      "grad_norm": 1.1424968938310303,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 23110
    },
    {
      "epoch": 0.23111,
      "grad_norm": 1.3105726010209262,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 23111
    },
    {
      "epoch": 0.23112,
      "grad_norm": 1.2783236071241297,
      "learning_rate": 0.003,
      "loss": 4.0924,
      "step": 23112
    },
    {
      "epoch": 0.23113,
      "grad_norm": 1.098052421859732,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 23113
    },
    {
      "epoch": 0.23114,
      "grad_norm": 1.4886710941589372,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 23114
    },
    {
      "epoch": 0.23115,
      "grad_norm": 1.2584381049217355,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 23115
    },
    {
      "epoch": 0.23116,
      "grad_norm": 1.5741984533301971,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 23116
    },
    {
      "epoch": 0.23117,
      "grad_norm": 1.0653124310499962,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 23117
    },
    {
      "epoch": 0.23118,
      "grad_norm": 1.4895368143505685,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 23118
    },
    {
      "epoch": 0.23119,
      "grad_norm": 1.184337772614455,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 23119
    },
    {
      "epoch": 0.2312,
      "grad_norm": 1.3718758827598803,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 23120
    },
    {
      "epoch": 0.23121,
      "grad_norm": 1.3362888794689922,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 23121
    },
    {
      "epoch": 0.23122,
      "grad_norm": 1.323590297729312,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 23122
    },
    {
      "epoch": 0.23123,
      "grad_norm": 1.3800072954163674,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 23123
    },
    {
      "epoch": 0.23124,
      "grad_norm": 1.32135895563633,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 23124
    },
    {
      "epoch": 0.23125,
      "grad_norm": 1.3187542779296537,
      "learning_rate": 0.003,
      "loss": 4.0661,
      "step": 23125
    },
    {
      "epoch": 0.23126,
      "grad_norm": 1.155357502688108,
      "learning_rate": 0.003,
      "loss": 4.0646,
      "step": 23126
    },
    {
      "epoch": 0.23127,
      "grad_norm": 1.4408205504184937,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 23127
    },
    {
      "epoch": 0.23128,
      "grad_norm": 1.1680508849965598,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 23128
    },
    {
      "epoch": 0.23129,
      "grad_norm": 1.3068099641234334,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 23129
    },
    {
      "epoch": 0.2313,
      "grad_norm": 1.255595033145102,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 23130
    },
    {
      "epoch": 0.23131,
      "grad_norm": 1.256673276997919,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 23131
    },
    {
      "epoch": 0.23132,
      "grad_norm": 1.1999917681709251,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 23132
    },
    {
      "epoch": 0.23133,
      "grad_norm": 1.2180017020237284,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 23133
    },
    {
      "epoch": 0.23134,
      "grad_norm": 1.4270176461206947,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 23134
    },
    {
      "epoch": 0.23135,
      "grad_norm": 1.1126326837084093,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 23135
    },
    {
      "epoch": 0.23136,
      "grad_norm": 1.1601847464346535,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 23136
    },
    {
      "epoch": 0.23137,
      "grad_norm": 1.1876828983408563,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 23137
    },
    {
      "epoch": 0.23138,
      "grad_norm": 1.2480541814908157,
      "learning_rate": 0.003,
      "loss": 4.0153,
      "step": 23138
    },
    {
      "epoch": 0.23139,
      "grad_norm": 1.2603734939723252,
      "learning_rate": 0.003,
      "loss": 4.0784,
      "step": 23139
    },
    {
      "epoch": 0.2314,
      "grad_norm": 1.2501705941310617,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 23140
    },
    {
      "epoch": 0.23141,
      "grad_norm": 1.3280082885540658,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 23141
    },
    {
      "epoch": 0.23142,
      "grad_norm": 1.2586145728752787,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 23142
    },
    {
      "epoch": 0.23143,
      "grad_norm": 1.3034597497152454,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 23143
    },
    {
      "epoch": 0.23144,
      "grad_norm": 1.4402599066569184,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 23144
    },
    {
      "epoch": 0.23145,
      "grad_norm": 1.69681052767605,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 23145
    },
    {
      "epoch": 0.23146,
      "grad_norm": 1.4345291431594693,
      "learning_rate": 0.003,
      "loss": 4.085,
      "step": 23146
    },
    {
      "epoch": 0.23147,
      "grad_norm": 1.1958868206230282,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 23147
    },
    {
      "epoch": 0.23148,
      "grad_norm": 1.2495945557166919,
      "learning_rate": 0.003,
      "loss": 4.0902,
      "step": 23148
    },
    {
      "epoch": 0.23149,
      "grad_norm": 1.440018602006004,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 23149
    },
    {
      "epoch": 0.2315,
      "grad_norm": 1.1795806910438877,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 23150
    },
    {
      "epoch": 0.23151,
      "grad_norm": 1.496192189070443,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 23151
    },
    {
      "epoch": 0.23152,
      "grad_norm": 1.1909053961441836,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 23152
    },
    {
      "epoch": 0.23153,
      "grad_norm": 1.3962953025431974,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 23153
    },
    {
      "epoch": 0.23154,
      "grad_norm": 1.1675557846803757,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 23154
    },
    {
      "epoch": 0.23155,
      "grad_norm": 1.3015424301448135,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 23155
    },
    {
      "epoch": 0.23156,
      "grad_norm": 1.5827241926282818,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 23156
    },
    {
      "epoch": 0.23157,
      "grad_norm": 1.3530596449290646,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 23157
    },
    {
      "epoch": 0.23158,
      "grad_norm": 1.2588614609207367,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 23158
    },
    {
      "epoch": 0.23159,
      "grad_norm": 1.1243603405515727,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 23159
    },
    {
      "epoch": 0.2316,
      "grad_norm": 1.5716867859951131,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 23160
    },
    {
      "epoch": 0.23161,
      "grad_norm": 1.2191364918003271,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 23161
    },
    {
      "epoch": 0.23162,
      "grad_norm": 1.3556083560268222,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 23162
    },
    {
      "epoch": 0.23163,
      "grad_norm": 1.428243446122848,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 23163
    },
    {
      "epoch": 0.23164,
      "grad_norm": 1.1248025309685776,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 23164
    },
    {
      "epoch": 0.23165,
      "grad_norm": 1.4494731533647893,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 23165
    },
    {
      "epoch": 0.23166,
      "grad_norm": 1.1067007282731376,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 23166
    },
    {
      "epoch": 0.23167,
      "grad_norm": 1.634272919421246,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 23167
    },
    {
      "epoch": 0.23168,
      "grad_norm": 1.1041896195392928,
      "learning_rate": 0.003,
      "loss": 4.0137,
      "step": 23168
    },
    {
      "epoch": 0.23169,
      "grad_norm": 1.3448074533014545,
      "learning_rate": 0.003,
      "loss": 4.0043,
      "step": 23169
    },
    {
      "epoch": 0.2317,
      "grad_norm": 1.3057330465981378,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 23170
    },
    {
      "epoch": 0.23171,
      "grad_norm": 1.2471084396318384,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 23171
    },
    {
      "epoch": 0.23172,
      "grad_norm": 1.334405673948641,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 23172
    },
    {
      "epoch": 0.23173,
      "grad_norm": 1.1157727143956997,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 23173
    },
    {
      "epoch": 0.23174,
      "grad_norm": 1.3155148133688945,
      "learning_rate": 0.003,
      "loss": 4.0357,
      "step": 23174
    },
    {
      "epoch": 0.23175,
      "grad_norm": 1.176538074429829,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 23175
    },
    {
      "epoch": 0.23176,
      "grad_norm": 1.2847015156755377,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 23176
    },
    {
      "epoch": 0.23177,
      "grad_norm": 1.4650567470933238,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 23177
    },
    {
      "epoch": 0.23178,
      "grad_norm": 1.4779059182089649,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 23178
    },
    {
      "epoch": 0.23179,
      "grad_norm": 1.1127730668606501,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 23179
    },
    {
      "epoch": 0.2318,
      "grad_norm": 1.6486347967805015,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 23180
    },
    {
      "epoch": 0.23181,
      "grad_norm": 1.0985864177337061,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 23181
    },
    {
      "epoch": 0.23182,
      "grad_norm": 1.2383694147162692,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 23182
    },
    {
      "epoch": 0.23183,
      "grad_norm": 1.3524554549919405,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 23183
    },
    {
      "epoch": 0.23184,
      "grad_norm": 1.205740605623693,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23184
    },
    {
      "epoch": 0.23185,
      "grad_norm": 1.3614646044473162,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 23185
    },
    {
      "epoch": 0.23186,
      "grad_norm": 1.0473391191618198,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 23186
    },
    {
      "epoch": 0.23187,
      "grad_norm": 1.4147875306952638,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 23187
    },
    {
      "epoch": 0.23188,
      "grad_norm": 1.171074279477168,
      "learning_rate": 0.003,
      "loss": 4.0876,
      "step": 23188
    },
    {
      "epoch": 0.23189,
      "grad_norm": 1.2003817706389435,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 23189
    },
    {
      "epoch": 0.2319,
      "grad_norm": 1.2329943281245004,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 23190
    },
    {
      "epoch": 0.23191,
      "grad_norm": 1.282464087951763,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 23191
    },
    {
      "epoch": 0.23192,
      "grad_norm": 1.4175510640335267,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 23192
    },
    {
      "epoch": 0.23193,
      "grad_norm": 1.2231555605608266,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 23193
    },
    {
      "epoch": 0.23194,
      "grad_norm": 1.464929973426186,
      "learning_rate": 0.003,
      "loss": 4.0106,
      "step": 23194
    },
    {
      "epoch": 0.23195,
      "grad_norm": 1.2891837757244058,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 23195
    },
    {
      "epoch": 0.23196,
      "grad_norm": 1.284554852519397,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 23196
    },
    {
      "epoch": 0.23197,
      "grad_norm": 1.1835775203695522,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 23197
    },
    {
      "epoch": 0.23198,
      "grad_norm": 1.3405997143577533,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 23198
    },
    {
      "epoch": 0.23199,
      "grad_norm": 1.3221228242110958,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 23199
    },
    {
      "epoch": 0.232,
      "grad_norm": 1.2650841488173297,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 23200
    },
    {
      "epoch": 0.23201,
      "grad_norm": 1.2434903559984762,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 23201
    },
    {
      "epoch": 0.23202,
      "grad_norm": 1.2103808769700988,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 23202
    },
    {
      "epoch": 0.23203,
      "grad_norm": 1.2757788894125792,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 23203
    },
    {
      "epoch": 0.23204,
      "grad_norm": 1.2843571037396353,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 23204
    },
    {
      "epoch": 0.23205,
      "grad_norm": 1.3123870931340436,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 23205
    },
    {
      "epoch": 0.23206,
      "grad_norm": 1.1504312830448848,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 23206
    },
    {
      "epoch": 0.23207,
      "grad_norm": 1.4494120792756744,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 23207
    },
    {
      "epoch": 0.23208,
      "grad_norm": 1.2321102679461702,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 23208
    },
    {
      "epoch": 0.23209,
      "grad_norm": 1.4467505845454234,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 23209
    },
    {
      "epoch": 0.2321,
      "grad_norm": 1.5921456379945131,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 23210
    },
    {
      "epoch": 0.23211,
      "grad_norm": 1.25486819305933,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 23211
    },
    {
      "epoch": 0.23212,
      "grad_norm": 1.318159553687812,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 23212
    },
    {
      "epoch": 0.23213,
      "grad_norm": 1.3522028549171818,
      "learning_rate": 0.003,
      "loss": 4.0752,
      "step": 23213
    },
    {
      "epoch": 0.23214,
      "grad_norm": 1.2425751876346849,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 23214
    },
    {
      "epoch": 0.23215,
      "grad_norm": 1.0371338136696169,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 23215
    },
    {
      "epoch": 0.23216,
      "grad_norm": 1.1216127249129784,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 23216
    },
    {
      "epoch": 0.23217,
      "grad_norm": 1.3631832004698994,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 23217
    },
    {
      "epoch": 0.23218,
      "grad_norm": 1.4806873039900939,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 23218
    },
    {
      "epoch": 0.23219,
      "grad_norm": 1.3258900547927888,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 23219
    },
    {
      "epoch": 0.2322,
      "grad_norm": 1.325757282319854,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 23220
    },
    {
      "epoch": 0.23221,
      "grad_norm": 1.3677893861856718,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 23221
    },
    {
      "epoch": 0.23222,
      "grad_norm": 1.2091666637750367,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 23222
    },
    {
      "epoch": 0.23223,
      "grad_norm": 1.407492764753761,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 23223
    },
    {
      "epoch": 0.23224,
      "grad_norm": 0.9543968454000411,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 23224
    },
    {
      "epoch": 0.23225,
      "grad_norm": 1.424205440931275,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 23225
    },
    {
      "epoch": 0.23226,
      "grad_norm": 1.1789994954770877,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 23226
    },
    {
      "epoch": 0.23227,
      "grad_norm": 1.500989460201003,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 23227
    },
    {
      "epoch": 0.23228,
      "grad_norm": 1.0752262785965898,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 23228
    },
    {
      "epoch": 0.23229,
      "grad_norm": 1.2616444613266276,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 23229
    },
    {
      "epoch": 0.2323,
      "grad_norm": 1.371139188479844,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 23230
    },
    {
      "epoch": 0.23231,
      "grad_norm": 1.2715010105964422,
      "learning_rate": 0.003,
      "loss": 4.0142,
      "step": 23231
    },
    {
      "epoch": 0.23232,
      "grad_norm": 1.128084975286615,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 23232
    },
    {
      "epoch": 0.23233,
      "grad_norm": 1.3631250374049788,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 23233
    },
    {
      "epoch": 0.23234,
      "grad_norm": 1.2555405702095657,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 23234
    },
    {
      "epoch": 0.23235,
      "grad_norm": 1.5995130541276383,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 23235
    },
    {
      "epoch": 0.23236,
      "grad_norm": 1.262016998384145,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 23236
    },
    {
      "epoch": 0.23237,
      "grad_norm": 1.366410789894868,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 23237
    },
    {
      "epoch": 0.23238,
      "grad_norm": 1.234722136798343,
      "learning_rate": 0.003,
      "loss": 4.063,
      "step": 23238
    },
    {
      "epoch": 0.23239,
      "grad_norm": 1.4539394754451245,
      "learning_rate": 0.003,
      "loss": 4.0817,
      "step": 23239
    },
    {
      "epoch": 0.2324,
      "grad_norm": 1.1737704757671368,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 23240
    },
    {
      "epoch": 0.23241,
      "grad_norm": 1.6214539718520233,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 23241
    },
    {
      "epoch": 0.23242,
      "grad_norm": 1.088284562816423,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 23242
    },
    {
      "epoch": 0.23243,
      "grad_norm": 1.5156893674451846,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 23243
    },
    {
      "epoch": 0.23244,
      "grad_norm": 1.1443822468829565,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 23244
    },
    {
      "epoch": 0.23245,
      "grad_norm": 1.3462592350267242,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 23245
    },
    {
      "epoch": 0.23246,
      "grad_norm": 1.2110783221956347,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 23246
    },
    {
      "epoch": 0.23247,
      "grad_norm": 1.3039759113548803,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 23247
    },
    {
      "epoch": 0.23248,
      "grad_norm": 1.07813170533897,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 23248
    },
    {
      "epoch": 0.23249,
      "grad_norm": 1.848094974981773,
      "learning_rate": 0.003,
      "loss": 4.0718,
      "step": 23249
    },
    {
      "epoch": 0.2325,
      "grad_norm": 1.071732617437444,
      "learning_rate": 0.003,
      "loss": 4.0923,
      "step": 23250
    },
    {
      "epoch": 0.23251,
      "grad_norm": 1.6644558775451108,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 23251
    },
    {
      "epoch": 0.23252,
      "grad_norm": 1.186053105959287,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 23252
    },
    {
      "epoch": 0.23253,
      "grad_norm": 1.2267337511924554,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 23253
    },
    {
      "epoch": 0.23254,
      "grad_norm": 1.2007454933825648,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 23254
    },
    {
      "epoch": 0.23255,
      "grad_norm": 1.3137764399401184,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 23255
    },
    {
      "epoch": 0.23256,
      "grad_norm": 1.1356408678775007,
      "learning_rate": 0.003,
      "loss": 4.0385,
      "step": 23256
    },
    {
      "epoch": 0.23257,
      "grad_norm": 1.5515651222082167,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 23257
    },
    {
      "epoch": 0.23258,
      "grad_norm": 1.3651398703632192,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 23258
    },
    {
      "epoch": 0.23259,
      "grad_norm": 1.4503685628261243,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 23259
    },
    {
      "epoch": 0.2326,
      "grad_norm": 1.1816761800865787,
      "learning_rate": 0.003,
      "loss": 4.0702,
      "step": 23260
    },
    {
      "epoch": 0.23261,
      "grad_norm": 1.371236268002453,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 23261
    },
    {
      "epoch": 0.23262,
      "grad_norm": 1.2417484239071042,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 23262
    },
    {
      "epoch": 0.23263,
      "grad_norm": 1.3351348679475814,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 23263
    },
    {
      "epoch": 0.23264,
      "grad_norm": 1.1375114553965955,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 23264
    },
    {
      "epoch": 0.23265,
      "grad_norm": 1.152906481026978,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 23265
    },
    {
      "epoch": 0.23266,
      "grad_norm": 1.392860597586105,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 23266
    },
    {
      "epoch": 0.23267,
      "grad_norm": 1.1876590651180388,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 23267
    },
    {
      "epoch": 0.23268,
      "grad_norm": 1.2643494790083054,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 23268
    },
    {
      "epoch": 0.23269,
      "grad_norm": 1.2981487311165842,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 23269
    },
    {
      "epoch": 0.2327,
      "grad_norm": 1.37569174521526,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 23270
    },
    {
      "epoch": 0.23271,
      "grad_norm": 1.3189397526707127,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 23271
    },
    {
      "epoch": 0.23272,
      "grad_norm": 1.3527989469943307,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 23272
    },
    {
      "epoch": 0.23273,
      "grad_norm": 1.2582985951640966,
      "learning_rate": 0.003,
      "loss": 4.0498,
      "step": 23273
    },
    {
      "epoch": 0.23274,
      "grad_norm": 1.2558499925607352,
      "learning_rate": 0.003,
      "loss": 4.0256,
      "step": 23274
    },
    {
      "epoch": 0.23275,
      "grad_norm": 1.4347250406858223,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 23275
    },
    {
      "epoch": 0.23276,
      "grad_norm": 1.2249077678685385,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 23276
    },
    {
      "epoch": 0.23277,
      "grad_norm": 1.6413836240818787,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 23277
    },
    {
      "epoch": 0.23278,
      "grad_norm": 1.2936745852776543,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 23278
    },
    {
      "epoch": 0.23279,
      "grad_norm": 1.2424349656014362,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 23279
    },
    {
      "epoch": 0.2328,
      "grad_norm": 1.355113918644518,
      "learning_rate": 0.003,
      "loss": 4.0747,
      "step": 23280
    },
    {
      "epoch": 0.23281,
      "grad_norm": 1.2320748259439178,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 23281
    },
    {
      "epoch": 0.23282,
      "grad_norm": 1.5111517506175998,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 23282
    },
    {
      "epoch": 0.23283,
      "grad_norm": 1.276988340170519,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 23283
    },
    {
      "epoch": 0.23284,
      "grad_norm": 1.1501325406200669,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 23284
    },
    {
      "epoch": 0.23285,
      "grad_norm": 1.5871858862380583,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 23285
    },
    {
      "epoch": 0.23286,
      "grad_norm": 0.9997980241356003,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 23286
    },
    {
      "epoch": 0.23287,
      "grad_norm": 1.5871920968229956,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 23287
    },
    {
      "epoch": 0.23288,
      "grad_norm": 1.0883272856605712,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 23288
    },
    {
      "epoch": 0.23289,
      "grad_norm": 1.3025211560375816,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 23289
    },
    {
      "epoch": 0.2329,
      "grad_norm": 1.2242693005816543,
      "learning_rate": 0.003,
      "loss": 4.077,
      "step": 23290
    },
    {
      "epoch": 0.23291,
      "grad_norm": 1.3148277382310716,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 23291
    },
    {
      "epoch": 0.23292,
      "grad_norm": 1.1256728129228435,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 23292
    },
    {
      "epoch": 0.23293,
      "grad_norm": 1.524326752503377,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 23293
    },
    {
      "epoch": 0.23294,
      "grad_norm": 1.3728702520680502,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 23294
    },
    {
      "epoch": 0.23295,
      "grad_norm": 1.3817397381305565,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 23295
    },
    {
      "epoch": 0.23296,
      "grad_norm": 1.0052534286348047,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 23296
    },
    {
      "epoch": 0.23297,
      "grad_norm": 1.587897522327039,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 23297
    },
    {
      "epoch": 0.23298,
      "grad_norm": 1.0623389759525195,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 23298
    },
    {
      "epoch": 0.23299,
      "grad_norm": 1.3452812993470136,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 23299
    },
    {
      "epoch": 0.233,
      "grad_norm": 1.112843801250024,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 23300
    },
    {
      "epoch": 0.23301,
      "grad_norm": 1.2807395161155217,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 23301
    },
    {
      "epoch": 0.23302,
      "grad_norm": 1.2393823980255212,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 23302
    },
    {
      "epoch": 0.23303,
      "grad_norm": 1.177388458780208,
      "learning_rate": 0.003,
      "loss": 3.9993,
      "step": 23303
    },
    {
      "epoch": 0.23304,
      "grad_norm": 1.4993408883728558,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 23304
    },
    {
      "epoch": 0.23305,
      "grad_norm": 1.3478072590870311,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 23305
    },
    {
      "epoch": 0.23306,
      "grad_norm": 1.2621195994423227,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 23306
    },
    {
      "epoch": 0.23307,
      "grad_norm": 1.2399002505059544,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 23307
    },
    {
      "epoch": 0.23308,
      "grad_norm": 1.1400015294578412,
      "learning_rate": 0.003,
      "loss": 4.0206,
      "step": 23308
    },
    {
      "epoch": 0.23309,
      "grad_norm": 1.4750528418226974,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 23309
    },
    {
      "epoch": 0.2331,
      "grad_norm": 1.1650374850103553,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 23310
    },
    {
      "epoch": 0.23311,
      "grad_norm": 1.426177695369418,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 23311
    },
    {
      "epoch": 0.23312,
      "grad_norm": 1.3577724159397382,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 23312
    },
    {
      "epoch": 0.23313,
      "grad_norm": 1.4658900785144333,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 23313
    },
    {
      "epoch": 0.23314,
      "grad_norm": 1.2162271156431093,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 23314
    },
    {
      "epoch": 0.23315,
      "grad_norm": 1.5732571181404769,
      "learning_rate": 0.003,
      "loss": 4.0336,
      "step": 23315
    },
    {
      "epoch": 0.23316,
      "grad_norm": 1.2566505485428612,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 23316
    },
    {
      "epoch": 0.23317,
      "grad_norm": 1.4479241168886905,
      "learning_rate": 0.003,
      "loss": 4.083,
      "step": 23317
    },
    {
      "epoch": 0.23318,
      "grad_norm": 1.204350903825152,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 23318
    },
    {
      "epoch": 0.23319,
      "grad_norm": 1.2414904171838381,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 23319
    },
    {
      "epoch": 0.2332,
      "grad_norm": 1.2229429594840069,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 23320
    },
    {
      "epoch": 0.23321,
      "grad_norm": 1.3985446855655157,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 23321
    },
    {
      "epoch": 0.23322,
      "grad_norm": 1.2873223423469282,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 23322
    },
    {
      "epoch": 0.23323,
      "grad_norm": 1.2732889145538597,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 23323
    },
    {
      "epoch": 0.23324,
      "grad_norm": 1.3982124959311553,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 23324
    },
    {
      "epoch": 0.23325,
      "grad_norm": 1.2716832460961498,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23325
    },
    {
      "epoch": 0.23326,
      "grad_norm": 1.3899609151312846,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 23326
    },
    {
      "epoch": 0.23327,
      "grad_norm": 1.271183161385945,
      "learning_rate": 0.003,
      "loss": 4.0061,
      "step": 23327
    },
    {
      "epoch": 0.23328,
      "grad_norm": 1.2059489982588343,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 23328
    },
    {
      "epoch": 0.23329,
      "grad_norm": 1.4973451702797238,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 23329
    },
    {
      "epoch": 0.2333,
      "grad_norm": 1.336286258154857,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 23330
    },
    {
      "epoch": 0.23331,
      "grad_norm": 1.3218764464893902,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 23331
    },
    {
      "epoch": 0.23332,
      "grad_norm": 1.189667149685002,
      "learning_rate": 0.003,
      "loss": 4.0161,
      "step": 23332
    },
    {
      "epoch": 0.23333,
      "grad_norm": 1.2322615568078854,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 23333
    },
    {
      "epoch": 0.23334,
      "grad_norm": 1.3163948969762989,
      "learning_rate": 0.003,
      "loss": 4.0235,
      "step": 23334
    },
    {
      "epoch": 0.23335,
      "grad_norm": 1.186609084019781,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 23335
    },
    {
      "epoch": 0.23336,
      "grad_norm": 1.5092553303414036,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 23336
    },
    {
      "epoch": 0.23337,
      "grad_norm": 0.9346544544768131,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 23337
    },
    {
      "epoch": 0.23338,
      "grad_norm": 1.4567871344287167,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 23338
    },
    {
      "epoch": 0.23339,
      "grad_norm": 1.2192066288744325,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 23339
    },
    {
      "epoch": 0.2334,
      "grad_norm": 1.4134711947502165,
      "learning_rate": 0.003,
      "loss": 4.0603,
      "step": 23340
    },
    {
      "epoch": 0.23341,
      "grad_norm": 1.0416974593078356,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23341
    },
    {
      "epoch": 0.23342,
      "grad_norm": 1.4811986235247379,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 23342
    },
    {
      "epoch": 0.23343,
      "grad_norm": 1.2937531192280796,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 23343
    },
    {
      "epoch": 0.23344,
      "grad_norm": 1.2491946624829435,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 23344
    },
    {
      "epoch": 0.23345,
      "grad_norm": 1.4541947258751813,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 23345
    },
    {
      "epoch": 0.23346,
      "grad_norm": 1.3063125324576894,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 23346
    },
    {
      "epoch": 0.23347,
      "grad_norm": 1.2654606405347482,
      "learning_rate": 0.003,
      "loss": 3.9985,
      "step": 23347
    },
    {
      "epoch": 0.23348,
      "grad_norm": 1.352898458264151,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 23348
    },
    {
      "epoch": 0.23349,
      "grad_norm": 1.419736958452378,
      "learning_rate": 0.003,
      "loss": 4.0085,
      "step": 23349
    },
    {
      "epoch": 0.2335,
      "grad_norm": 1.2239250662377954,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 23350
    },
    {
      "epoch": 0.23351,
      "grad_norm": 1.4556471497734236,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 23351
    },
    {
      "epoch": 0.23352,
      "grad_norm": 1.2775522244513904,
      "learning_rate": 0.003,
      "loss": 4.0743,
      "step": 23352
    },
    {
      "epoch": 0.23353,
      "grad_norm": 1.4390638945032457,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 23353
    },
    {
      "epoch": 0.23354,
      "grad_norm": 1.2013876104286578,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 23354
    },
    {
      "epoch": 0.23355,
      "grad_norm": 1.460624431573545,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 23355
    },
    {
      "epoch": 0.23356,
      "grad_norm": 1.1786276934316429,
      "learning_rate": 0.003,
      "loss": 4.0549,
      "step": 23356
    },
    {
      "epoch": 0.23357,
      "grad_norm": 1.446583890113212,
      "learning_rate": 0.003,
      "loss": 4.0657,
      "step": 23357
    },
    {
      "epoch": 0.23358,
      "grad_norm": 1.3394048256342341,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 23358
    },
    {
      "epoch": 0.23359,
      "grad_norm": 1.4911245231435557,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 23359
    },
    {
      "epoch": 0.2336,
      "grad_norm": 1.3385068391904777,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 23360
    },
    {
      "epoch": 0.23361,
      "grad_norm": 1.245599564200335,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 23361
    },
    {
      "epoch": 0.23362,
      "grad_norm": 1.0990189591725963,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 23362
    },
    {
      "epoch": 0.23363,
      "grad_norm": 1.26945861378687,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 23363
    },
    {
      "epoch": 0.23364,
      "grad_norm": 1.438677382730055,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 23364
    },
    {
      "epoch": 0.23365,
      "grad_norm": 1.1688671630588279,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 23365
    },
    {
      "epoch": 0.23366,
      "grad_norm": 1.2190584746599422,
      "learning_rate": 0.003,
      "loss": 4.0162,
      "step": 23366
    },
    {
      "epoch": 0.23367,
      "grad_norm": 1.3516268608402984,
      "learning_rate": 0.003,
      "loss": 4.0717,
      "step": 23367
    },
    {
      "epoch": 0.23368,
      "grad_norm": 1.0832683915887389,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 23368
    },
    {
      "epoch": 0.23369,
      "grad_norm": 1.388129636200871,
      "learning_rate": 0.003,
      "loss": 4.0192,
      "step": 23369
    },
    {
      "epoch": 0.2337,
      "grad_norm": 1.249431964971318,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 23370
    },
    {
      "epoch": 0.23371,
      "grad_norm": 1.4278674904087199,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 23371
    },
    {
      "epoch": 0.23372,
      "grad_norm": 1.2706777979832753,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 23372
    },
    {
      "epoch": 0.23373,
      "grad_norm": 1.4755016394086917,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 23373
    },
    {
      "epoch": 0.23374,
      "grad_norm": 1.4776964671313162,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 23374
    },
    {
      "epoch": 0.23375,
      "grad_norm": 1.1164524099187323,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 23375
    },
    {
      "epoch": 0.23376,
      "grad_norm": 1.4127839087747904,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 23376
    },
    {
      "epoch": 0.23377,
      "grad_norm": 1.6199554185054557,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 23377
    },
    {
      "epoch": 0.23378,
      "grad_norm": 1.0902052277350867,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 23378
    },
    {
      "epoch": 0.23379,
      "grad_norm": 1.3211830886924998,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 23379
    },
    {
      "epoch": 0.2338,
      "grad_norm": 1.2891991985832214,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 23380
    },
    {
      "epoch": 0.23381,
      "grad_norm": 1.1498749882727495,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 23381
    },
    {
      "epoch": 0.23382,
      "grad_norm": 1.2456215243805242,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 23382
    },
    {
      "epoch": 0.23383,
      "grad_norm": 1.281259435831229,
      "learning_rate": 0.003,
      "loss": 4.0325,
      "step": 23383
    },
    {
      "epoch": 0.23384,
      "grad_norm": 1.4055223207724385,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 23384
    },
    {
      "epoch": 0.23385,
      "grad_norm": 1.4293121693887338,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 23385
    },
    {
      "epoch": 0.23386,
      "grad_norm": 1.5747444673804685,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 23386
    },
    {
      "epoch": 0.23387,
      "grad_norm": 1.1126669344311293,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 23387
    },
    {
      "epoch": 0.23388,
      "grad_norm": 1.3099554552952772,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 23388
    },
    {
      "epoch": 0.23389,
      "grad_norm": 1.1230297737483306,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 23389
    },
    {
      "epoch": 0.2339,
      "grad_norm": 1.6483998893832372,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 23390
    },
    {
      "epoch": 0.23391,
      "grad_norm": 0.9630058891032882,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 23391
    },
    {
      "epoch": 0.23392,
      "grad_norm": 1.4841588187258903,
      "learning_rate": 0.003,
      "loss": 4.0846,
      "step": 23392
    },
    {
      "epoch": 0.23393,
      "grad_norm": 1.4368091500267326,
      "learning_rate": 0.003,
      "loss": 4.0932,
      "step": 23393
    },
    {
      "epoch": 0.23394,
      "grad_norm": 1.224397933931255,
      "learning_rate": 0.003,
      "loss": 4.0506,
      "step": 23394
    },
    {
      "epoch": 0.23395,
      "grad_norm": 1.4468463742130788,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 23395
    },
    {
      "epoch": 0.23396,
      "grad_norm": 1.0249706697937349,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 23396
    },
    {
      "epoch": 0.23397,
      "grad_norm": 1.3713755836938135,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 23397
    },
    {
      "epoch": 0.23398,
      "grad_norm": 1.1611916618142029,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 23398
    },
    {
      "epoch": 0.23399,
      "grad_norm": 1.4914468952398496,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 23399
    },
    {
      "epoch": 0.234,
      "grad_norm": 1.264474854899204,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 23400
    },
    {
      "epoch": 0.23401,
      "grad_norm": 1.1446779052357057,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 23401
    },
    {
      "epoch": 0.23402,
      "grad_norm": 1.4639520243379562,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 23402
    },
    {
      "epoch": 0.23403,
      "grad_norm": 1.3668044246529565,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 23403
    },
    {
      "epoch": 0.23404,
      "grad_norm": 1.5335095337809348,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 23404
    },
    {
      "epoch": 0.23405,
      "grad_norm": 1.4608038413972224,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 23405
    },
    {
      "epoch": 0.23406,
      "grad_norm": 1.1831709899512708,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 23406
    },
    {
      "epoch": 0.23407,
      "grad_norm": 1.5170908258570104,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 23407
    },
    {
      "epoch": 0.23408,
      "grad_norm": 1.2951979560311104,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 23408
    },
    {
      "epoch": 0.23409,
      "grad_norm": 1.4367463373648688,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 23409
    },
    {
      "epoch": 0.2341,
      "grad_norm": 1.123484772307956,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 23410
    },
    {
      "epoch": 0.23411,
      "grad_norm": 1.4166871511080863,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 23411
    },
    {
      "epoch": 0.23412,
      "grad_norm": 1.270901708599592,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 23412
    },
    {
      "epoch": 0.23413,
      "grad_norm": 1.2685136248395315,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 23413
    },
    {
      "epoch": 0.23414,
      "grad_norm": 1.44777247024794,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 23414
    },
    {
      "epoch": 0.23415,
      "grad_norm": 1.1173661883047519,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 23415
    },
    {
      "epoch": 0.23416,
      "grad_norm": 1.3467413258436467,
      "learning_rate": 0.003,
      "loss": 4.069,
      "step": 23416
    },
    {
      "epoch": 0.23417,
      "grad_norm": 1.1930537002459254,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 23417
    },
    {
      "epoch": 0.23418,
      "grad_norm": 1.5650903952247919,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 23418
    },
    {
      "epoch": 0.23419,
      "grad_norm": 1.266998349523785,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 23419
    },
    {
      "epoch": 0.2342,
      "grad_norm": 1.2893704775377108,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 23420
    },
    {
      "epoch": 0.23421,
      "grad_norm": 1.1791439191452007,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 23421
    },
    {
      "epoch": 0.23422,
      "grad_norm": 1.4245509050405656,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 23422
    },
    {
      "epoch": 0.23423,
      "grad_norm": 1.307165228171369,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 23423
    },
    {
      "epoch": 0.23424,
      "grad_norm": 1.2025462135710496,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 23424
    },
    {
      "epoch": 0.23425,
      "grad_norm": 1.1276283362529052,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 23425
    },
    {
      "epoch": 0.23426,
      "grad_norm": 1.6340636956456778,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 23426
    },
    {
      "epoch": 0.23427,
      "grad_norm": 1.0473584773050157,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 23427
    },
    {
      "epoch": 0.23428,
      "grad_norm": 1.41795237037583,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 23428
    },
    {
      "epoch": 0.23429,
      "grad_norm": 1.251689438580586,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 23429
    },
    {
      "epoch": 0.2343,
      "grad_norm": 1.24312453461535,
      "learning_rate": 0.003,
      "loss": 4.0204,
      "step": 23430
    },
    {
      "epoch": 0.23431,
      "grad_norm": 1.5292958131803067,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 23431
    },
    {
      "epoch": 0.23432,
      "grad_norm": 1.0026100040387071,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 23432
    },
    {
      "epoch": 0.23433,
      "grad_norm": 1.4850961014902797,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 23433
    },
    {
      "epoch": 0.23434,
      "grad_norm": 1.312183879029588,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 23434
    },
    {
      "epoch": 0.23435,
      "grad_norm": 1.3949258439329408,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23435
    },
    {
      "epoch": 0.23436,
      "grad_norm": 1.0606214454185012,
      "learning_rate": 0.003,
      "loss": 4.0219,
      "step": 23436
    },
    {
      "epoch": 0.23437,
      "grad_norm": 1.309998350683547,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 23437
    },
    {
      "epoch": 0.23438,
      "grad_norm": 1.6471810202782498,
      "learning_rate": 0.003,
      "loss": 4.1063,
      "step": 23438
    },
    {
      "epoch": 0.23439,
      "grad_norm": 0.8944518120383201,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 23439
    },
    {
      "epoch": 0.2344,
      "grad_norm": 1.4210544849423057,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 23440
    },
    {
      "epoch": 0.23441,
      "grad_norm": 1.2186446707547596,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 23441
    },
    {
      "epoch": 0.23442,
      "grad_norm": 1.494003855319048,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 23442
    },
    {
      "epoch": 0.23443,
      "grad_norm": 1.054176325159872,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 23443
    },
    {
      "epoch": 0.23444,
      "grad_norm": 1.2423595007967576,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 23444
    },
    {
      "epoch": 0.23445,
      "grad_norm": 1.2816517990823693,
      "learning_rate": 0.003,
      "loss": 4.0639,
      "step": 23445
    },
    {
      "epoch": 0.23446,
      "grad_norm": 1.1626938469678996,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 23446
    },
    {
      "epoch": 0.23447,
      "grad_norm": 1.4096546631106326,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 23447
    },
    {
      "epoch": 0.23448,
      "grad_norm": 1.3479844981950733,
      "learning_rate": 0.003,
      "loss": 4.0236,
      "step": 23448
    },
    {
      "epoch": 0.23449,
      "grad_norm": 1.2963676549501473,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 23449
    },
    {
      "epoch": 0.2345,
      "grad_norm": 1.4253073749545635,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 23450
    },
    {
      "epoch": 0.23451,
      "grad_norm": 1.4171133991274112,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 23451
    },
    {
      "epoch": 0.23452,
      "grad_norm": 1.1785833076166021,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 23452
    },
    {
      "epoch": 0.23453,
      "grad_norm": 1.2528821116649267,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 23453
    },
    {
      "epoch": 0.23454,
      "grad_norm": 1.2101387604618083,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 23454
    },
    {
      "epoch": 0.23455,
      "grad_norm": 1.195179141503579,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 23455
    },
    {
      "epoch": 0.23456,
      "grad_norm": 1.4079311615674788,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 23456
    },
    {
      "epoch": 0.23457,
      "grad_norm": 1.2056265379259656,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 23457
    },
    {
      "epoch": 0.23458,
      "grad_norm": 1.3973622134010926,
      "learning_rate": 0.003,
      "loss": 4.0095,
      "step": 23458
    },
    {
      "epoch": 0.23459,
      "grad_norm": 1.3555146070260597,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 23459
    },
    {
      "epoch": 0.2346,
      "grad_norm": 1.0957213427482388,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 23460
    },
    {
      "epoch": 0.23461,
      "grad_norm": 1.3887908367780266,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 23461
    },
    {
      "epoch": 0.23462,
      "grad_norm": 1.2282466648526902,
      "learning_rate": 0.003,
      "loss": 4.0648,
      "step": 23462
    },
    {
      "epoch": 0.23463,
      "grad_norm": 1.3751474104027561,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 23463
    },
    {
      "epoch": 0.23464,
      "grad_norm": 1.1243949525543444,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23464
    },
    {
      "epoch": 0.23465,
      "grad_norm": 1.3575816508528418,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 23465
    },
    {
      "epoch": 0.23466,
      "grad_norm": 1.2282764477527965,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 23466
    },
    {
      "epoch": 0.23467,
      "grad_norm": 1.4233168149193143,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 23467
    },
    {
      "epoch": 0.23468,
      "grad_norm": 1.4474934405616533,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23468
    },
    {
      "epoch": 0.23469,
      "grad_norm": 1.4218676326800723,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 23469
    },
    {
      "epoch": 0.2347,
      "grad_norm": 1.664032858766595,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 23470
    },
    {
      "epoch": 0.23471,
      "grad_norm": 1.102384835105614,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 23471
    },
    {
      "epoch": 0.23472,
      "grad_norm": 1.558404434106586,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 23472
    },
    {
      "epoch": 0.23473,
      "grad_norm": 1.4654681533016647,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 23473
    },
    {
      "epoch": 0.23474,
      "grad_norm": 1.0643767449235741,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 23474
    },
    {
      "epoch": 0.23475,
      "grad_norm": 1.3229509118922687,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 23475
    },
    {
      "epoch": 0.23476,
      "grad_norm": 1.1436970071364307,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 23476
    },
    {
      "epoch": 0.23477,
      "grad_norm": 1.299810402344442,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 23477
    },
    {
      "epoch": 0.23478,
      "grad_norm": 1.410001754430526,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 23478
    },
    {
      "epoch": 0.23479,
      "grad_norm": 1.556235327561,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 23479
    },
    {
      "epoch": 0.2348,
      "grad_norm": 1.2825652799276612,
      "learning_rate": 0.003,
      "loss": 4.0998,
      "step": 23480
    },
    {
      "epoch": 0.23481,
      "grad_norm": 1.2644227757042912,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 23481
    },
    {
      "epoch": 0.23482,
      "grad_norm": 1.3954059311583669,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 23482
    },
    {
      "epoch": 0.23483,
      "grad_norm": 1.2727811322791007,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 23483
    },
    {
      "epoch": 0.23484,
      "grad_norm": 1.229768603415025,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 23484
    },
    {
      "epoch": 0.23485,
      "grad_norm": 1.3334129651079165,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 23485
    },
    {
      "epoch": 0.23486,
      "grad_norm": 1.13762202483748,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 23486
    },
    {
      "epoch": 0.23487,
      "grad_norm": 1.5323904826671573,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 23487
    },
    {
      "epoch": 0.23488,
      "grad_norm": 1.054834634325706,
      "learning_rate": 0.003,
      "loss": 4.0382,
      "step": 23488
    },
    {
      "epoch": 0.23489,
      "grad_norm": 1.5246077700703264,
      "learning_rate": 0.003,
      "loss": 4.0636,
      "step": 23489
    },
    {
      "epoch": 0.2349,
      "grad_norm": 1.7121003945182949,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 23490
    },
    {
      "epoch": 0.23491,
      "grad_norm": 1.0920165177493513,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 23491
    },
    {
      "epoch": 0.23492,
      "grad_norm": 1.3298167289889824,
      "learning_rate": 0.003,
      "loss": 4.0078,
      "step": 23492
    },
    {
      "epoch": 0.23493,
      "grad_norm": 1.3828459675807627,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 23493
    },
    {
      "epoch": 0.23494,
      "grad_norm": 1.221615658964387,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 23494
    },
    {
      "epoch": 0.23495,
      "grad_norm": 1.4375394642189678,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 23495
    },
    {
      "epoch": 0.23496,
      "grad_norm": 1.1857413863217876,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 23496
    },
    {
      "epoch": 0.23497,
      "grad_norm": 1.4778501948927982,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 23497
    },
    {
      "epoch": 0.23498,
      "grad_norm": 1.0787725661340666,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 23498
    },
    {
      "epoch": 0.23499,
      "grad_norm": 1.3473963181966018,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 23499
    },
    {
      "epoch": 0.235,
      "grad_norm": 1.2036853420996367,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 23500
    },
    {
      "epoch": 0.23501,
      "grad_norm": 1.223826477588823,
      "learning_rate": 0.003,
      "loss": 4.0266,
      "step": 23501
    },
    {
      "epoch": 0.23502,
      "grad_norm": 1.364407731652484,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 23502
    },
    {
      "epoch": 0.23503,
      "grad_norm": 1.1295936347117093,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 23503
    },
    {
      "epoch": 0.23504,
      "grad_norm": 1.6254081267240414,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 23504
    },
    {
      "epoch": 0.23505,
      "grad_norm": 1.1570251855775018,
      "learning_rate": 0.003,
      "loss": 4.0868,
      "step": 23505
    },
    {
      "epoch": 0.23506,
      "grad_norm": 1.48082942571748,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 23506
    },
    {
      "epoch": 0.23507,
      "grad_norm": 1.1792640412381283,
      "learning_rate": 0.003,
      "loss": 4.0378,
      "step": 23507
    },
    {
      "epoch": 0.23508,
      "grad_norm": 1.2190174513725403,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 23508
    },
    {
      "epoch": 0.23509,
      "grad_norm": 1.2423780823318002,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 23509
    },
    {
      "epoch": 0.2351,
      "grad_norm": 1.1311859862325964,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 23510
    },
    {
      "epoch": 0.23511,
      "grad_norm": 1.3564786712449024,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 23511
    },
    {
      "epoch": 0.23512,
      "grad_norm": 1.2417781889293547,
      "learning_rate": 0.003,
      "loss": 4.0658,
      "step": 23512
    },
    {
      "epoch": 0.23513,
      "grad_norm": 1.344399604598758,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 23513
    },
    {
      "epoch": 0.23514,
      "grad_norm": 1.7041998176781976,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23514
    },
    {
      "epoch": 0.23515,
      "grad_norm": 1.6275217000215694,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 23515
    },
    {
      "epoch": 0.23516,
      "grad_norm": 1.1442946514548158,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 23516
    },
    {
      "epoch": 0.23517,
      "grad_norm": 1.5762637464493032,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 23517
    },
    {
      "epoch": 0.23518,
      "grad_norm": 1.2188318647418979,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 23518
    },
    {
      "epoch": 0.23519,
      "grad_norm": 1.3055805380645995,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 23519
    },
    {
      "epoch": 0.2352,
      "grad_norm": 1.2737345581728645,
      "learning_rate": 0.003,
      "loss": 4.0853,
      "step": 23520
    },
    {
      "epoch": 0.23521,
      "grad_norm": 1.2852203267288158,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 23521
    },
    {
      "epoch": 0.23522,
      "grad_norm": 1.1579369875805805,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 23522
    },
    {
      "epoch": 0.23523,
      "grad_norm": 1.4677316102183895,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 23523
    },
    {
      "epoch": 0.23524,
      "grad_norm": 1.1430443031419644,
      "learning_rate": 0.003,
      "loss": 4.098,
      "step": 23524
    },
    {
      "epoch": 0.23525,
      "grad_norm": 1.375666600673755,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 23525
    },
    {
      "epoch": 0.23526,
      "grad_norm": 1.272444260566715,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 23526
    },
    {
      "epoch": 0.23527,
      "grad_norm": 1.115615574247192,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 23527
    },
    {
      "epoch": 0.23528,
      "grad_norm": 1.3639004778067785,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 23528
    },
    {
      "epoch": 0.23529,
      "grad_norm": 1.4220766235449773,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 23529
    },
    {
      "epoch": 0.2353,
      "grad_norm": 1.4496060048333888,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 23530
    },
    {
      "epoch": 0.23531,
      "grad_norm": 1.1550650266203935,
      "learning_rate": 0.003,
      "loss": 3.9924,
      "step": 23531
    },
    {
      "epoch": 0.23532,
      "grad_norm": 1.4439004272198512,
      "learning_rate": 0.003,
      "loss": 4.0827,
      "step": 23532
    },
    {
      "epoch": 0.23533,
      "grad_norm": 1.0282806112006082,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 23533
    },
    {
      "epoch": 0.23534,
      "grad_norm": 1.5221722552914796,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 23534
    },
    {
      "epoch": 0.23535,
      "grad_norm": 1.142393286351315,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 23535
    },
    {
      "epoch": 0.23536,
      "grad_norm": 1.3858806885932367,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 23536
    },
    {
      "epoch": 0.23537,
      "grad_norm": 1.2887760091250255,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 23537
    },
    {
      "epoch": 0.23538,
      "grad_norm": 1.268491137227561,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 23538
    },
    {
      "epoch": 0.23539,
      "grad_norm": 1.3745065016018787,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 23539
    },
    {
      "epoch": 0.2354,
      "grad_norm": 1.2379029621010118,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 23540
    },
    {
      "epoch": 0.23541,
      "grad_norm": 1.4016614074941376,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 23541
    },
    {
      "epoch": 0.23542,
      "grad_norm": 1.1048640443595201,
      "learning_rate": 0.003,
      "loss": 4.0411,
      "step": 23542
    },
    {
      "epoch": 0.23543,
      "grad_norm": 1.3942164398860328,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 23543
    },
    {
      "epoch": 0.23544,
      "grad_norm": 1.2790291542295937,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 23544
    },
    {
      "epoch": 0.23545,
      "grad_norm": 1.3862190297543544,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 23545
    },
    {
      "epoch": 0.23546,
      "grad_norm": 1.2095973979703265,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 23546
    },
    {
      "epoch": 0.23547,
      "grad_norm": 1.5758682439787788,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 23547
    },
    {
      "epoch": 0.23548,
      "grad_norm": 1.2828088939122402,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 23548
    },
    {
      "epoch": 0.23549,
      "grad_norm": 1.4679174046132317,
      "learning_rate": 0.003,
      "loss": 4.0605,
      "step": 23549
    },
    {
      "epoch": 0.2355,
      "grad_norm": 1.0639583078929091,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 23550
    },
    {
      "epoch": 0.23551,
      "grad_norm": 1.556049948036192,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 23551
    },
    {
      "epoch": 0.23552,
      "grad_norm": 1.248536199129532,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 23552
    },
    {
      "epoch": 0.23553,
      "grad_norm": 1.5064653223459554,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 23553
    },
    {
      "epoch": 0.23554,
      "grad_norm": 1.1439828566478294,
      "learning_rate": 0.003,
      "loss": 4.0645,
      "step": 23554
    },
    {
      "epoch": 0.23555,
      "grad_norm": 1.713054405907999,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 23555
    },
    {
      "epoch": 0.23556,
      "grad_norm": 1.0470829391495002,
      "learning_rate": 0.003,
      "loss": 4.0186,
      "step": 23556
    },
    {
      "epoch": 0.23557,
      "grad_norm": 1.366403003368298,
      "learning_rate": 0.003,
      "loss": 4.0201,
      "step": 23557
    },
    {
      "epoch": 0.23558,
      "grad_norm": 1.2510403515167263,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 23558
    },
    {
      "epoch": 0.23559,
      "grad_norm": 1.5170501567192332,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 23559
    },
    {
      "epoch": 0.2356,
      "grad_norm": 1.5571243941431558,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 23560
    },
    {
      "epoch": 0.23561,
      "grad_norm": 1.2034867092975143,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 23561
    },
    {
      "epoch": 0.23562,
      "grad_norm": 1.2143662031534421,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 23562
    },
    {
      "epoch": 0.23563,
      "grad_norm": 1.188212442106662,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 23563
    },
    {
      "epoch": 0.23564,
      "grad_norm": 1.3677431216054141,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 23564
    },
    {
      "epoch": 0.23565,
      "grad_norm": 1.101889247978777,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 23565
    },
    {
      "epoch": 0.23566,
      "grad_norm": 1.2717039396998226,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 23566
    },
    {
      "epoch": 0.23567,
      "grad_norm": 1.1856496646020174,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 23567
    },
    {
      "epoch": 0.23568,
      "grad_norm": 1.2855123764395402,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 23568
    },
    {
      "epoch": 0.23569,
      "grad_norm": 1.3967086315002388,
      "learning_rate": 0.003,
      "loss": 4.0712,
      "step": 23569
    },
    {
      "epoch": 0.2357,
      "grad_norm": 1.3082550029704532,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 23570
    },
    {
      "epoch": 0.23571,
      "grad_norm": 1.7442156804502371,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 23571
    },
    {
      "epoch": 0.23572,
      "grad_norm": 1.1207225006713477,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 23572
    },
    {
      "epoch": 0.23573,
      "grad_norm": 1.2888850203454607,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 23573
    },
    {
      "epoch": 0.23574,
      "grad_norm": 1.1377771881493937,
      "learning_rate": 0.003,
      "loss": 4.0213,
      "step": 23574
    },
    {
      "epoch": 0.23575,
      "grad_norm": 1.4572297206713176,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 23575
    },
    {
      "epoch": 0.23576,
      "grad_norm": 1.0875871086980016,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 23576
    },
    {
      "epoch": 0.23577,
      "grad_norm": 1.5637828316273845,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 23577
    },
    {
      "epoch": 0.23578,
      "grad_norm": 1.1770711611707216,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 23578
    },
    {
      "epoch": 0.23579,
      "grad_norm": 1.820602698168912,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 23579
    },
    {
      "epoch": 0.2358,
      "grad_norm": 1.171823469053568,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 23580
    },
    {
      "epoch": 0.23581,
      "grad_norm": 1.2935324234332046,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 23581
    },
    {
      "epoch": 0.23582,
      "grad_norm": 1.3044972822919219,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 23582
    },
    {
      "epoch": 0.23583,
      "grad_norm": 1.238231255195047,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 23583
    },
    {
      "epoch": 0.23584,
      "grad_norm": 1.4004548670675205,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 23584
    },
    {
      "epoch": 0.23585,
      "grad_norm": 1.0231016296401765,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 23585
    },
    {
      "epoch": 0.23586,
      "grad_norm": 1.4767443977757324,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 23586
    },
    {
      "epoch": 0.23587,
      "grad_norm": 1.5119451251525389,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 23587
    },
    {
      "epoch": 0.23588,
      "grad_norm": 1.1989637011059378,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 23588
    },
    {
      "epoch": 0.23589,
      "grad_norm": 1.5270406155035605,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 23589
    },
    {
      "epoch": 0.2359,
      "grad_norm": 1.269032908594783,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 23590
    },
    {
      "epoch": 0.23591,
      "grad_norm": 1.6225865933820671,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 23591
    },
    {
      "epoch": 0.23592,
      "grad_norm": 1.08473728649285,
      "learning_rate": 0.003,
      "loss": 4.0794,
      "step": 23592
    },
    {
      "epoch": 0.23593,
      "grad_norm": 1.3425190691770323,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 23593
    },
    {
      "epoch": 0.23594,
      "grad_norm": 1.2466637972544092,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 23594
    },
    {
      "epoch": 0.23595,
      "grad_norm": 1.2785123651003583,
      "learning_rate": 0.003,
      "loss": 4.0631,
      "step": 23595
    },
    {
      "epoch": 0.23596,
      "grad_norm": 1.4788456954119416,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 23596
    },
    {
      "epoch": 0.23597,
      "grad_norm": 1.0538422980668722,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 23597
    },
    {
      "epoch": 0.23598,
      "grad_norm": 1.7099588121690856,
      "learning_rate": 0.003,
      "loss": 4.0714,
      "step": 23598
    },
    {
      "epoch": 0.23599,
      "grad_norm": 1.003066627245704,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 23599
    },
    {
      "epoch": 0.236,
      "grad_norm": 1.9791699549245332,
      "learning_rate": 0.003,
      "loss": 4.0916,
      "step": 23600
    },
    {
      "epoch": 0.23601,
      "grad_norm": 1.2652429275722812,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 23601
    },
    {
      "epoch": 0.23602,
      "grad_norm": 1.1216889993479584,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 23602
    },
    {
      "epoch": 0.23603,
      "grad_norm": 1.394702868129797,
      "learning_rate": 0.003,
      "loss": 4.0917,
      "step": 23603
    },
    {
      "epoch": 0.23604,
      "grad_norm": 1.2191916467988568,
      "learning_rate": 0.003,
      "loss": 4.0613,
      "step": 23604
    },
    {
      "epoch": 0.23605,
      "grad_norm": 1.548537069103473,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 23605
    },
    {
      "epoch": 0.23606,
      "grad_norm": 1.1493443935645882,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23606
    },
    {
      "epoch": 0.23607,
      "grad_norm": 1.3073424183189124,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 23607
    },
    {
      "epoch": 0.23608,
      "grad_norm": 1.4108859714119362,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 23608
    },
    {
      "epoch": 0.23609,
      "grad_norm": 1.1970254370362037,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 23609
    },
    {
      "epoch": 0.2361,
      "grad_norm": 1.0952019716437682,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 23610
    },
    {
      "epoch": 0.23611,
      "grad_norm": 1.3565307731217888,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 23611
    },
    {
      "epoch": 0.23612,
      "grad_norm": 1.3095323280163718,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 23612
    },
    {
      "epoch": 0.23613,
      "grad_norm": 1.2567105525421158,
      "learning_rate": 0.003,
      "loss": 4.0617,
      "step": 23613
    },
    {
      "epoch": 0.23614,
      "grad_norm": 1.3368337684650289,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 23614
    },
    {
      "epoch": 0.23615,
      "grad_norm": 1.34774045634497,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 23615
    },
    {
      "epoch": 0.23616,
      "grad_norm": 1.3199422151102764,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 23616
    },
    {
      "epoch": 0.23617,
      "grad_norm": 1.2142054499970667,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 23617
    },
    {
      "epoch": 0.23618,
      "grad_norm": 1.4447440314536921,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 23618
    },
    {
      "epoch": 0.23619,
      "grad_norm": 1.1610116807628668,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 23619
    },
    {
      "epoch": 0.2362,
      "grad_norm": 1.2968802701414694,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 23620
    },
    {
      "epoch": 0.23621,
      "grad_norm": 1.411809158090715,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 23621
    },
    {
      "epoch": 0.23622,
      "grad_norm": 1.0018815118641649,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 23622
    },
    {
      "epoch": 0.23623,
      "grad_norm": 1.3635308876029366,
      "learning_rate": 0.003,
      "loss": 4.0112,
      "step": 23623
    },
    {
      "epoch": 0.23624,
      "grad_norm": 1.10965217854949,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 23624
    },
    {
      "epoch": 0.23625,
      "grad_norm": 1.7488481805435205,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 23625
    },
    {
      "epoch": 0.23626,
      "grad_norm": 1.0859923544661083,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 23626
    },
    {
      "epoch": 0.23627,
      "grad_norm": 1.2384710397490812,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 23627
    },
    {
      "epoch": 0.23628,
      "grad_norm": 1.2155553676919961,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 23628
    },
    {
      "epoch": 0.23629,
      "grad_norm": 1.0133477747370572,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 23629
    },
    {
      "epoch": 0.2363,
      "grad_norm": 1.4125211879000776,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 23630
    },
    {
      "epoch": 0.23631,
      "grad_norm": 1.3163483627954522,
      "learning_rate": 0.003,
      "loss": 4.0223,
      "step": 23631
    },
    {
      "epoch": 0.23632,
      "grad_norm": 1.5009282652314748,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 23632
    },
    {
      "epoch": 0.23633,
      "grad_norm": 1.461536087930038,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 23633
    },
    {
      "epoch": 0.23634,
      "grad_norm": 1.3360946359702124,
      "learning_rate": 0.003,
      "loss": 4.0828,
      "step": 23634
    },
    {
      "epoch": 0.23635,
      "grad_norm": 1.6226273615839206,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 23635
    },
    {
      "epoch": 0.23636,
      "grad_norm": 0.946379008272811,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 23636
    },
    {
      "epoch": 0.23637,
      "grad_norm": 1.2687992609766823,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23637
    },
    {
      "epoch": 0.23638,
      "grad_norm": 1.3144689070142017,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 23638
    },
    {
      "epoch": 0.23639,
      "grad_norm": 1.1833706412679799,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 23639
    },
    {
      "epoch": 0.2364,
      "grad_norm": 1.2643846450362188,
      "learning_rate": 0.003,
      "loss": 4.0167,
      "step": 23640
    },
    {
      "epoch": 0.23641,
      "grad_norm": 1.2836297052401486,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 23641
    },
    {
      "epoch": 0.23642,
      "grad_norm": 1.350728821115279,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 23642
    },
    {
      "epoch": 0.23643,
      "grad_norm": 1.195000845841808,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 23643
    },
    {
      "epoch": 0.23644,
      "grad_norm": 1.4629558253860953,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 23644
    },
    {
      "epoch": 0.23645,
      "grad_norm": 1.093914745066469,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 23645
    },
    {
      "epoch": 0.23646,
      "grad_norm": 1.3877748865140367,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23646
    },
    {
      "epoch": 0.23647,
      "grad_norm": 1.264758900032,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 23647
    },
    {
      "epoch": 0.23648,
      "grad_norm": 1.5739334685255446,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 23648
    },
    {
      "epoch": 0.23649,
      "grad_norm": 1.0311298235821733,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 23649
    },
    {
      "epoch": 0.2365,
      "grad_norm": 1.6397741977463254,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 23650
    },
    {
      "epoch": 0.23651,
      "grad_norm": 1.0232840837952728,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 23651
    },
    {
      "epoch": 0.23652,
      "grad_norm": 1.629541944846788,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 23652
    },
    {
      "epoch": 0.23653,
      "grad_norm": 1.177581598969811,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 23653
    },
    {
      "epoch": 0.23654,
      "grad_norm": 1.1235775126833063,
      "learning_rate": 0.003,
      "loss": 4.016,
      "step": 23654
    },
    {
      "epoch": 0.23655,
      "grad_norm": 1.2878011900426842,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 23655
    },
    {
      "epoch": 0.23656,
      "grad_norm": 1.1940669610180175,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 23656
    },
    {
      "epoch": 0.23657,
      "grad_norm": 1.206844629046312,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 23657
    },
    {
      "epoch": 0.23658,
      "grad_norm": 1.4371758952293205,
      "learning_rate": 0.003,
      "loss": 4.0699,
      "step": 23658
    },
    {
      "epoch": 0.23659,
      "grad_norm": 1.2523693023114035,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 23659
    },
    {
      "epoch": 0.2366,
      "grad_norm": 1.462068175556941,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 23660
    },
    {
      "epoch": 0.23661,
      "grad_norm": 1.481961618084773,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 23661
    },
    {
      "epoch": 0.23662,
      "grad_norm": 1.269959000324739,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 23662
    },
    {
      "epoch": 0.23663,
      "grad_norm": 1.2376874660119221,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 23663
    },
    {
      "epoch": 0.23664,
      "grad_norm": 1.465345469478452,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 23664
    },
    {
      "epoch": 0.23665,
      "grad_norm": 1.2198269537172273,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 23665
    },
    {
      "epoch": 0.23666,
      "grad_norm": 1.2361049303348048,
      "learning_rate": 0.003,
      "loss": 4.1018,
      "step": 23666
    },
    {
      "epoch": 0.23667,
      "grad_norm": 1.2426580260886706,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 23667
    },
    {
      "epoch": 0.23668,
      "grad_norm": 1.0462775177853703,
      "learning_rate": 0.003,
      "loss": 4.015,
      "step": 23668
    },
    {
      "epoch": 0.23669,
      "grad_norm": 1.4892376243472623,
      "learning_rate": 0.003,
      "loss": 4.0573,
      "step": 23669
    },
    {
      "epoch": 0.2367,
      "grad_norm": 1.227079176290411,
      "learning_rate": 0.003,
      "loss": 4.0141,
      "step": 23670
    },
    {
      "epoch": 0.23671,
      "grad_norm": 1.6896739591178331,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 23671
    },
    {
      "epoch": 0.23672,
      "grad_norm": 1.166689132632801,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 23672
    },
    {
      "epoch": 0.23673,
      "grad_norm": 1.4623926653176087,
      "learning_rate": 0.003,
      "loss": 4.018,
      "step": 23673
    },
    {
      "epoch": 0.23674,
      "grad_norm": 1.1253555650291334,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 23674
    },
    {
      "epoch": 0.23675,
      "grad_norm": 1.4424946620000527,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 23675
    },
    {
      "epoch": 0.23676,
      "grad_norm": 1.1775474693912467,
      "learning_rate": 0.003,
      "loss": 4.08,
      "step": 23676
    },
    {
      "epoch": 0.23677,
      "grad_norm": 1.2019398953946223,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 23677
    },
    {
      "epoch": 0.23678,
      "grad_norm": 1.2624286059830971,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 23678
    },
    {
      "epoch": 0.23679,
      "grad_norm": 1.353696407430561,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 23679
    },
    {
      "epoch": 0.2368,
      "grad_norm": 1.4063596347236127,
      "learning_rate": 0.003,
      "loss": 4.0037,
      "step": 23680
    },
    {
      "epoch": 0.23681,
      "grad_norm": 1.0400918466665534,
      "learning_rate": 0.003,
      "loss": 4.0195,
      "step": 23681
    },
    {
      "epoch": 0.23682,
      "grad_norm": 1.7751778071967164,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 23682
    },
    {
      "epoch": 0.23683,
      "grad_norm": 1.1440786910415348,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 23683
    },
    {
      "epoch": 0.23684,
      "grad_norm": 1.374883240005277,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 23684
    },
    {
      "epoch": 0.23685,
      "grad_norm": 1.0745801359741007,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 23685
    },
    {
      "epoch": 0.23686,
      "grad_norm": 1.5386474927017875,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 23686
    },
    {
      "epoch": 0.23687,
      "grad_norm": 1.1690145054557997,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 23687
    },
    {
      "epoch": 0.23688,
      "grad_norm": 1.356249541312422,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 23688
    },
    {
      "epoch": 0.23689,
      "grad_norm": 1.0793811140424336,
      "learning_rate": 0.003,
      "loss": 4.0481,
      "step": 23689
    },
    {
      "epoch": 0.2369,
      "grad_norm": 1.3056075597987458,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 23690
    },
    {
      "epoch": 0.23691,
      "grad_norm": 1.1297859569042559,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 23691
    },
    {
      "epoch": 0.23692,
      "grad_norm": 1.220226725732296,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 23692
    },
    {
      "epoch": 0.23693,
      "grad_norm": 1.4298512360059767,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 23693
    },
    {
      "epoch": 0.23694,
      "grad_norm": 1.2220316175695687,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 23694
    },
    {
      "epoch": 0.23695,
      "grad_norm": 1.2735929220920998,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 23695
    },
    {
      "epoch": 0.23696,
      "grad_norm": 1.3782266394623592,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 23696
    },
    {
      "epoch": 0.23697,
      "grad_norm": 1.2610129440776563,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 23697
    },
    {
      "epoch": 0.23698,
      "grad_norm": 1.3325089768412006,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 23698
    },
    {
      "epoch": 0.23699,
      "grad_norm": 1.2869215805307337,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 23699
    },
    {
      "epoch": 0.237,
      "grad_norm": 1.284724772908701,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 23700
    },
    {
      "epoch": 0.23701,
      "grad_norm": 1.5090489407528482,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 23701
    },
    {
      "epoch": 0.23702,
      "grad_norm": 1.3073404647654603,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 23702
    },
    {
      "epoch": 0.23703,
      "grad_norm": 1.1558030374791959,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 23703
    },
    {
      "epoch": 0.23704,
      "grad_norm": 1.4692820358607068,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 23704
    },
    {
      "epoch": 0.23705,
      "grad_norm": 0.9417283807886037,
      "learning_rate": 0.003,
      "loss": 4.0127,
      "step": 23705
    },
    {
      "epoch": 0.23706,
      "grad_norm": 1.5206367146642286,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 23706
    },
    {
      "epoch": 0.23707,
      "grad_norm": 1.2779932353414432,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 23707
    },
    {
      "epoch": 0.23708,
      "grad_norm": 1.545714886319892,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 23708
    },
    {
      "epoch": 0.23709,
      "grad_norm": 1.3814072514689995,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 23709
    },
    {
      "epoch": 0.2371,
      "grad_norm": 1.2825906797456401,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 23710
    },
    {
      "epoch": 0.23711,
      "grad_norm": 1.2494070433367912,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 23711
    },
    {
      "epoch": 0.23712,
      "grad_norm": 1.30400311295083,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 23712
    },
    {
      "epoch": 0.23713,
      "grad_norm": 1.2562514373354503,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 23713
    },
    {
      "epoch": 0.23714,
      "grad_norm": 1.2832290456993536,
      "learning_rate": 0.003,
      "loss": 4.0621,
      "step": 23714
    },
    {
      "epoch": 0.23715,
      "grad_norm": 1.3039640135418236,
      "learning_rate": 0.003,
      "loss": 4.0352,
      "step": 23715
    },
    {
      "epoch": 0.23716,
      "grad_norm": 1.222289107950515,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 23716
    },
    {
      "epoch": 0.23717,
      "grad_norm": 1.5755722655773658,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 23717
    },
    {
      "epoch": 0.23718,
      "grad_norm": 1.1780195442081456,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 23718
    },
    {
      "epoch": 0.23719,
      "grad_norm": 1.4001038713573974,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 23719
    },
    {
      "epoch": 0.2372,
      "grad_norm": 1.120587085206351,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 23720
    },
    {
      "epoch": 0.23721,
      "grad_norm": 1.113506003852723,
      "learning_rate": 0.003,
      "loss": 4.0089,
      "step": 23721
    },
    {
      "epoch": 0.23722,
      "grad_norm": 1.1902530140764338,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 23722
    },
    {
      "epoch": 0.23723,
      "grad_norm": 1.5474127615484905,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 23723
    },
    {
      "epoch": 0.23724,
      "grad_norm": 1.2084857165780836,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 23724
    },
    {
      "epoch": 0.23725,
      "grad_norm": 1.3756020473215986,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 23725
    },
    {
      "epoch": 0.23726,
      "grad_norm": 1.2561203221931616,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 23726
    },
    {
      "epoch": 0.23727,
      "grad_norm": 1.5585393178990727,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 23727
    },
    {
      "epoch": 0.23728,
      "grad_norm": 1.1033787870196319,
      "learning_rate": 0.003,
      "loss": 3.9958,
      "step": 23728
    },
    {
      "epoch": 0.23729,
      "grad_norm": 1.5542847328246168,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 23729
    },
    {
      "epoch": 0.2373,
      "grad_norm": 1.0816735241659021,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23730
    },
    {
      "epoch": 0.23731,
      "grad_norm": 1.3889145716303541,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 23731
    },
    {
      "epoch": 0.23732,
      "grad_norm": 1.2593014438570076,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 23732
    },
    {
      "epoch": 0.23733,
      "grad_norm": 1.3321997521846503,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 23733
    },
    {
      "epoch": 0.23734,
      "grad_norm": 1.1744229189687176,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 23734
    },
    {
      "epoch": 0.23735,
      "grad_norm": 1.457656813230531,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 23735
    },
    {
      "epoch": 0.23736,
      "grad_norm": 1.1030522299872654,
      "learning_rate": 0.003,
      "loss": 4.0134,
      "step": 23736
    },
    {
      "epoch": 0.23737,
      "grad_norm": 1.424446338633868,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 23737
    },
    {
      "epoch": 0.23738,
      "grad_norm": 1.2894176123642855,
      "learning_rate": 0.003,
      "loss": 4.0553,
      "step": 23738
    },
    {
      "epoch": 0.23739,
      "grad_norm": 1.3409103580448187,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 23739
    },
    {
      "epoch": 0.2374,
      "grad_norm": 1.4057281996507969,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 23740
    },
    {
      "epoch": 0.23741,
      "grad_norm": 1.115664020440304,
      "learning_rate": 0.003,
      "loss": 4.0302,
      "step": 23741
    },
    {
      "epoch": 0.23742,
      "grad_norm": 1.3408818434111343,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 23742
    },
    {
      "epoch": 0.23743,
      "grad_norm": 1.3386866261604355,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 23743
    },
    {
      "epoch": 0.23744,
      "grad_norm": 1.8501301660584664,
      "learning_rate": 0.003,
      "loss": 4.052,
      "step": 23744
    },
    {
      "epoch": 0.23745,
      "grad_norm": 1.3553097999844423,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 23745
    },
    {
      "epoch": 0.23746,
      "grad_norm": 1.146161360704984,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 23746
    },
    {
      "epoch": 0.23747,
      "grad_norm": 1.332728259047778,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 23747
    },
    {
      "epoch": 0.23748,
      "grad_norm": 1.4008393918644393,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 23748
    },
    {
      "epoch": 0.23749,
      "grad_norm": 1.480555260688311,
      "learning_rate": 0.003,
      "loss": 4.0355,
      "step": 23749
    },
    {
      "epoch": 0.2375,
      "grad_norm": 1.208726374303708,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 23750
    },
    {
      "epoch": 0.23751,
      "grad_norm": 1.3185136266661017,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 23751
    },
    {
      "epoch": 0.23752,
      "grad_norm": 1.198548566656275,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 23752
    },
    {
      "epoch": 0.23753,
      "grad_norm": 1.3611165764660003,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 23753
    },
    {
      "epoch": 0.23754,
      "grad_norm": 1.1900264302317343,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 23754
    },
    {
      "epoch": 0.23755,
      "grad_norm": 1.3856426778562365,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 23755
    },
    {
      "epoch": 0.23756,
      "grad_norm": 1.1452716099963869,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 23756
    },
    {
      "epoch": 0.23757,
      "grad_norm": 1.2830380403883017,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 23757
    },
    {
      "epoch": 0.23758,
      "grad_norm": 1.263668970515008,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 23758
    },
    {
      "epoch": 0.23759,
      "grad_norm": 1.345236670290279,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23759
    },
    {
      "epoch": 0.2376,
      "grad_norm": 1.292150063376207,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 23760
    },
    {
      "epoch": 0.23761,
      "grad_norm": 1.1808226954554617,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 23761
    },
    {
      "epoch": 0.23762,
      "grad_norm": 1.2465742158976219,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 23762
    },
    {
      "epoch": 0.23763,
      "grad_norm": 1.261865763600032,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 23763
    },
    {
      "epoch": 0.23764,
      "grad_norm": 1.1471541620118675,
      "learning_rate": 0.003,
      "loss": 4.0027,
      "step": 23764
    },
    {
      "epoch": 0.23765,
      "grad_norm": 1.5774345085773744,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 23765
    },
    {
      "epoch": 0.23766,
      "grad_norm": 1.1769892792339534,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 23766
    },
    {
      "epoch": 0.23767,
      "grad_norm": 1.5802083274816956,
      "learning_rate": 0.003,
      "loss": 4.0208,
      "step": 23767
    },
    {
      "epoch": 0.23768,
      "grad_norm": 1.0660115952853646,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 23768
    },
    {
      "epoch": 0.23769,
      "grad_norm": 1.096845953913119,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 23769
    },
    {
      "epoch": 0.2377,
      "grad_norm": 1.6428235416320027,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 23770
    },
    {
      "epoch": 0.23771,
      "grad_norm": 1.2268917946899078,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 23771
    },
    {
      "epoch": 0.23772,
      "grad_norm": 1.481420221928791,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 23772
    },
    {
      "epoch": 0.23773,
      "grad_norm": 1.2066449681381066,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 23773
    },
    {
      "epoch": 0.23774,
      "grad_norm": 1.3852901385359326,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 23774
    },
    {
      "epoch": 0.23775,
      "grad_norm": 1.2098759784121849,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 23775
    },
    {
      "epoch": 0.23776,
      "grad_norm": 1.35072879631268,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 23776
    },
    {
      "epoch": 0.23777,
      "grad_norm": 1.0212375161493594,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 23777
    },
    {
      "epoch": 0.23778,
      "grad_norm": 1.5916799860075879,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 23778
    },
    {
      "epoch": 0.23779,
      "grad_norm": 1.2941785183522605,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 23779
    },
    {
      "epoch": 0.2378,
      "grad_norm": 1.389404951829121,
      "learning_rate": 0.003,
      "loss": 3.9991,
      "step": 23780
    },
    {
      "epoch": 0.23781,
      "grad_norm": 1.23583468677105,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 23781
    },
    {
      "epoch": 0.23782,
      "grad_norm": 1.1860316882252406,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 23782
    },
    {
      "epoch": 0.23783,
      "grad_norm": 1.3522011828676728,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 23783
    },
    {
      "epoch": 0.23784,
      "grad_norm": 1.4237710626629256,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23784
    },
    {
      "epoch": 0.23785,
      "grad_norm": 1.4850213377224577,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 23785
    },
    {
      "epoch": 0.23786,
      "grad_norm": 1.367666695197778,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 23786
    },
    {
      "epoch": 0.23787,
      "grad_norm": 1.5293138274104534,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 23787
    },
    {
      "epoch": 0.23788,
      "grad_norm": 1.0747528320121753,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 23788
    },
    {
      "epoch": 0.23789,
      "grad_norm": 1.661082382912658,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 23789
    },
    {
      "epoch": 0.2379,
      "grad_norm": 1.0991220074339236,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 23790
    },
    {
      "epoch": 0.23791,
      "grad_norm": 1.6177981022933468,
      "learning_rate": 0.003,
      "loss": 4.0305,
      "step": 23791
    },
    {
      "epoch": 0.23792,
      "grad_norm": 1.2496492678688773,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 23792
    },
    {
      "epoch": 0.23793,
      "grad_norm": 1.270639004872499,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 23793
    },
    {
      "epoch": 0.23794,
      "grad_norm": 1.396290220103336,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 23794
    },
    {
      "epoch": 0.23795,
      "grad_norm": 1.2041502942283608,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 23795
    },
    {
      "epoch": 0.23796,
      "grad_norm": 1.4160771650035684,
      "learning_rate": 0.003,
      "loss": 4.0778,
      "step": 23796
    },
    {
      "epoch": 0.23797,
      "grad_norm": 1.2315464099960576,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 23797
    },
    {
      "epoch": 0.23798,
      "grad_norm": 1.4166731645042991,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 23798
    },
    {
      "epoch": 0.23799,
      "grad_norm": 1.1756818998517438,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 23799
    },
    {
      "epoch": 0.238,
      "grad_norm": 1.3068080497004517,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 23800
    },
    {
      "epoch": 0.23801,
      "grad_norm": 1.0583396063313557,
      "learning_rate": 0.003,
      "loss": 4.0854,
      "step": 23801
    },
    {
      "epoch": 0.23802,
      "grad_norm": 1.4194066619768246,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 23802
    },
    {
      "epoch": 0.23803,
      "grad_norm": 0.9248461116780902,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 23803
    },
    {
      "epoch": 0.23804,
      "grad_norm": 1.5982794283903097,
      "learning_rate": 0.003,
      "loss": 4.0426,
      "step": 23804
    },
    {
      "epoch": 0.23805,
      "grad_norm": 1.5213386475082742,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 23805
    },
    {
      "epoch": 0.23806,
      "grad_norm": 1.296312226802364,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 23806
    },
    {
      "epoch": 0.23807,
      "grad_norm": 1.2186993021713506,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 23807
    },
    {
      "epoch": 0.23808,
      "grad_norm": 1.5581545633728306,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 23808
    },
    {
      "epoch": 0.23809,
      "grad_norm": 1.0839293973349182,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 23809
    },
    {
      "epoch": 0.2381,
      "grad_norm": 1.4127437288819267,
      "learning_rate": 0.003,
      "loss": 4.073,
      "step": 23810
    },
    {
      "epoch": 0.23811,
      "grad_norm": 0.9772413731718163,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 23811
    },
    {
      "epoch": 0.23812,
      "grad_norm": 1.2074401285427336,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 23812
    },
    {
      "epoch": 0.23813,
      "grad_norm": 1.2826858301353392,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 23813
    },
    {
      "epoch": 0.23814,
      "grad_norm": 1.3832318258593754,
      "learning_rate": 0.003,
      "loss": 4.0816,
      "step": 23814
    },
    {
      "epoch": 0.23815,
      "grad_norm": 1.4700508680675612,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 23815
    },
    {
      "epoch": 0.23816,
      "grad_norm": 1.0910979547311042,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 23816
    },
    {
      "epoch": 0.23817,
      "grad_norm": 1.5588218804038259,
      "learning_rate": 0.003,
      "loss": 4.0989,
      "step": 23817
    },
    {
      "epoch": 0.23818,
      "grad_norm": 1.3386480825097924,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 23818
    },
    {
      "epoch": 0.23819,
      "grad_norm": 1.5134689319234114,
      "learning_rate": 0.003,
      "loss": 4.0638,
      "step": 23819
    },
    {
      "epoch": 0.2382,
      "grad_norm": 1.241625547934892,
      "learning_rate": 0.003,
      "loss": 4.0525,
      "step": 23820
    },
    {
      "epoch": 0.23821,
      "grad_norm": 1.3534401482195397,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 23821
    },
    {
      "epoch": 0.23822,
      "grad_norm": 1.1628791461240653,
      "learning_rate": 0.003,
      "loss": 4.0644,
      "step": 23822
    },
    {
      "epoch": 0.23823,
      "grad_norm": 1.5041129084766678,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 23823
    },
    {
      "epoch": 0.23824,
      "grad_norm": 1.1665011979447957,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 23824
    },
    {
      "epoch": 0.23825,
      "grad_norm": 1.2121294063615315,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 23825
    },
    {
      "epoch": 0.23826,
      "grad_norm": 1.2558788993358732,
      "learning_rate": 0.003,
      "loss": 4.0168,
      "step": 23826
    },
    {
      "epoch": 0.23827,
      "grad_norm": 1.2735924026925578,
      "learning_rate": 0.003,
      "loss": 4.0227,
      "step": 23827
    },
    {
      "epoch": 0.23828,
      "grad_norm": 1.3808488526296263,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 23828
    },
    {
      "epoch": 0.23829,
      "grad_norm": 1.2224072738269918,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 23829
    },
    {
      "epoch": 0.2383,
      "grad_norm": 1.5641554627001137,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 23830
    },
    {
      "epoch": 0.23831,
      "grad_norm": 1.2992866981548012,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 23831
    },
    {
      "epoch": 0.23832,
      "grad_norm": 1.549147357978281,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23832
    },
    {
      "epoch": 0.23833,
      "grad_norm": 1.0098718573679912,
      "learning_rate": 0.003,
      "loss": 4.0109,
      "step": 23833
    },
    {
      "epoch": 0.23834,
      "grad_norm": 1.4027929190896795,
      "learning_rate": 0.003,
      "loss": 4.0117,
      "step": 23834
    },
    {
      "epoch": 0.23835,
      "grad_norm": 1.4908354679872946,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 23835
    },
    {
      "epoch": 0.23836,
      "grad_norm": 1.2272859919591117,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 23836
    },
    {
      "epoch": 0.23837,
      "grad_norm": 1.4516299417369554,
      "learning_rate": 0.003,
      "loss": 4.0861,
      "step": 23837
    },
    {
      "epoch": 0.23838,
      "grad_norm": 1.160717661601082,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 23838
    },
    {
      "epoch": 0.23839,
      "grad_norm": 1.3644040754585327,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 23839
    },
    {
      "epoch": 0.2384,
      "grad_norm": 1.2573914287935082,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 23840
    },
    {
      "epoch": 0.23841,
      "grad_norm": 1.4239647317799968,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 23841
    },
    {
      "epoch": 0.23842,
      "grad_norm": 1.249961617385262,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 23842
    },
    {
      "epoch": 0.23843,
      "grad_norm": 1.2642193504748251,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 23843
    },
    {
      "epoch": 0.23844,
      "grad_norm": 1.2590280416023358,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 23844
    },
    {
      "epoch": 0.23845,
      "grad_norm": 1.2844352399921637,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 23845
    },
    {
      "epoch": 0.23846,
      "grad_norm": 1.3920554222875168,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 23846
    },
    {
      "epoch": 0.23847,
      "grad_norm": 1.124013501853405,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 23847
    },
    {
      "epoch": 0.23848,
      "grad_norm": 1.5373515416062635,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 23848
    },
    {
      "epoch": 0.23849,
      "grad_norm": 1.598751276652976,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 23849
    },
    {
      "epoch": 0.2385,
      "grad_norm": 1.377683284401099,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 23850
    },
    {
      "epoch": 0.23851,
      "grad_norm": 1.3489381886145224,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 23851
    },
    {
      "epoch": 0.23852,
      "grad_norm": 1.4180338550410077,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 23852
    },
    {
      "epoch": 0.23853,
      "grad_norm": 1.1771854326221283,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 23853
    },
    {
      "epoch": 0.23854,
      "grad_norm": 1.2383274412901384,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 23854
    },
    {
      "epoch": 0.23855,
      "grad_norm": 1.302489599276082,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 23855
    },
    {
      "epoch": 0.23856,
      "grad_norm": 1.1536083341454884,
      "learning_rate": 0.003,
      "loss": 4.0175,
      "step": 23856
    },
    {
      "epoch": 0.23857,
      "grad_norm": 1.3706374162366113,
      "learning_rate": 0.003,
      "loss": 4.0802,
      "step": 23857
    },
    {
      "epoch": 0.23858,
      "grad_norm": 1.0242977102554751,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 23858
    },
    {
      "epoch": 0.23859,
      "grad_norm": 1.7850463443733227,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 23859
    },
    {
      "epoch": 0.2386,
      "grad_norm": 1.0391871690177923,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 23860
    },
    {
      "epoch": 0.23861,
      "grad_norm": 1.2922037975821985,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 23861
    },
    {
      "epoch": 0.23862,
      "grad_norm": 1.592220877943124,
      "learning_rate": 0.003,
      "loss": 4.0313,
      "step": 23862
    },
    {
      "epoch": 0.23863,
      "grad_norm": 1.4838739569698698,
      "learning_rate": 0.003,
      "loss": 4.0572,
      "step": 23863
    },
    {
      "epoch": 0.23864,
      "grad_norm": 1.0019969279981116,
      "learning_rate": 0.003,
      "loss": 4.0326,
      "step": 23864
    },
    {
      "epoch": 0.23865,
      "grad_norm": 1.504574838198865,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 23865
    },
    {
      "epoch": 0.23866,
      "grad_norm": 1.4502433564161168,
      "learning_rate": 0.003,
      "loss": 4.0493,
      "step": 23866
    },
    {
      "epoch": 0.23867,
      "grad_norm": 1.2194261114786873,
      "learning_rate": 0.003,
      "loss": 4.0735,
      "step": 23867
    },
    {
      "epoch": 0.23868,
      "grad_norm": 1.4199451992544048,
      "learning_rate": 0.003,
      "loss": 4.044,
      "step": 23868
    },
    {
      "epoch": 0.23869,
      "grad_norm": 0.9722460871001879,
      "learning_rate": 0.003,
      "loss": 4.0651,
      "step": 23869
    },
    {
      "epoch": 0.2387,
      "grad_norm": 1.3275709415085666,
      "learning_rate": 0.003,
      "loss": 4.081,
      "step": 23870
    },
    {
      "epoch": 0.23871,
      "grad_norm": 1.3124575739063766,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 23871
    },
    {
      "epoch": 0.23872,
      "grad_norm": 1.12416231680309,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 23872
    },
    {
      "epoch": 0.23873,
      "grad_norm": 1.3655021803586156,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 23873
    },
    {
      "epoch": 0.23874,
      "grad_norm": 1.3642139417615187,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 23874
    },
    {
      "epoch": 0.23875,
      "grad_norm": 1.4203298654199221,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 23875
    },
    {
      "epoch": 0.23876,
      "grad_norm": 1.3287913685715587,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 23876
    },
    {
      "epoch": 0.23877,
      "grad_norm": 1.1979706403322938,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 23877
    },
    {
      "epoch": 0.23878,
      "grad_norm": 1.398354538785704,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 23878
    },
    {
      "epoch": 0.23879,
      "grad_norm": 1.4483339112877203,
      "learning_rate": 0.003,
      "loss": 4.0316,
      "step": 23879
    },
    {
      "epoch": 0.2388,
      "grad_norm": 1.3617929015169337,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 23880
    },
    {
      "epoch": 0.23881,
      "grad_norm": 1.2376669318853526,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 23881
    },
    {
      "epoch": 0.23882,
      "grad_norm": 1.2989851904192538,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 23882
    },
    {
      "epoch": 0.23883,
      "grad_norm": 1.2192240097749598,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 23883
    },
    {
      "epoch": 0.23884,
      "grad_norm": 1.5565234952957556,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 23884
    },
    {
      "epoch": 0.23885,
      "grad_norm": 1.2852851775894476,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 23885
    },
    {
      "epoch": 0.23886,
      "grad_norm": 1.0855797188437504,
      "learning_rate": 0.003,
      "loss": 4.0126,
      "step": 23886
    },
    {
      "epoch": 0.23887,
      "grad_norm": 1.3733923882090262,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 23887
    },
    {
      "epoch": 0.23888,
      "grad_norm": 1.2477493118859189,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 23888
    },
    {
      "epoch": 0.23889,
      "grad_norm": 1.378770132035632,
      "learning_rate": 0.003,
      "loss": 4.0139,
      "step": 23889
    },
    {
      "epoch": 0.2389,
      "grad_norm": 1.1236372215200345,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 23890
    },
    {
      "epoch": 0.23891,
      "grad_norm": 1.4752441865290327,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 23891
    },
    {
      "epoch": 0.23892,
      "grad_norm": 1.491994115649753,
      "learning_rate": 0.003,
      "loss": 4.0241,
      "step": 23892
    },
    {
      "epoch": 0.23893,
      "grad_norm": 1.3611222759248516,
      "learning_rate": 0.003,
      "loss": 4.0527,
      "step": 23893
    },
    {
      "epoch": 0.23894,
      "grad_norm": 1.3772955454325002,
      "learning_rate": 0.003,
      "loss": 4.0696,
      "step": 23894
    },
    {
      "epoch": 0.23895,
      "grad_norm": 1.1042473469982372,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 23895
    },
    {
      "epoch": 0.23896,
      "grad_norm": 1.2933288487486887,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 23896
    },
    {
      "epoch": 0.23897,
      "grad_norm": 1.3700696460329007,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 23897
    },
    {
      "epoch": 0.23898,
      "grad_norm": 1.1711112000889272,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 23898
    },
    {
      "epoch": 0.23899,
      "grad_norm": 1.3390976906059344,
      "learning_rate": 0.003,
      "loss": 4.0173,
      "step": 23899
    },
    {
      "epoch": 0.239,
      "grad_norm": 1.343183119442978,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 23900
    },
    {
      "epoch": 0.23901,
      "grad_norm": 1.1599047190899325,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 23901
    },
    {
      "epoch": 0.23902,
      "grad_norm": 1.3154615208285156,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 23902
    },
    {
      "epoch": 0.23903,
      "grad_norm": 1.2701478756736824,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 23903
    },
    {
      "epoch": 0.23904,
      "grad_norm": 1.3176561557078468,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 23904
    },
    {
      "epoch": 0.23905,
      "grad_norm": 1.517375181933842,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 23905
    },
    {
      "epoch": 0.23906,
      "grad_norm": 1.2153736528093153,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 23906
    },
    {
      "epoch": 0.23907,
      "grad_norm": 1.2966550047377552,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 23907
    },
    {
      "epoch": 0.23908,
      "grad_norm": 1.42990230247806,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 23908
    },
    {
      "epoch": 0.23909,
      "grad_norm": 1.4349264854921642,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 23909
    },
    {
      "epoch": 0.2391,
      "grad_norm": 1.4429118568525954,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 23910
    },
    {
      "epoch": 0.23911,
      "grad_norm": 0.9919740989677,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 23911
    },
    {
      "epoch": 0.23912,
      "grad_norm": 1.6443804619890663,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 23912
    },
    {
      "epoch": 0.23913,
      "grad_norm": 1.1913209952388295,
      "learning_rate": 0.003,
      "loss": 4.0314,
      "step": 23913
    },
    {
      "epoch": 0.23914,
      "grad_norm": 1.4260590770010235,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 23914
    },
    {
      "epoch": 0.23915,
      "grad_norm": 1.2864533400980362,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 23915
    },
    {
      "epoch": 0.23916,
      "grad_norm": 1.174091255374752,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 23916
    },
    {
      "epoch": 0.23917,
      "grad_norm": 1.245240603183376,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 23917
    },
    {
      "epoch": 0.23918,
      "grad_norm": 1.1735040253911069,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 23918
    },
    {
      "epoch": 0.23919,
      "grad_norm": 1.3955068968486166,
      "learning_rate": 0.003,
      "loss": 4.082,
      "step": 23919
    },
    {
      "epoch": 0.2392,
      "grad_norm": 1.343259166379999,
      "learning_rate": 0.003,
      "loss": 4.0393,
      "step": 23920
    },
    {
      "epoch": 0.23921,
      "grad_norm": 1.1960584462807435,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 23921
    },
    {
      "epoch": 0.23922,
      "grad_norm": 1.4362051634626094,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 23922
    },
    {
      "epoch": 0.23923,
      "grad_norm": 1.0843870193553204,
      "learning_rate": 0.003,
      "loss": 4.0113,
      "step": 23923
    },
    {
      "epoch": 0.23924,
      "grad_norm": 1.8139929167710738,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 23924
    },
    {
      "epoch": 0.23925,
      "grad_norm": 1.102214726939177,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 23925
    },
    {
      "epoch": 0.23926,
      "grad_norm": 1.361268818889144,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 23926
    },
    {
      "epoch": 0.23927,
      "grad_norm": 1.2781462400356522,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 23927
    },
    {
      "epoch": 0.23928,
      "grad_norm": 1.4085237853480144,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 23928
    },
    {
      "epoch": 0.23929,
      "grad_norm": 1.1299103698118826,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 23929
    },
    {
      "epoch": 0.2393,
      "grad_norm": 1.1193149770907784,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 23930
    },
    {
      "epoch": 0.23931,
      "grad_norm": 1.2000036179737923,
      "learning_rate": 0.003,
      "loss": 4.0753,
      "step": 23931
    },
    {
      "epoch": 0.23932,
      "grad_norm": 1.3428055454012973,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 23932
    },
    {
      "epoch": 0.23933,
      "grad_norm": 1.1231079789096867,
      "learning_rate": 0.003,
      "loss": 4.0228,
      "step": 23933
    },
    {
      "epoch": 0.23934,
      "grad_norm": 1.2047630901118993,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 23934
    },
    {
      "epoch": 0.23935,
      "grad_norm": 1.349759862704681,
      "learning_rate": 0.003,
      "loss": 4.0446,
      "step": 23935
    },
    {
      "epoch": 0.23936,
      "grad_norm": 1.1590498799121691,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 23936
    },
    {
      "epoch": 0.23937,
      "grad_norm": 1.684076940402808,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 23937
    },
    {
      "epoch": 0.23938,
      "grad_norm": 1.0512109044816607,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 23938
    },
    {
      "epoch": 0.23939,
      "grad_norm": 1.58449620312223,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 23939
    },
    {
      "epoch": 0.2394,
      "grad_norm": 1.1058617777341166,
      "learning_rate": 0.003,
      "loss": 4.0671,
      "step": 23940
    },
    {
      "epoch": 0.23941,
      "grad_norm": 1.4603807552559986,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 23941
    },
    {
      "epoch": 0.23942,
      "grad_norm": 1.3355246499353914,
      "learning_rate": 0.003,
      "loss": 4.0599,
      "step": 23942
    },
    {
      "epoch": 0.23943,
      "grad_norm": 1.130022072646399,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 23943
    },
    {
      "epoch": 0.23944,
      "grad_norm": 1.2861785264164831,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 23944
    },
    {
      "epoch": 0.23945,
      "grad_norm": 1.4141456332668252,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 23945
    },
    {
      "epoch": 0.23946,
      "grad_norm": 1.4075912134877817,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 23946
    },
    {
      "epoch": 0.23947,
      "grad_norm": 1.3455042193960427,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 23947
    },
    {
      "epoch": 0.23948,
      "grad_norm": 1.395223326067633,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 23948
    },
    {
      "epoch": 0.23949,
      "grad_norm": 1.3483496449754433,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 23949
    },
    {
      "epoch": 0.2395,
      "grad_norm": 1.651674997521108,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 23950
    },
    {
      "epoch": 0.23951,
      "grad_norm": 0.9889639249406295,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 23951
    },
    {
      "epoch": 0.23952,
      "grad_norm": 1.3426795766213986,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 23952
    },
    {
      "epoch": 0.23953,
      "grad_norm": 1.3464411438419432,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 23953
    },
    {
      "epoch": 0.23954,
      "grad_norm": 1.3850160219657013,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 23954
    },
    {
      "epoch": 0.23955,
      "grad_norm": 1.3406011368608362,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 23955
    },
    {
      "epoch": 0.23956,
      "grad_norm": 1.187828698337367,
      "learning_rate": 0.003,
      "loss": 4.0719,
      "step": 23956
    },
    {
      "epoch": 0.23957,
      "grad_norm": 1.2188035987237125,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 23957
    },
    {
      "epoch": 0.23958,
      "grad_norm": 1.4967539978718947,
      "learning_rate": 0.003,
      "loss": 4.0749,
      "step": 23958
    },
    {
      "epoch": 0.23959,
      "grad_norm": 1.1673118162064886,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 23959
    },
    {
      "epoch": 0.2396,
      "grad_norm": 1.3084845718881184,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 23960
    },
    {
      "epoch": 0.23961,
      "grad_norm": 1.3004831964065695,
      "learning_rate": 0.003,
      "loss": 4.0217,
      "step": 23961
    },
    {
      "epoch": 0.23962,
      "grad_norm": 1.2292601474880807,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 23962
    },
    {
      "epoch": 0.23963,
      "grad_norm": 1.2771172613658468,
      "learning_rate": 0.003,
      "loss": 4.0114,
      "step": 23963
    },
    {
      "epoch": 0.23964,
      "grad_norm": 1.3711370469195556,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 23964
    },
    {
      "epoch": 0.23965,
      "grad_norm": 1.5315316601779236,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 23965
    },
    {
      "epoch": 0.23966,
      "grad_norm": 1.2648013543316192,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 23966
    },
    {
      "epoch": 0.23967,
      "grad_norm": 1.2972828432340149,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 23967
    },
    {
      "epoch": 0.23968,
      "grad_norm": 1.242978208396597,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 23968
    },
    {
      "epoch": 0.23969,
      "grad_norm": 1.5614535313214628,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 23969
    },
    {
      "epoch": 0.2397,
      "grad_norm": 1.1211391780293622,
      "learning_rate": 0.003,
      "loss": 4.0318,
      "step": 23970
    },
    {
      "epoch": 0.23971,
      "grad_norm": 1.402249127312032,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 23971
    },
    {
      "epoch": 0.23972,
      "grad_norm": 1.3028140291524055,
      "learning_rate": 0.003,
      "loss": 4.0312,
      "step": 23972
    },
    {
      "epoch": 0.23973,
      "grad_norm": 1.2924055010066422,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 23973
    },
    {
      "epoch": 0.23974,
      "grad_norm": 1.2035223173514789,
      "learning_rate": 0.003,
      "loss": 4.0552,
      "step": 23974
    },
    {
      "epoch": 0.23975,
      "grad_norm": 1.2502018478978507,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 23975
    },
    {
      "epoch": 0.23976,
      "grad_norm": 1.1864741266835643,
      "learning_rate": 0.003,
      "loss": 4.0592,
      "step": 23976
    },
    {
      "epoch": 0.23977,
      "grad_norm": 1.4745737306346887,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 23977
    },
    {
      "epoch": 0.23978,
      "grad_norm": 1.3180764479988878,
      "learning_rate": 0.003,
      "loss": 4.0189,
      "step": 23978
    },
    {
      "epoch": 0.23979,
      "grad_norm": 1.2358016750429341,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 23979
    },
    {
      "epoch": 0.2398,
      "grad_norm": 1.6022903238570678,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 23980
    },
    {
      "epoch": 0.23981,
      "grad_norm": 1.1999729232944245,
      "learning_rate": 0.003,
      "loss": 4.0721,
      "step": 23981
    },
    {
      "epoch": 0.23982,
      "grad_norm": 1.563962617779391,
      "learning_rate": 0.003,
      "loss": 4.0405,
      "step": 23982
    },
    {
      "epoch": 0.23983,
      "grad_norm": 1.0149302408333853,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 23983
    },
    {
      "epoch": 0.23984,
      "grad_norm": 1.4591034645874765,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 23984
    },
    {
      "epoch": 0.23985,
      "grad_norm": 1.5435033214114402,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 23985
    },
    {
      "epoch": 0.23986,
      "grad_norm": 1.1173463970891755,
      "learning_rate": 0.003,
      "loss": 4.0205,
      "step": 23986
    },
    {
      "epoch": 0.23987,
      "grad_norm": 1.3489871220510115,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 23987
    },
    {
      "epoch": 0.23988,
      "grad_norm": 1.2899771570825291,
      "learning_rate": 0.003,
      "loss": 4.0579,
      "step": 23988
    },
    {
      "epoch": 0.23989,
      "grad_norm": 1.4604472197547178,
      "learning_rate": 0.003,
      "loss": 4.0198,
      "step": 23989
    },
    {
      "epoch": 0.2399,
      "grad_norm": 1.0023798980935106,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 23990
    },
    {
      "epoch": 0.23991,
      "grad_norm": 1.6076021486754355,
      "learning_rate": 0.003,
      "loss": 4.0425,
      "step": 23991
    },
    {
      "epoch": 0.23992,
      "grad_norm": 1.1405577515065455,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 23992
    },
    {
      "epoch": 0.23993,
      "grad_norm": 1.5959655985346612,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 23993
    },
    {
      "epoch": 0.23994,
      "grad_norm": 1.04858574801776,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 23994
    },
    {
      "epoch": 0.23995,
      "grad_norm": 1.5945825663224402,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 23995
    },
    {
      "epoch": 0.23996,
      "grad_norm": 1.4518659707503745,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 23996
    },
    {
      "epoch": 0.23997,
      "grad_norm": 1.1267224429043454,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 23997
    },
    {
      "epoch": 0.23998,
      "grad_norm": 1.462903258157101,
      "learning_rate": 0.003,
      "loss": 4.0494,
      "step": 23998
    },
    {
      "epoch": 0.23999,
      "grad_norm": 1.1498469435385599,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 23999
    },
    {
      "epoch": 0.24,
      "grad_norm": 1.3310701791888737,
      "learning_rate": 0.003,
      "loss": 4.0662,
      "step": 24000
    },
    {
      "epoch": 0.24001,
      "grad_norm": 1.1982137549497318,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 24001
    },
    {
      "epoch": 0.24002,
      "grad_norm": 1.2646313314859452,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 24002
    },
    {
      "epoch": 0.24003,
      "grad_norm": 1.3246323138326386,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 24003
    },
    {
      "epoch": 0.24004,
      "grad_norm": 1.3850664593683781,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 24004
    },
    {
      "epoch": 0.24005,
      "grad_norm": 1.0703186089822372,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 24005
    },
    {
      "epoch": 0.24006,
      "grad_norm": 1.629681560304667,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 24006
    },
    {
      "epoch": 0.24007,
      "grad_norm": 1.1777003924327751,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 24007
    },
    {
      "epoch": 0.24008,
      "grad_norm": 1.3143147464021314,
      "learning_rate": 0.003,
      "loss": 4.0627,
      "step": 24008
    },
    {
      "epoch": 0.24009,
      "grad_norm": 1.4833433096793711,
      "learning_rate": 0.003,
      "loss": 4.0667,
      "step": 24009
    },
    {
      "epoch": 0.2401,
      "grad_norm": 1.2583862993411046,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 24010
    },
    {
      "epoch": 0.24011,
      "grad_norm": 1.1851522531391423,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 24011
    },
    {
      "epoch": 0.24012,
      "grad_norm": 1.5723383028490112,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 24012
    },
    {
      "epoch": 0.24013,
      "grad_norm": 1.158314434475222,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 24013
    },
    {
      "epoch": 0.24014,
      "grad_norm": 1.357320373169641,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 24014
    },
    {
      "epoch": 0.24015,
      "grad_norm": 1.1867233915244144,
      "learning_rate": 0.003,
      "loss": 4.0215,
      "step": 24015
    },
    {
      "epoch": 0.24016,
      "grad_norm": 1.3372652701141177,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 24016
    },
    {
      "epoch": 0.24017,
      "grad_norm": 1.5583296043937156,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 24017
    },
    {
      "epoch": 0.24018,
      "grad_norm": 1.4495285214185716,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 24018
    },
    {
      "epoch": 0.24019,
      "grad_norm": 1.2649948758687706,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 24019
    },
    {
      "epoch": 0.2402,
      "grad_norm": 1.2851584101425262,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 24020
    },
    {
      "epoch": 0.24021,
      "grad_norm": 1.1646020783663935,
      "learning_rate": 0.003,
      "loss": 4.0179,
      "step": 24021
    },
    {
      "epoch": 0.24022,
      "grad_norm": 1.2377366689250193,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 24022
    },
    {
      "epoch": 0.24023,
      "grad_norm": 1.1456376136850606,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 24023
    },
    {
      "epoch": 0.24024,
      "grad_norm": 1.3262440424356252,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 24024
    },
    {
      "epoch": 0.24025,
      "grad_norm": 1.1368202150579607,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 24025
    },
    {
      "epoch": 0.24026,
      "grad_norm": 1.5654731240234023,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 24026
    },
    {
      "epoch": 0.24027,
      "grad_norm": 1.2113056981443557,
      "learning_rate": 0.003,
      "loss": 4.0745,
      "step": 24027
    },
    {
      "epoch": 0.24028,
      "grad_norm": 1.2880896491858853,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 24028
    },
    {
      "epoch": 0.24029,
      "grad_norm": 1.2484307292541577,
      "learning_rate": 0.003,
      "loss": 4.0226,
      "step": 24029
    },
    {
      "epoch": 0.2403,
      "grad_norm": 1.3369505276842217,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 24030
    },
    {
      "epoch": 0.24031,
      "grad_norm": 1.2610814423552485,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 24031
    },
    {
      "epoch": 0.24032,
      "grad_norm": 1.5951341796225895,
      "learning_rate": 0.003,
      "loss": 4.0041,
      "step": 24032
    },
    {
      "epoch": 0.24033,
      "grad_norm": 1.0315160471783094,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 24033
    },
    {
      "epoch": 0.24034,
      "grad_norm": 1.6147503672621737,
      "learning_rate": 0.003,
      "loss": 4.0775,
      "step": 24034
    },
    {
      "epoch": 0.24035,
      "grad_norm": 1.1054836851268373,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 24035
    },
    {
      "epoch": 0.24036,
      "grad_norm": 1.3727561803944797,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 24036
    },
    {
      "epoch": 0.24037,
      "grad_norm": 1.2110746012597284,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 24037
    },
    {
      "epoch": 0.24038,
      "grad_norm": 1.3168542529178489,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 24038
    },
    {
      "epoch": 0.24039,
      "grad_norm": 1.2606419184589108,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 24039
    },
    {
      "epoch": 0.2404,
      "grad_norm": 1.5938366407875177,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 24040
    },
    {
      "epoch": 0.24041,
      "grad_norm": 1.1955604830900401,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 24041
    },
    {
      "epoch": 0.24042,
      "grad_norm": 1.3797002199196038,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 24042
    },
    {
      "epoch": 0.24043,
      "grad_norm": 1.304431247908543,
      "learning_rate": 0.003,
      "loss": 4.0723,
      "step": 24043
    },
    {
      "epoch": 0.24044,
      "grad_norm": 1.5420504565226043,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 24044
    },
    {
      "epoch": 0.24045,
      "grad_norm": 1.1785198375684904,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 24045
    },
    {
      "epoch": 0.24046,
      "grad_norm": 1.2507553757849583,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 24046
    },
    {
      "epoch": 0.24047,
      "grad_norm": 1.5860365861893448,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 24047
    },
    {
      "epoch": 0.24048,
      "grad_norm": 1.1769672222956735,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 24048
    },
    {
      "epoch": 0.24049,
      "grad_norm": 1.3754960823087479,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 24049
    },
    {
      "epoch": 0.2405,
      "grad_norm": 1.2091215555240222,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 24050
    },
    {
      "epoch": 0.24051,
      "grad_norm": 1.3007701248629797,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 24051
    },
    {
      "epoch": 0.24052,
      "grad_norm": 1.3936599313191824,
      "learning_rate": 0.003,
      "loss": 4.046,
      "step": 24052
    },
    {
      "epoch": 0.24053,
      "grad_norm": 1.216248177628059,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 24053
    },
    {
      "epoch": 0.24054,
      "grad_norm": 1.572958969016613,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24054
    },
    {
      "epoch": 0.24055,
      "grad_norm": 1.2010040821767707,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 24055
    },
    {
      "epoch": 0.24056,
      "grad_norm": 1.287138029491045,
      "learning_rate": 0.003,
      "loss": 4.0275,
      "step": 24056
    },
    {
      "epoch": 0.24057,
      "grad_norm": 1.4102215201793016,
      "learning_rate": 0.003,
      "loss": 4.0174,
      "step": 24057
    },
    {
      "epoch": 0.24058,
      "grad_norm": 1.0508476097811532,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 24058
    },
    {
      "epoch": 0.24059,
      "grad_norm": 1.3301794685429869,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 24059
    },
    {
      "epoch": 0.2406,
      "grad_norm": 1.2957012304118594,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 24060
    },
    {
      "epoch": 0.24061,
      "grad_norm": 1.4571789840800085,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 24061
    },
    {
      "epoch": 0.24062,
      "grad_norm": 1.0925374024588101,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 24062
    },
    {
      "epoch": 0.24063,
      "grad_norm": 1.3884803422530934,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 24063
    },
    {
      "epoch": 0.24064,
      "grad_norm": 1.2080720749884977,
      "learning_rate": 0.003,
      "loss": 4.006,
      "step": 24064
    },
    {
      "epoch": 0.24065,
      "grad_norm": 1.2546024585843576,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 24065
    },
    {
      "epoch": 0.24066,
      "grad_norm": 1.3292052379495949,
      "learning_rate": 0.003,
      "loss": 4.051,
      "step": 24066
    },
    {
      "epoch": 0.24067,
      "grad_norm": 1.1706258732078991,
      "learning_rate": 0.003,
      "loss": 4.0458,
      "step": 24067
    },
    {
      "epoch": 0.24068,
      "grad_norm": 1.2431780350750468,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 24068
    },
    {
      "epoch": 0.24069,
      "grad_norm": 1.2778698583267054,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 24069
    },
    {
      "epoch": 0.2407,
      "grad_norm": 1.4366405045158976,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 24070
    },
    {
      "epoch": 0.24071,
      "grad_norm": 1.2773845164410116,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 24071
    },
    {
      "epoch": 0.24072,
      "grad_norm": 1.3043612377841143,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 24072
    },
    {
      "epoch": 0.24073,
      "grad_norm": 1.0605411956497945,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 24073
    },
    {
      "epoch": 0.24074,
      "grad_norm": 1.576054650539737,
      "learning_rate": 0.003,
      "loss": 4.0926,
      "step": 24074
    },
    {
      "epoch": 0.24075,
      "grad_norm": 1.4201987062640198,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 24075
    },
    {
      "epoch": 0.24076,
      "grad_norm": 1.4596407788197026,
      "learning_rate": 0.003,
      "loss": 4.0977,
      "step": 24076
    },
    {
      "epoch": 0.24077,
      "grad_norm": 1.1723779214160353,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 24077
    },
    {
      "epoch": 0.24078,
      "grad_norm": 1.3307489772858394,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 24078
    },
    {
      "epoch": 0.24079,
      "grad_norm": 1.2614043198420335,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 24079
    },
    {
      "epoch": 0.2408,
      "grad_norm": 1.18445922083957,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 24080
    },
    {
      "epoch": 0.24081,
      "grad_norm": 1.2861209160470866,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 24081
    },
    {
      "epoch": 0.24082,
      "grad_norm": 1.3215214659767252,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 24082
    },
    {
      "epoch": 0.24083,
      "grad_norm": 1.342771261033664,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 24083
    },
    {
      "epoch": 0.24084,
      "grad_norm": 1.177998771956405,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 24084
    },
    {
      "epoch": 0.24085,
      "grad_norm": 1.2560780848427364,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 24085
    },
    {
      "epoch": 0.24086,
      "grad_norm": 1.2655013123576726,
      "learning_rate": 0.003,
      "loss": 4.0222,
      "step": 24086
    },
    {
      "epoch": 0.24087,
      "grad_norm": 1.401608379446091,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 24087
    },
    {
      "epoch": 0.24088,
      "grad_norm": 1.412674505098607,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 24088
    },
    {
      "epoch": 0.24089,
      "grad_norm": 1.338184524837426,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24089
    },
    {
      "epoch": 0.2409,
      "grad_norm": 1.4202003864751755,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 24090
    },
    {
      "epoch": 0.24091,
      "grad_norm": 1.2523241458205954,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 24091
    },
    {
      "epoch": 0.24092,
      "grad_norm": 1.6625972411922345,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 24092
    },
    {
      "epoch": 0.24093,
      "grad_norm": 1.158347401400497,
      "learning_rate": 0.003,
      "loss": 3.9999,
      "step": 24093
    },
    {
      "epoch": 0.24094,
      "grad_norm": 1.4852284057617953,
      "learning_rate": 0.003,
      "loss": 4.0185,
      "step": 24094
    },
    {
      "epoch": 0.24095,
      "grad_norm": 1.0070997358920182,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 24095
    },
    {
      "epoch": 0.24096,
      "grad_norm": 1.3903977487078547,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 24096
    },
    {
      "epoch": 0.24097,
      "grad_norm": 1.4053241640400327,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 24097
    },
    {
      "epoch": 0.24098,
      "grad_norm": 1.3461881361065275,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 24098
    },
    {
      "epoch": 0.24099,
      "grad_norm": 1.2159455208325554,
      "learning_rate": 0.003,
      "loss": 4.0526,
      "step": 24099
    },
    {
      "epoch": 0.241,
      "grad_norm": 1.1785523608312385,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 24100
    },
    {
      "epoch": 0.24101,
      "grad_norm": 1.4863888449565132,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 24101
    },
    {
      "epoch": 0.24102,
      "grad_norm": 0.9853543881409829,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 24102
    },
    {
      "epoch": 0.24103,
      "grad_norm": 1.51006194624815,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 24103
    },
    {
      "epoch": 0.24104,
      "grad_norm": 1.3078043330706701,
      "learning_rate": 0.003,
      "loss": 4.0107,
      "step": 24104
    },
    {
      "epoch": 0.24105,
      "grad_norm": 1.2549421001517347,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 24105
    },
    {
      "epoch": 0.24106,
      "grad_norm": 1.3438400649769986,
      "learning_rate": 0.003,
      "loss": 4.0821,
      "step": 24106
    },
    {
      "epoch": 0.24107,
      "grad_norm": 1.151860758649009,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 24107
    },
    {
      "epoch": 0.24108,
      "grad_norm": 1.316968475541291,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 24108
    },
    {
      "epoch": 0.24109,
      "grad_norm": 1.3956319483161022,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 24109
    },
    {
      "epoch": 0.2411,
      "grad_norm": 1.395446475941159,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 24110
    },
    {
      "epoch": 0.24111,
      "grad_norm": 1.1877786251620326,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 24111
    },
    {
      "epoch": 0.24112,
      "grad_norm": 2.061453801897167,
      "learning_rate": 0.003,
      "loss": 4.0582,
      "step": 24112
    },
    {
      "epoch": 0.24113,
      "grad_norm": 1.3566006701899251,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 24113
    },
    {
      "epoch": 0.24114,
      "grad_norm": 1.267988731299872,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 24114
    },
    {
      "epoch": 0.24115,
      "grad_norm": 1.2913275469196175,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 24115
    },
    {
      "epoch": 0.24116,
      "grad_norm": 1.484784690043986,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 24116
    },
    {
      "epoch": 0.24117,
      "grad_norm": 1.217173827574909,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24117
    },
    {
      "epoch": 0.24118,
      "grad_norm": 1.3945843985752577,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 24118
    },
    {
      "epoch": 0.24119,
      "grad_norm": 1.1373727010916976,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 24119
    },
    {
      "epoch": 0.2412,
      "grad_norm": 1.383729558410073,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 24120
    },
    {
      "epoch": 0.24121,
      "grad_norm": 1.0536316602535394,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 24121
    },
    {
      "epoch": 0.24122,
      "grad_norm": 1.61885902593575,
      "learning_rate": 0.003,
      "loss": 4.0074,
      "step": 24122
    },
    {
      "epoch": 0.24123,
      "grad_norm": 1.1815668812768427,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 24123
    },
    {
      "epoch": 0.24124,
      "grad_norm": 1.4004992415250197,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24124
    },
    {
      "epoch": 0.24125,
      "grad_norm": 1.3826544078435026,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 24125
    },
    {
      "epoch": 0.24126,
      "grad_norm": 1.3714547949108018,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 24126
    },
    {
      "epoch": 0.24127,
      "grad_norm": 1.1468589706740249,
      "learning_rate": 0.003,
      "loss": 4.0112,
      "step": 24127
    },
    {
      "epoch": 0.24128,
      "grad_norm": 1.609320363080084,
      "learning_rate": 0.003,
      "loss": 4.0733,
      "step": 24128
    },
    {
      "epoch": 0.24129,
      "grad_norm": 1.0848059707492403,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 24129
    },
    {
      "epoch": 0.2413,
      "grad_norm": 1.397503821827954,
      "learning_rate": 0.003,
      "loss": 4.0487,
      "step": 24130
    },
    {
      "epoch": 0.24131,
      "grad_norm": 1.1307636939554195,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 24131
    },
    {
      "epoch": 0.24132,
      "grad_norm": 1.4307423328710627,
      "learning_rate": 0.003,
      "loss": 4.0291,
      "step": 24132
    },
    {
      "epoch": 0.24133,
      "grad_norm": 1.2789014925741,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 24133
    },
    {
      "epoch": 0.24134,
      "grad_norm": 1.5011241911903856,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 24134
    },
    {
      "epoch": 0.24135,
      "grad_norm": 1.0176273730996737,
      "learning_rate": 0.003,
      "loss": 4.0448,
      "step": 24135
    },
    {
      "epoch": 0.24136,
      "grad_norm": 1.3978111834325413,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 24136
    },
    {
      "epoch": 0.24137,
      "grad_norm": 1.446912261572512,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 24137
    },
    {
      "epoch": 0.24138,
      "grad_norm": 1.1992773910946033,
      "learning_rate": 0.003,
      "loss": 4.0261,
      "step": 24138
    },
    {
      "epoch": 0.24139,
      "grad_norm": 1.3476554319462777,
      "learning_rate": 0.003,
      "loss": 4.0422,
      "step": 24139
    },
    {
      "epoch": 0.2414,
      "grad_norm": 1.24275737536399,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 24140
    },
    {
      "epoch": 0.24141,
      "grad_norm": 1.4448544422715612,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 24141
    },
    {
      "epoch": 0.24142,
      "grad_norm": 1.1972062783899364,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 24142
    },
    {
      "epoch": 0.24143,
      "grad_norm": 1.7132485479267714,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 24143
    },
    {
      "epoch": 0.24144,
      "grad_norm": 1.0388734757674982,
      "learning_rate": 0.003,
      "loss": 4.042,
      "step": 24144
    },
    {
      "epoch": 0.24145,
      "grad_norm": 1.599562870156419,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24145
    },
    {
      "epoch": 0.24146,
      "grad_norm": 1.1054069167389455,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 24146
    },
    {
      "epoch": 0.24147,
      "grad_norm": 1.4286220053188623,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 24147
    },
    {
      "epoch": 0.24148,
      "grad_norm": 1.1585888574766916,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 24148
    },
    {
      "epoch": 0.24149,
      "grad_norm": 1.5220214127417249,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 24149
    },
    {
      "epoch": 0.2415,
      "grad_norm": 1.2399500095587201,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24150
    },
    {
      "epoch": 0.24151,
      "grad_norm": 1.4139993181087314,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 24151
    },
    {
      "epoch": 0.24152,
      "grad_norm": 1.1853165761252153,
      "learning_rate": 0.003,
      "loss": 4.0429,
      "step": 24152
    },
    {
      "epoch": 0.24153,
      "grad_norm": 1.3849721384973632,
      "learning_rate": 0.003,
      "loss": 4.0364,
      "step": 24153
    },
    {
      "epoch": 0.24154,
      "grad_norm": 1.2048305559811268,
      "learning_rate": 0.003,
      "loss": 4.0454,
      "step": 24154
    },
    {
      "epoch": 0.24155,
      "grad_norm": 1.215891108060119,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 24155
    },
    {
      "epoch": 0.24156,
      "grad_norm": 1.3866339481387726,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 24156
    },
    {
      "epoch": 0.24157,
      "grad_norm": 1.2027140089421595,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 24157
    },
    {
      "epoch": 0.24158,
      "grad_norm": 1.2553627133674687,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 24158
    },
    {
      "epoch": 0.24159,
      "grad_norm": 1.4002793767433759,
      "learning_rate": 0.003,
      "loss": 4.0478,
      "step": 24159
    },
    {
      "epoch": 0.2416,
      "grad_norm": 1.2452213689519354,
      "learning_rate": 0.003,
      "loss": 4.0412,
      "step": 24160
    },
    {
      "epoch": 0.24161,
      "grad_norm": 1.377309109897054,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24161
    },
    {
      "epoch": 0.24162,
      "grad_norm": 1.1850278520964292,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 24162
    },
    {
      "epoch": 0.24163,
      "grad_norm": 1.2707558088155932,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 24163
    },
    {
      "epoch": 0.24164,
      "grad_norm": 1.158081770626405,
      "learning_rate": 0.003,
      "loss": 4.0077,
      "step": 24164
    },
    {
      "epoch": 0.24165,
      "grad_norm": 1.3424118307531159,
      "learning_rate": 0.003,
      "loss": 4.0544,
      "step": 24165
    },
    {
      "epoch": 0.24166,
      "grad_norm": 1.1320121788540976,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 24166
    },
    {
      "epoch": 0.24167,
      "grad_norm": 1.4212208024498993,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 24167
    },
    {
      "epoch": 0.24168,
      "grad_norm": 1.2979045883112956,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 24168
    },
    {
      "epoch": 0.24169,
      "grad_norm": 1.4194937361222688,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 24169
    },
    {
      "epoch": 0.2417,
      "grad_norm": 1.3487465387417823,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 24170
    },
    {
      "epoch": 0.24171,
      "grad_norm": 1.3690426466054055,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 24171
    },
    {
      "epoch": 0.24172,
      "grad_norm": 1.4580168425443805,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 24172
    },
    {
      "epoch": 0.24173,
      "grad_norm": 1.1467184413618816,
      "learning_rate": 0.003,
      "loss": 4.0396,
      "step": 24173
    },
    {
      "epoch": 0.24174,
      "grad_norm": 1.413430395678707,
      "learning_rate": 0.003,
      "loss": 4.0104,
      "step": 24174
    },
    {
      "epoch": 0.24175,
      "grad_norm": 1.1808060653876629,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 24175
    },
    {
      "epoch": 0.24176,
      "grad_norm": 1.4933671896603207,
      "learning_rate": 0.003,
      "loss": 4.067,
      "step": 24176
    },
    {
      "epoch": 0.24177,
      "grad_norm": 1.2332074232620434,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 24177
    },
    {
      "epoch": 0.24178,
      "grad_norm": 1.4811142454039032,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 24178
    },
    {
      "epoch": 0.24179,
      "grad_norm": 1.114262381817086,
      "learning_rate": 0.003,
      "loss": 4.0221,
      "step": 24179
    },
    {
      "epoch": 0.2418,
      "grad_norm": 1.4990236239677852,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 24180
    },
    {
      "epoch": 0.24181,
      "grad_norm": 1.2373042967318026,
      "learning_rate": 0.003,
      "loss": 4.0207,
      "step": 24181
    },
    {
      "epoch": 0.24182,
      "grad_norm": 1.1170506943862697,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 24182
    },
    {
      "epoch": 0.24183,
      "grad_norm": 1.448564075754916,
      "learning_rate": 0.003,
      "loss": 4.0268,
      "step": 24183
    },
    {
      "epoch": 0.24184,
      "grad_norm": 1.010696038721016,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 24184
    },
    {
      "epoch": 0.24185,
      "grad_norm": 1.4877647877060574,
      "learning_rate": 0.003,
      "loss": 4.0244,
      "step": 24185
    },
    {
      "epoch": 0.24186,
      "grad_norm": 1.2688204068011295,
      "learning_rate": 0.003,
      "loss": 4.0687,
      "step": 24186
    },
    {
      "epoch": 0.24187,
      "grad_norm": 1.4597897955671664,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 24187
    },
    {
      "epoch": 0.24188,
      "grad_norm": 1.2855935580104527,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 24188
    },
    {
      "epoch": 0.24189,
      "grad_norm": 1.4047741224409784,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 24189
    },
    {
      "epoch": 0.2419,
      "grad_norm": 1.1240708415452145,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 24190
    },
    {
      "epoch": 0.24191,
      "grad_norm": 1.366042198017698,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 24191
    },
    {
      "epoch": 0.24192,
      "grad_norm": 1.1332015181389599,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 24192
    },
    {
      "epoch": 0.24193,
      "grad_norm": 1.4331915469323964,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 24193
    },
    {
      "epoch": 0.24194,
      "grad_norm": 1.3090691943231467,
      "learning_rate": 0.003,
      "loss": 4.0349,
      "step": 24194
    },
    {
      "epoch": 0.24195,
      "grad_norm": 1.368338182236077,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 24195
    },
    {
      "epoch": 0.24196,
      "grad_norm": 1.162640578033725,
      "learning_rate": 0.003,
      "loss": 4.0869,
      "step": 24196
    },
    {
      "epoch": 0.24197,
      "grad_norm": 1.3403368656488615,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 24197
    },
    {
      "epoch": 0.24198,
      "grad_norm": 1.1680758027513731,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 24198
    },
    {
      "epoch": 0.24199,
      "grad_norm": 1.7607108783389123,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 24199
    },
    {
      "epoch": 0.242,
      "grad_norm": 1.2047350174285414,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 24200
    },
    {
      "epoch": 0.24201,
      "grad_norm": 1.0961437779727718,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 24201
    },
    {
      "epoch": 0.24202,
      "grad_norm": 1.434942644690441,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 24202
    },
    {
      "epoch": 0.24203,
      "grad_norm": 1.295186528501165,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 24203
    },
    {
      "epoch": 0.24204,
      "grad_norm": 1.5122603337371539,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 24204
    },
    {
      "epoch": 0.24205,
      "grad_norm": 1.048177867933873,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 24205
    },
    {
      "epoch": 0.24206,
      "grad_norm": 1.467996421274092,
      "learning_rate": 0.003,
      "loss": 4.0162,
      "step": 24206
    },
    {
      "epoch": 0.24207,
      "grad_norm": 1.1646541773412056,
      "learning_rate": 0.003,
      "loss": 4.0642,
      "step": 24207
    },
    {
      "epoch": 0.24208,
      "grad_norm": 1.414036026722916,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 24208
    },
    {
      "epoch": 0.24209,
      "grad_norm": 1.1193202830813809,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 24209
    },
    {
      "epoch": 0.2421,
      "grad_norm": 1.3794354522643395,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 24210
    },
    {
      "epoch": 0.24211,
      "grad_norm": 1.4281462957787163,
      "learning_rate": 0.003,
      "loss": 4.0756,
      "step": 24211
    },
    {
      "epoch": 0.24212,
      "grad_norm": 1.4723415243212516,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 24212
    },
    {
      "epoch": 0.24213,
      "grad_norm": 1.5812721690714122,
      "learning_rate": 0.003,
      "loss": 4.0824,
      "step": 24213
    },
    {
      "epoch": 0.24214,
      "grad_norm": 1.2187648935517055,
      "learning_rate": 0.003,
      "loss": 4.0623,
      "step": 24214
    },
    {
      "epoch": 0.24215,
      "grad_norm": 1.3346064574693712,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 24215
    },
    {
      "epoch": 0.24216,
      "grad_norm": 1.1344116079183073,
      "learning_rate": 0.003,
      "loss": 4.0247,
      "step": 24216
    },
    {
      "epoch": 0.24217,
      "grad_norm": 1.2778453683659157,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 24217
    },
    {
      "epoch": 0.24218,
      "grad_norm": 1.2674101944841287,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 24218
    },
    {
      "epoch": 0.24219,
      "grad_norm": 1.1557672568613029,
      "learning_rate": 0.003,
      "loss": 4.0279,
      "step": 24219
    },
    {
      "epoch": 0.2422,
      "grad_norm": 1.481880075511149,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 24220
    },
    {
      "epoch": 0.24221,
      "grad_norm": 1.371758660476148,
      "learning_rate": 0.003,
      "loss": 4.0267,
      "step": 24221
    },
    {
      "epoch": 0.24222,
      "grad_norm": 1.308356176851615,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 24222
    },
    {
      "epoch": 0.24223,
      "grad_norm": 1.1225815741420113,
      "learning_rate": 0.003,
      "loss": 4.0129,
      "step": 24223
    },
    {
      "epoch": 0.24224,
      "grad_norm": 1.6620733678889827,
      "learning_rate": 0.003,
      "loss": 4.0406,
      "step": 24224
    },
    {
      "epoch": 0.24225,
      "grad_norm": 1.0717480896235967,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 24225
    },
    {
      "epoch": 0.24226,
      "grad_norm": 1.568252971844198,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 24226
    },
    {
      "epoch": 0.24227,
      "grad_norm": 1.047001777031136,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 24227
    },
    {
      "epoch": 0.24228,
      "grad_norm": 1.578879586049578,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24228
    },
    {
      "epoch": 0.24229,
      "grad_norm": 1.255557463317058,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 24229
    },
    {
      "epoch": 0.2423,
      "grad_norm": 1.3344929981570273,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 24230
    },
    {
      "epoch": 0.24231,
      "grad_norm": 1.2765307928136662,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 24231
    },
    {
      "epoch": 0.24232,
      "grad_norm": 1.1689759846324472,
      "learning_rate": 0.003,
      "loss": 4.0144,
      "step": 24232
    },
    {
      "epoch": 0.24233,
      "grad_norm": 1.277077445032698,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 24233
    },
    {
      "epoch": 0.24234,
      "grad_norm": 1.3343097937856678,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 24234
    },
    {
      "epoch": 0.24235,
      "grad_norm": 1.2624879423626758,
      "learning_rate": 0.003,
      "loss": 4.0051,
      "step": 24235
    },
    {
      "epoch": 0.24236,
      "grad_norm": 1.5093815483667568,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 24236
    },
    {
      "epoch": 0.24237,
      "grad_norm": 1.1616988737126757,
      "learning_rate": 0.003,
      "loss": 4.0322,
      "step": 24237
    },
    {
      "epoch": 0.24238,
      "grad_norm": 1.32435415953714,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 24238
    },
    {
      "epoch": 0.24239,
      "grad_norm": 1.3531919944226536,
      "learning_rate": 0.003,
      "loss": 4.0537,
      "step": 24239
    },
    {
      "epoch": 0.2424,
      "grad_norm": 1.277315980957559,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24240
    },
    {
      "epoch": 0.24241,
      "grad_norm": 1.249898718096146,
      "learning_rate": 0.003,
      "loss": 4.0383,
      "step": 24241
    },
    {
      "epoch": 0.24242,
      "grad_norm": 1.3099634113763017,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 24242
    },
    {
      "epoch": 0.24243,
      "grad_norm": 1.3078930431906108,
      "learning_rate": 0.003,
      "loss": 4.0339,
      "step": 24243
    },
    {
      "epoch": 0.24244,
      "grad_norm": 1.2572928772215564,
      "learning_rate": 0.003,
      "loss": 4.0296,
      "step": 24244
    },
    {
      "epoch": 0.24245,
      "grad_norm": 1.3101455000662532,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 24245
    },
    {
      "epoch": 0.24246,
      "grad_norm": 1.1508340042190612,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 24246
    },
    {
      "epoch": 0.24247,
      "grad_norm": 1.5441562857769022,
      "learning_rate": 0.003,
      "loss": 4.0587,
      "step": 24247
    },
    {
      "epoch": 0.24248,
      "grad_norm": 1.2496805076173356,
      "learning_rate": 0.003,
      "loss": 4.0663,
      "step": 24248
    },
    {
      "epoch": 0.24249,
      "grad_norm": 1.4245732948720276,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 24249
    },
    {
      "epoch": 0.2425,
      "grad_norm": 1.3938018914946457,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 24250
    },
    {
      "epoch": 0.24251,
      "grad_norm": 1.2968316982519055,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 24251
    },
    {
      "epoch": 0.24252,
      "grad_norm": 1.3120068378531047,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 24252
    },
    {
      "epoch": 0.24253,
      "grad_norm": 1.2013476087596637,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 24253
    },
    {
      "epoch": 0.24254,
      "grad_norm": 1.2935208700558307,
      "learning_rate": 0.003,
      "loss": 4.0377,
      "step": 24254
    },
    {
      "epoch": 0.24255,
      "grad_norm": 1.1377021591754488,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 24255
    },
    {
      "epoch": 0.24256,
      "grad_norm": 1.412766070803767,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 24256
    },
    {
      "epoch": 0.24257,
      "grad_norm": 1.1053486006144808,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 24257
    },
    {
      "epoch": 0.24258,
      "grad_norm": 1.4926667315805335,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 24258
    },
    {
      "epoch": 0.24259,
      "grad_norm": 1.5672772468871516,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 24259
    },
    {
      "epoch": 0.2426,
      "grad_norm": 1.1536041794405874,
      "learning_rate": 0.003,
      "loss": 4.0413,
      "step": 24260
    },
    {
      "epoch": 0.24261,
      "grad_norm": 1.3519942069054107,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 24261
    },
    {
      "epoch": 0.24262,
      "grad_norm": 1.3141326041698043,
      "learning_rate": 0.003,
      "loss": 4.0546,
      "step": 24262
    },
    {
      "epoch": 0.24263,
      "grad_norm": 1.198962458986583,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 24263
    },
    {
      "epoch": 0.24264,
      "grad_norm": 1.0927644248715185,
      "learning_rate": 0.003,
      "loss": 4.0792,
      "step": 24264
    },
    {
      "epoch": 0.24265,
      "grad_norm": 1.3462870878086504,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 24265
    },
    {
      "epoch": 0.24266,
      "grad_norm": 1.3672031064169925,
      "learning_rate": 0.003,
      "loss": 4.0243,
      "step": 24266
    },
    {
      "epoch": 0.24267,
      "grad_norm": 1.291376543037462,
      "learning_rate": 0.003,
      "loss": 4.054,
      "step": 24267
    },
    {
      "epoch": 0.24268,
      "grad_norm": 1.3418901765583673,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 24268
    },
    {
      "epoch": 0.24269,
      "grad_norm": 1.3574717174686124,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 24269
    },
    {
      "epoch": 0.2427,
      "grad_norm": 1.3571988322609831,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 24270
    },
    {
      "epoch": 0.24271,
      "grad_norm": 1.2935055122759802,
      "learning_rate": 0.003,
      "loss": 4.0232,
      "step": 24271
    },
    {
      "epoch": 0.24272,
      "grad_norm": 1.3327856806884089,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 24272
    },
    {
      "epoch": 0.24273,
      "grad_norm": 1.198564049426773,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 24273
    },
    {
      "epoch": 0.24274,
      "grad_norm": 1.2504595965816272,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 24274
    },
    {
      "epoch": 0.24275,
      "grad_norm": 1.4428564297078146,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 24275
    },
    {
      "epoch": 0.24276,
      "grad_norm": 1.1674450737238984,
      "learning_rate": 0.003,
      "loss": 4.0295,
      "step": 24276
    },
    {
      "epoch": 0.24277,
      "grad_norm": 1.3185220628168617,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 24277
    },
    {
      "epoch": 0.24278,
      "grad_norm": 1.2340535805395114,
      "learning_rate": 0.003,
      "loss": 4.0341,
      "step": 24278
    },
    {
      "epoch": 0.24279,
      "grad_norm": 1.507975611775976,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 24279
    },
    {
      "epoch": 0.2428,
      "grad_norm": 1.2131537403595838,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 24280
    },
    {
      "epoch": 0.24281,
      "grad_norm": 1.2613812387359136,
      "learning_rate": 0.003,
      "loss": 4.0614,
      "step": 24281
    },
    {
      "epoch": 0.24282,
      "grad_norm": 1.264160759089061,
      "learning_rate": 0.003,
      "loss": 4.0209,
      "step": 24282
    },
    {
      "epoch": 0.24283,
      "grad_norm": 1.3453013059909218,
      "learning_rate": 0.003,
      "loss": 4.0359,
      "step": 24283
    },
    {
      "epoch": 0.24284,
      "grad_norm": 1.5025609629209615,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 24284
    },
    {
      "epoch": 0.24285,
      "grad_norm": 1.231970294944696,
      "learning_rate": 0.003,
      "loss": 4.0159,
      "step": 24285
    },
    {
      "epoch": 0.24286,
      "grad_norm": 1.4273096806634538,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 24286
    },
    {
      "epoch": 0.24287,
      "grad_norm": 1.1738704895508498,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 24287
    },
    {
      "epoch": 0.24288,
      "grad_norm": 1.3807691622228144,
      "learning_rate": 0.003,
      "loss": 4.0178,
      "step": 24288
    },
    {
      "epoch": 0.24289,
      "grad_norm": 1.446508532115049,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 24289
    },
    {
      "epoch": 0.2429,
      "grad_norm": 1.5243046036714811,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 24290
    },
    {
      "epoch": 0.24291,
      "grad_norm": 1.279020751005401,
      "learning_rate": 0.003,
      "loss": 4.0251,
      "step": 24291
    },
    {
      "epoch": 0.24292,
      "grad_norm": 1.344298576833801,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 24292
    },
    {
      "epoch": 0.24293,
      "grad_norm": 1.34572143624435,
      "learning_rate": 0.003,
      "loss": 3.9985,
      "step": 24293
    },
    {
      "epoch": 0.24294,
      "grad_norm": 1.2387541801867668,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 24294
    },
    {
      "epoch": 0.24295,
      "grad_norm": 1.2741373636751783,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 24295
    },
    {
      "epoch": 0.24296,
      "grad_norm": 1.2745234807409727,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 24296
    },
    {
      "epoch": 0.24297,
      "grad_norm": 1.0891295685417062,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 24297
    },
    {
      "epoch": 0.24298,
      "grad_norm": 1.1796609971169227,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 24298
    },
    {
      "epoch": 0.24299,
      "grad_norm": 1.379088872647827,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 24299
    },
    {
      "epoch": 0.243,
      "grad_norm": 1.3023287424810517,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 24300
    },
    {
      "epoch": 0.24301,
      "grad_norm": 1.3542299752803761,
      "learning_rate": 0.003,
      "loss": 4.01,
      "step": 24301
    },
    {
      "epoch": 0.24302,
      "grad_norm": 1.097581145180247,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 24302
    },
    {
      "epoch": 0.24303,
      "grad_norm": 1.4773640604740808,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 24303
    },
    {
      "epoch": 0.24304,
      "grad_norm": 1.0991500770619063,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 24304
    },
    {
      "epoch": 0.24305,
      "grad_norm": 1.608905695567478,
      "learning_rate": 0.003,
      "loss": 4.0668,
      "step": 24305
    },
    {
      "epoch": 0.24306,
      "grad_norm": 1.2088648847581218,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 24306
    },
    {
      "epoch": 0.24307,
      "grad_norm": 1.5473059889716545,
      "learning_rate": 0.003,
      "loss": 4.0878,
      "step": 24307
    },
    {
      "epoch": 0.24308,
      "grad_norm": 1.1063845682215177,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 24308
    },
    {
      "epoch": 0.24309,
      "grad_norm": 1.7032377624789472,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 24309
    },
    {
      "epoch": 0.2431,
      "grad_norm": 1.2348758434532585,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 24310
    },
    {
      "epoch": 0.24311,
      "grad_norm": 1.675901933527108,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 24311
    },
    {
      "epoch": 0.24312,
      "grad_norm": 1.28701685329977,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 24312
    },
    {
      "epoch": 0.24313,
      "grad_norm": 1.2493169303194676,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 24313
    },
    {
      "epoch": 0.24314,
      "grad_norm": 1.3730708220398244,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 24314
    },
    {
      "epoch": 0.24315,
      "grad_norm": 1.3990743664317271,
      "learning_rate": 0.003,
      "loss": 4.055,
      "step": 24315
    },
    {
      "epoch": 0.24316,
      "grad_norm": 1.4150105767793484,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 24316
    },
    {
      "epoch": 0.24317,
      "grad_norm": 1.3229023075818864,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 24317
    },
    {
      "epoch": 0.24318,
      "grad_norm": 1.316400854388295,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 24318
    },
    {
      "epoch": 0.24319,
      "grad_norm": 1.2861302703731605,
      "learning_rate": 0.003,
      "loss": 4.0386,
      "step": 24319
    },
    {
      "epoch": 0.2432,
      "grad_norm": 1.2302203390549422,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 24320
    },
    {
      "epoch": 0.24321,
      "grad_norm": 1.1870239088361496,
      "learning_rate": 0.003,
      "loss": 4.0252,
      "step": 24321
    },
    {
      "epoch": 0.24322,
      "grad_norm": 1.1098744348245855,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 24322
    },
    {
      "epoch": 0.24323,
      "grad_norm": 1.4013358626080086,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 24323
    },
    {
      "epoch": 0.24324,
      "grad_norm": 1.2774390008585295,
      "learning_rate": 0.003,
      "loss": 4.045,
      "step": 24324
    },
    {
      "epoch": 0.24325,
      "grad_norm": 1.4232646560783073,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 24325
    },
    {
      "epoch": 0.24326,
      "grad_norm": 1.258223538648565,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 24326
    },
    {
      "epoch": 0.24327,
      "grad_norm": 1.2570427920975569,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 24327
    },
    {
      "epoch": 0.24328,
      "grad_norm": 1.4334107535479221,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24328
    },
    {
      "epoch": 0.24329,
      "grad_norm": 1.2406233655124415,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 24329
    },
    {
      "epoch": 0.2433,
      "grad_norm": 1.4310929648549848,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 24330
    },
    {
      "epoch": 0.24331,
      "grad_norm": 1.103687581279719,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 24331
    },
    {
      "epoch": 0.24332,
      "grad_norm": 1.5151387176506648,
      "learning_rate": 0.003,
      "loss": 4.0726,
      "step": 24332
    },
    {
      "epoch": 0.24333,
      "grad_norm": 1.138491482282826,
      "learning_rate": 0.003,
      "loss": 4.0193,
      "step": 24333
    },
    {
      "epoch": 0.24334,
      "grad_norm": 1.3784484008845785,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 24334
    },
    {
      "epoch": 0.24335,
      "grad_norm": 1.399359208029288,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24335
    },
    {
      "epoch": 0.24336,
      "grad_norm": 1.303246903348923,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 24336
    },
    {
      "epoch": 0.24337,
      "grad_norm": 1.3594896867625685,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 24337
    },
    {
      "epoch": 0.24338,
      "grad_norm": 1.3014414192233448,
      "learning_rate": 0.003,
      "loss": 4.0677,
      "step": 24338
    },
    {
      "epoch": 0.24339,
      "grad_norm": 1.1903845685813315,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 24339
    },
    {
      "epoch": 0.2434,
      "grad_norm": 1.4573698097913577,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 24340
    },
    {
      "epoch": 0.24341,
      "grad_norm": 1.1391009869645503,
      "learning_rate": 0.003,
      "loss": 4.0145,
      "step": 24341
    },
    {
      "epoch": 0.24342,
      "grad_norm": 1.5077851029104923,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 24342
    },
    {
      "epoch": 0.24343,
      "grad_norm": 1.4460109922666824,
      "learning_rate": 0.003,
      "loss": 4.0415,
      "step": 24343
    },
    {
      "epoch": 0.24344,
      "grad_norm": 1.2328575413037233,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 24344
    },
    {
      "epoch": 0.24345,
      "grad_norm": 1.4941659042148472,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 24345
    },
    {
      "epoch": 0.24346,
      "grad_norm": 1.266366569169842,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 24346
    },
    {
      "epoch": 0.24347,
      "grad_norm": 1.3315629415854084,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 24347
    },
    {
      "epoch": 0.24348,
      "grad_norm": 1.1091768802830682,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 24348
    },
    {
      "epoch": 0.24349,
      "grad_norm": 1.29103590953526,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 24349
    },
    {
      "epoch": 0.2435,
      "grad_norm": 1.331637533256418,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 24350
    },
    {
      "epoch": 0.24351,
      "grad_norm": 1.2626612531390127,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 24351
    },
    {
      "epoch": 0.24352,
      "grad_norm": 1.5421735794153124,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 24352
    },
    {
      "epoch": 0.24353,
      "grad_norm": 1.1070581822841798,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 24353
    },
    {
      "epoch": 0.24354,
      "grad_norm": 1.6211992093023875,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 24354
    },
    {
      "epoch": 0.24355,
      "grad_norm": 1.2349649328258079,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 24355
    },
    {
      "epoch": 0.24356,
      "grad_norm": 1.383760494906793,
      "learning_rate": 0.003,
      "loss": 4.0691,
      "step": 24356
    },
    {
      "epoch": 0.24357,
      "grad_norm": 1.2794547079780425,
      "learning_rate": 0.003,
      "loss": 4.0652,
      "step": 24357
    },
    {
      "epoch": 0.24358,
      "grad_norm": 1.3882579218338404,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 24358
    },
    {
      "epoch": 0.24359,
      "grad_norm": 1.363947204059705,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 24359
    },
    {
      "epoch": 0.2436,
      "grad_norm": 1.3386789358283258,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 24360
    },
    {
      "epoch": 0.24361,
      "grad_norm": 1.3178675101574866,
      "learning_rate": 0.003,
      "loss": 4.0558,
      "step": 24361
    },
    {
      "epoch": 0.24362,
      "grad_norm": 1.3907757394468752,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 24362
    },
    {
      "epoch": 0.24363,
      "grad_norm": 1.2141064776142552,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 24363
    },
    {
      "epoch": 0.24364,
      "grad_norm": 1.2105623553777896,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 24364
    },
    {
      "epoch": 0.24365,
      "grad_norm": 1.1482711614982255,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 24365
    },
    {
      "epoch": 0.24366,
      "grad_norm": 1.5031399374710517,
      "learning_rate": 0.003,
      "loss": 4.0555,
      "step": 24366
    },
    {
      "epoch": 0.24367,
      "grad_norm": 1.03573165729022,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 24367
    },
    {
      "epoch": 0.24368,
      "grad_norm": 1.469894046856681,
      "learning_rate": 0.003,
      "loss": 4.0741,
      "step": 24368
    },
    {
      "epoch": 0.24369,
      "grad_norm": 1.1427960540261197,
      "learning_rate": 0.003,
      "loss": 4.0559,
      "step": 24369
    },
    {
      "epoch": 0.2437,
      "grad_norm": 1.3353205949902167,
      "learning_rate": 0.003,
      "loss": 4.022,
      "step": 24370
    },
    {
      "epoch": 0.24371,
      "grad_norm": 1.30374156245724,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 24371
    },
    {
      "epoch": 0.24372,
      "grad_norm": 1.3048593353312705,
      "learning_rate": 0.003,
      "loss": 4.0578,
      "step": 24372
    },
    {
      "epoch": 0.24373,
      "grad_norm": 1.277169693813719,
      "learning_rate": 0.003,
      "loss": 4.0604,
      "step": 24373
    },
    {
      "epoch": 0.24374,
      "grad_norm": 1.1710966085423475,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24374
    },
    {
      "epoch": 0.24375,
      "grad_norm": 1.4131985407574053,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24375
    },
    {
      "epoch": 0.24376,
      "grad_norm": 1.5634989100931784,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 24376
    },
    {
      "epoch": 0.24377,
      "grad_norm": 1.6439959070235346,
      "learning_rate": 0.003,
      "loss": 4.031,
      "step": 24377
    },
    {
      "epoch": 0.24378,
      "grad_norm": 1.2612910964210828,
      "learning_rate": 0.003,
      "loss": 4.0163,
      "step": 24378
    },
    {
      "epoch": 0.24379,
      "grad_norm": 1.2274938496146883,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 24379
    },
    {
      "epoch": 0.2438,
      "grad_norm": 1.5446903779722578,
      "learning_rate": 0.003,
      "loss": 4.0473,
      "step": 24380
    },
    {
      "epoch": 0.24381,
      "grad_norm": 1.2130595182196149,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 24381
    },
    {
      "epoch": 0.24382,
      "grad_norm": 1.501681768875987,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 24382
    },
    {
      "epoch": 0.24383,
      "grad_norm": 1.1199131797469297,
      "learning_rate": 0.003,
      "loss": 4.0277,
      "step": 24383
    },
    {
      "epoch": 0.24384,
      "grad_norm": 1.4750102998432133,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 24384
    },
    {
      "epoch": 0.24385,
      "grad_norm": 1.1469196242822652,
      "learning_rate": 0.003,
      "loss": 4.0285,
      "step": 24385
    },
    {
      "epoch": 0.24386,
      "grad_norm": 1.3636585490293693,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 24386
    },
    {
      "epoch": 0.24387,
      "grad_norm": 1.2484401742059057,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 24387
    },
    {
      "epoch": 0.24388,
      "grad_norm": 1.3543994028077897,
      "learning_rate": 0.003,
      "loss": 4.0293,
      "step": 24388
    },
    {
      "epoch": 0.24389,
      "grad_norm": 1.3625939017751754,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 24389
    },
    {
      "epoch": 0.2439,
      "grad_norm": 1.286243334650969,
      "learning_rate": 0.003,
      "loss": 4.0216,
      "step": 24390
    },
    {
      "epoch": 0.24391,
      "grad_norm": 1.4120548491492744,
      "learning_rate": 0.003,
      "loss": 4.0298,
      "step": 24391
    },
    {
      "epoch": 0.24392,
      "grad_norm": 1.2679482414437517,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 24392
    },
    {
      "epoch": 0.24393,
      "grad_norm": 1.5188804793319215,
      "learning_rate": 0.003,
      "loss": 4.0288,
      "step": 24393
    },
    {
      "epoch": 0.24394,
      "grad_norm": 1.1513339940591412,
      "learning_rate": 0.003,
      "loss": 4.0577,
      "step": 24394
    },
    {
      "epoch": 0.24395,
      "grad_norm": 1.4249037706512802,
      "learning_rate": 0.003,
      "loss": 4.0064,
      "step": 24395
    },
    {
      "epoch": 0.24396,
      "grad_norm": 1.2539243573385133,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 24396
    },
    {
      "epoch": 0.24397,
      "grad_norm": 1.4088073980669185,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 24397
    },
    {
      "epoch": 0.24398,
      "grad_norm": 1.1193242361233235,
      "learning_rate": 0.003,
      "loss": 4.0194,
      "step": 24398
    },
    {
      "epoch": 0.24399,
      "grad_norm": 1.5437511059858533,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 24399
    },
    {
      "epoch": 0.244,
      "grad_norm": 1.167029003402302,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 24400
    },
    {
      "epoch": 0.24401,
      "grad_norm": 1.4423006843544683,
      "learning_rate": 0.003,
      "loss": 4.0136,
      "step": 24401
    },
    {
      "epoch": 0.24402,
      "grad_norm": 1.19660625567266,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 24402
    },
    {
      "epoch": 0.24403,
      "grad_norm": 1.6566764224708204,
      "learning_rate": 0.003,
      "loss": 4.0685,
      "step": 24403
    },
    {
      "epoch": 0.24404,
      "grad_norm": 1.217797948898943,
      "learning_rate": 0.003,
      "loss": 4.0409,
      "step": 24404
    },
    {
      "epoch": 0.24405,
      "grad_norm": 1.2816964607024994,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 24405
    },
    {
      "epoch": 0.24406,
      "grad_norm": 1.160572080266186,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 24406
    },
    {
      "epoch": 0.24407,
      "grad_norm": 1.3570768974084684,
      "learning_rate": 0.003,
      "loss": 4.048,
      "step": 24407
    },
    {
      "epoch": 0.24408,
      "grad_norm": 1.3505746210838767,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 24408
    },
    {
      "epoch": 0.24409,
      "grad_norm": 1.278392742908736,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 24409
    },
    {
      "epoch": 0.2441,
      "grad_norm": 1.5412685559591057,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 24410
    },
    {
      "epoch": 0.24411,
      "grad_norm": 1.2919760468073036,
      "learning_rate": 0.003,
      "loss": 4.0253,
      "step": 24411
    },
    {
      "epoch": 0.24412,
      "grad_norm": 1.2053676679336764,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 24412
    },
    {
      "epoch": 0.24413,
      "grad_norm": 1.4097765389792798,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 24413
    },
    {
      "epoch": 0.24414,
      "grad_norm": 1.1520224665585594,
      "learning_rate": 0.003,
      "loss": 4.0641,
      "step": 24414
    },
    {
      "epoch": 0.24415,
      "grad_norm": 1.3172573487463803,
      "learning_rate": 0.003,
      "loss": 4.0103,
      "step": 24415
    },
    {
      "epoch": 0.24416,
      "grad_norm": 1.181501462944302,
      "learning_rate": 0.003,
      "loss": 4.0533,
      "step": 24416
    },
    {
      "epoch": 0.24417,
      "grad_norm": 1.2850826135589895,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 24417
    },
    {
      "epoch": 0.24418,
      "grad_norm": 1.1813625743702172,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 24418
    },
    {
      "epoch": 0.24419,
      "grad_norm": 1.498848069287447,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 24419
    },
    {
      "epoch": 0.2442,
      "grad_norm": 1.3365506223278083,
      "learning_rate": 0.003,
      "loss": 4.0464,
      "step": 24420
    },
    {
      "epoch": 0.24421,
      "grad_norm": 1.507610616619639,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 24421
    },
    {
      "epoch": 0.24422,
      "grad_norm": 1.375523461219169,
      "learning_rate": 0.003,
      "loss": 4.0607,
      "step": 24422
    },
    {
      "epoch": 0.24423,
      "grad_norm": 1.4578343728913137,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 24423
    },
    {
      "epoch": 0.24424,
      "grad_norm": 1.3188601759502132,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 24424
    },
    {
      "epoch": 0.24425,
      "grad_norm": 1.208672839410013,
      "learning_rate": 0.003,
      "loss": 4.0504,
      "step": 24425
    },
    {
      "epoch": 0.24426,
      "grad_norm": 1.2003951521769713,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 24426
    },
    {
      "epoch": 0.24427,
      "grad_norm": 1.421264008091024,
      "learning_rate": 0.003,
      "loss": 4.0547,
      "step": 24427
    },
    {
      "epoch": 0.24428,
      "grad_norm": 1.256893595211034,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 24428
    },
    {
      "epoch": 0.24429,
      "grad_norm": 1.3900440844068316,
      "learning_rate": 0.003,
      "loss": 4.0436,
      "step": 24429
    },
    {
      "epoch": 0.2443,
      "grad_norm": 1.2781165917986486,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 24430
    },
    {
      "epoch": 0.24431,
      "grad_norm": 1.3807708537073173,
      "learning_rate": 0.003,
      "loss": 4.0502,
      "step": 24431
    },
    {
      "epoch": 0.24432,
      "grad_norm": 1.257664141834115,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 24432
    },
    {
      "epoch": 0.24433,
      "grad_norm": 1.4134994521861892,
      "learning_rate": 0.003,
      "loss": 4.0619,
      "step": 24433
    },
    {
      "epoch": 0.24434,
      "grad_norm": 1.2339324484004306,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 24434
    },
    {
      "epoch": 0.24435,
      "grad_norm": 1.1599182271248694,
      "learning_rate": 0.003,
      "loss": 4.0289,
      "step": 24435
    },
    {
      "epoch": 0.24436,
      "grad_norm": 1.4577503314799183,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 24436
    },
    {
      "epoch": 0.24437,
      "grad_norm": 1.250495587068921,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 24437
    },
    {
      "epoch": 0.24438,
      "grad_norm": 1.4686940998364177,
      "learning_rate": 0.003,
      "loss": 4.0264,
      "step": 24438
    },
    {
      "epoch": 0.24439,
      "grad_norm": 1.131835032873698,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 24439
    },
    {
      "epoch": 0.2444,
      "grad_norm": 1.6518414140033024,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 24440
    },
    {
      "epoch": 0.24441,
      "grad_norm": 1.0403182420996597,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 24441
    },
    {
      "epoch": 0.24442,
      "grad_norm": 1.5430143646665895,
      "learning_rate": 0.003,
      "loss": 4.0402,
      "step": 24442
    },
    {
      "epoch": 0.24443,
      "grad_norm": 1.1011653742817602,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 24443
    },
    {
      "epoch": 0.24444,
      "grad_norm": 1.4417584027895425,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 24444
    },
    {
      "epoch": 0.24445,
      "grad_norm": 1.2341631869518908,
      "learning_rate": 0.003,
      "loss": 4.0767,
      "step": 24445
    },
    {
      "epoch": 0.24446,
      "grad_norm": 1.3842106460037205,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 24446
    },
    {
      "epoch": 0.24447,
      "grad_norm": 1.4287571022327497,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 24447
    },
    {
      "epoch": 0.24448,
      "grad_norm": 1.0384178499521848,
      "learning_rate": 0.003,
      "loss": 4.0274,
      "step": 24448
    },
    {
      "epoch": 0.24449,
      "grad_norm": 1.3357146506276172,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24449
    },
    {
      "epoch": 0.2445,
      "grad_norm": 1.2822850399810086,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 24450
    },
    {
      "epoch": 0.24451,
      "grad_norm": 1.1737612358847367,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 24451
    },
    {
      "epoch": 0.24452,
      "grad_norm": 1.4190079707525498,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 24452
    },
    {
      "epoch": 0.24453,
      "grad_norm": 1.3956098662686127,
      "learning_rate": 0.003,
      "loss": 4.0689,
      "step": 24453
    },
    {
      "epoch": 0.24454,
      "grad_norm": 1.6616896674216715,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 24454
    },
    {
      "epoch": 0.24455,
      "grad_norm": 1.4288305035999362,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 24455
    },
    {
      "epoch": 0.24456,
      "grad_norm": 1.208736748260487,
      "learning_rate": 0.003,
      "loss": 4.0457,
      "step": 24456
    },
    {
      "epoch": 0.24457,
      "grad_norm": 1.4906421917853605,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 24457
    },
    {
      "epoch": 0.24458,
      "grad_norm": 1.219671554369932,
      "learning_rate": 0.003,
      "loss": 4.064,
      "step": 24458
    },
    {
      "epoch": 0.24459,
      "grad_norm": 1.311815167755257,
      "learning_rate": 0.003,
      "loss": 4.0284,
      "step": 24459
    },
    {
      "epoch": 0.2446,
      "grad_norm": 1.1017975530896578,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 24460
    },
    {
      "epoch": 0.24461,
      "grad_norm": 1.3433925817368302,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24461
    },
    {
      "epoch": 0.24462,
      "grad_norm": 1.2182044458211283,
      "learning_rate": 0.003,
      "loss": 4.0389,
      "step": 24462
    },
    {
      "epoch": 0.24463,
      "grad_norm": 1.4343536697065558,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 24463
    },
    {
      "epoch": 0.24464,
      "grad_norm": 1.326231448343906,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 24464
    },
    {
      "epoch": 0.24465,
      "grad_norm": 1.274985724216275,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24465
    },
    {
      "epoch": 0.24466,
      "grad_norm": 1.5335393823798642,
      "learning_rate": 0.003,
      "loss": 4.066,
      "step": 24466
    },
    {
      "epoch": 0.24467,
      "grad_norm": 1.161642014877953,
      "learning_rate": 0.003,
      "loss": 4.0585,
      "step": 24467
    },
    {
      "epoch": 0.24468,
      "grad_norm": 1.613223118669463,
      "learning_rate": 0.003,
      "loss": 4.0419,
      "step": 24468
    },
    {
      "epoch": 0.24469,
      "grad_norm": 1.1301038545968003,
      "learning_rate": 0.003,
      "loss": 4.0867,
      "step": 24469
    },
    {
      "epoch": 0.2447,
      "grad_norm": 1.3914040131798775,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 24470
    },
    {
      "epoch": 0.24471,
      "grad_norm": 1.2760577866377025,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 24471
    },
    {
      "epoch": 0.24472,
      "grad_norm": 1.2836135686488033,
      "learning_rate": 0.003,
      "loss": 4.032,
      "step": 24472
    },
    {
      "epoch": 0.24473,
      "grad_norm": 1.1251757470155173,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 24473
    },
    {
      "epoch": 0.24474,
      "grad_norm": 1.3949337531023083,
      "learning_rate": 0.003,
      "loss": 4.0358,
      "step": 24474
    },
    {
      "epoch": 0.24475,
      "grad_norm": 1.1358461155418933,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 24475
    },
    {
      "epoch": 0.24476,
      "grad_norm": 1.3852183903795379,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 24476
    },
    {
      "epoch": 0.24477,
      "grad_norm": 1.0656679158903217,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 24477
    },
    {
      "epoch": 0.24478,
      "grad_norm": 1.3576232048478207,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 24478
    },
    {
      "epoch": 0.24479,
      "grad_norm": 1.196936959037576,
      "learning_rate": 0.003,
      "loss": 4.0435,
      "step": 24479
    },
    {
      "epoch": 0.2448,
      "grad_norm": 1.2764074590267935,
      "learning_rate": 0.003,
      "loss": 4.0191,
      "step": 24480
    },
    {
      "epoch": 0.24481,
      "grad_norm": 1.5425521827525461,
      "learning_rate": 0.003,
      "loss": 4.0294,
      "step": 24481
    },
    {
      "epoch": 0.24482,
      "grad_norm": 1.4028500733814346,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 24482
    },
    {
      "epoch": 0.24483,
      "grad_norm": 1.9279301181966597,
      "learning_rate": 0.003,
      "loss": 4.0184,
      "step": 24483
    },
    {
      "epoch": 0.24484,
      "grad_norm": 1.0313661319034655,
      "learning_rate": 0.003,
      "loss": 4.0632,
      "step": 24484
    },
    {
      "epoch": 0.24485,
      "grad_norm": 1.2940843844400831,
      "learning_rate": 0.003,
      "loss": 4.0363,
      "step": 24485
    },
    {
      "epoch": 0.24486,
      "grad_norm": 1.2195266355531074,
      "learning_rate": 0.003,
      "loss": 4.0407,
      "step": 24486
    },
    {
      "epoch": 0.24487,
      "grad_norm": 1.4029352470606242,
      "learning_rate": 0.003,
      "loss": 4.0417,
      "step": 24487
    },
    {
      "epoch": 0.24488,
      "grad_norm": 1.404083273868884,
      "learning_rate": 0.003,
      "loss": 4.0809,
      "step": 24488
    },
    {
      "epoch": 0.24489,
      "grad_norm": 1.2174610046897985,
      "learning_rate": 0.003,
      "loss": 4.0665,
      "step": 24489
    },
    {
      "epoch": 0.2449,
      "grad_norm": 1.3279445169071793,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 24490
    },
    {
      "epoch": 0.24491,
      "grad_norm": 1.239670336608579,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 24491
    },
    {
      "epoch": 0.24492,
      "grad_norm": 1.3742538228163146,
      "learning_rate": 0.003,
      "loss": 4.0449,
      "step": 24492
    },
    {
      "epoch": 0.24493,
      "grad_norm": 1.3652170532685641,
      "learning_rate": 0.003,
      "loss": 4.0164,
      "step": 24493
    },
    {
      "epoch": 0.24494,
      "grad_norm": 1.336665678580845,
      "learning_rate": 0.003,
      "loss": 4.0384,
      "step": 24494
    },
    {
      "epoch": 0.24495,
      "grad_norm": 1.4015942063806432,
      "learning_rate": 0.003,
      "loss": 4.0548,
      "step": 24495
    },
    {
      "epoch": 0.24496,
      "grad_norm": 1.0381947803290668,
      "learning_rate": 0.003,
      "loss": 4.0452,
      "step": 24496
    },
    {
      "epoch": 0.24497,
      "grad_norm": 1.6132895916769672,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 24497
    },
    {
      "epoch": 0.24498,
      "grad_norm": 1.1547106062903594,
      "learning_rate": 0.003,
      "loss": 4.0551,
      "step": 24498
    },
    {
      "epoch": 0.24499,
      "grad_norm": 1.5681254668710123,
      "learning_rate": 0.003,
      "loss": 4.0249,
      "step": 24499
    },
    {
      "epoch": 0.245,
      "grad_norm": 1.20962983361933,
      "learning_rate": 0.003,
      "loss": 4.0388,
      "step": 24500
    },
    {
      "epoch": 0.24501,
      "grad_norm": 1.2669359949920802,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 24501
    },
    {
      "epoch": 0.24502,
      "grad_norm": 1.247588521331606,
      "learning_rate": 0.003,
      "loss": 4.0763,
      "step": 24502
    },
    {
      "epoch": 0.24503,
      "grad_norm": 1.3477524654614856,
      "learning_rate": 0.003,
      "loss": 4.0304,
      "step": 24503
    },
    {
      "epoch": 0.24504,
      "grad_norm": 1.3385828413177792,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24504
    },
    {
      "epoch": 0.24505,
      "grad_norm": 1.2603885391578664,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 24505
    },
    {
      "epoch": 0.24506,
      "grad_norm": 1.3773839120861833,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 24506
    },
    {
      "epoch": 0.24507,
      "grad_norm": 1.6678716711570647,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 24507
    },
    {
      "epoch": 0.24508,
      "grad_norm": 1.243321260104112,
      "learning_rate": 0.003,
      "loss": 4.0199,
      "step": 24508
    },
    {
      "epoch": 0.24509,
      "grad_norm": 1.4479103879536566,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24509
    },
    {
      "epoch": 0.2451,
      "grad_norm": 1.2348195679519878,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 24510
    },
    {
      "epoch": 0.24511,
      "grad_norm": 1.2659594312271438,
      "learning_rate": 0.003,
      "loss": 4.0423,
      "step": 24511
    },
    {
      "epoch": 0.24512,
      "grad_norm": 1.1857350221228964,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 24512
    },
    {
      "epoch": 0.24513,
      "grad_norm": 1.3947144517267773,
      "learning_rate": 0.003,
      "loss": 4.0372,
      "step": 24513
    },
    {
      "epoch": 0.24514,
      "grad_norm": 1.1174055011501585,
      "learning_rate": 0.003,
      "loss": 4.0742,
      "step": 24514
    },
    {
      "epoch": 0.24515,
      "grad_norm": 1.294287122790688,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 24515
    },
    {
      "epoch": 0.24516,
      "grad_norm": 1.055639455236206,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 24516
    },
    {
      "epoch": 0.24517,
      "grad_norm": 1.4594500753428272,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 24517
    },
    {
      "epoch": 0.24518,
      "grad_norm": 1.1671583501748672,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 24518
    },
    {
      "epoch": 0.24519,
      "grad_norm": 1.796210645180149,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 24519
    },
    {
      "epoch": 0.2452,
      "grad_norm": 1.1656828766903715,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 24520
    },
    {
      "epoch": 0.24521,
      "grad_norm": 1.3545401317287933,
      "learning_rate": 0.003,
      "loss": 4.05,
      "step": 24521
    },
    {
      "epoch": 0.24522,
      "grad_norm": 1.0884210364788287,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 24522
    },
    {
      "epoch": 0.24523,
      "grad_norm": 1.6566413709952723,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 24523
    },
    {
      "epoch": 0.24524,
      "grad_norm": 1.237552191869169,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 24524
    },
    {
      "epoch": 0.24525,
      "grad_norm": 1.4451472074967542,
      "learning_rate": 0.003,
      "loss": 4.0566,
      "step": 24525
    },
    {
      "epoch": 0.24526,
      "grad_norm": 1.0565917543706052,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 24526
    },
    {
      "epoch": 0.24527,
      "grad_norm": 1.6121808891605443,
      "learning_rate": 0.003,
      "loss": 4.0833,
      "step": 24527
    },
    {
      "epoch": 0.24528,
      "grad_norm": 1.2307575216954734,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 24528
    },
    {
      "epoch": 0.24529,
      "grad_norm": 1.4754564653970716,
      "learning_rate": 0.003,
      "loss": 4.0611,
      "step": 24529
    },
    {
      "epoch": 0.2453,
      "grad_norm": 1.5447846233884956,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 24530
    },
    {
      "epoch": 0.24531,
      "grad_norm": 1.4122065983205467,
      "learning_rate": 0.003,
      "loss": 4.0183,
      "step": 24531
    },
    {
      "epoch": 0.24532,
      "grad_norm": 1.1605230042780361,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 24532
    },
    {
      "epoch": 0.24533,
      "grad_norm": 1.3733446178738185,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 24533
    },
    {
      "epoch": 0.24534,
      "grad_norm": 1.393069005317022,
      "learning_rate": 0.003,
      "loss": 4.0625,
      "step": 24534
    },
    {
      "epoch": 0.24535,
      "grad_norm": 1.1777037861050252,
      "learning_rate": 0.003,
      "loss": 4.0483,
      "step": 24535
    },
    {
      "epoch": 0.24536,
      "grad_norm": 1.4839888861445882,
      "learning_rate": 0.003,
      "loss": 4.0477,
      "step": 24536
    },
    {
      "epoch": 0.24537,
      "grad_norm": 1.135476649976957,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 24537
    },
    {
      "epoch": 0.24538,
      "grad_norm": 1.6042579829009473,
      "learning_rate": 0.003,
      "loss": 4.0115,
      "step": 24538
    },
    {
      "epoch": 0.24539,
      "grad_norm": 1.0277540019348497,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 24539
    },
    {
      "epoch": 0.2454,
      "grad_norm": 1.4905150030061323,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 24540
    },
    {
      "epoch": 0.24541,
      "grad_norm": 1.0833401398421967,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 24541
    },
    {
      "epoch": 0.24542,
      "grad_norm": 1.6393310050095853,
      "learning_rate": 0.003,
      "loss": 4.059,
      "step": 24542
    },
    {
      "epoch": 0.24543,
      "grad_norm": 1.2489270071015557,
      "learning_rate": 0.003,
      "loss": 4.0664,
      "step": 24543
    },
    {
      "epoch": 0.24544,
      "grad_norm": 1.3765663950696332,
      "learning_rate": 0.003,
      "loss": 4.0737,
      "step": 24544
    },
    {
      "epoch": 0.24545,
      "grad_norm": 1.2498631490998917,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 24545
    },
    {
      "epoch": 0.24546,
      "grad_norm": 1.222010572283642,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 24546
    },
    {
      "epoch": 0.24547,
      "grad_norm": 1.3206084983028674,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 24547
    },
    {
      "epoch": 0.24548,
      "grad_norm": 1.236253129593086,
      "learning_rate": 0.003,
      "loss": 4.0515,
      "step": 24548
    },
    {
      "epoch": 0.24549,
      "grad_norm": 1.4034262110781486,
      "learning_rate": 0.003,
      "loss": 4.0482,
      "step": 24549
    },
    {
      "epoch": 0.2455,
      "grad_norm": 1.1882870256342752,
      "learning_rate": 0.003,
      "loss": 4.0311,
      "step": 24550
    },
    {
      "epoch": 0.24551,
      "grad_norm": 1.5632220912390589,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 24551
    },
    {
      "epoch": 0.24552,
      "grad_norm": 1.346154084408589,
      "learning_rate": 0.003,
      "loss": 4.0441,
      "step": 24552
    },
    {
      "epoch": 0.24553,
      "grad_norm": 1.2091822152564482,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 24553
    },
    {
      "epoch": 0.24554,
      "grad_norm": 1.1585792156401415,
      "learning_rate": 0.003,
      "loss": 4.0724,
      "step": 24554
    },
    {
      "epoch": 0.24555,
      "grad_norm": 1.1888195496287441,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 24555
    },
    {
      "epoch": 0.24556,
      "grad_norm": 1.2994592204498774,
      "learning_rate": 0.003,
      "loss": 4.0433,
      "step": 24556
    },
    {
      "epoch": 0.24557,
      "grad_norm": 1.2953732094825978,
      "learning_rate": 0.003,
      "loss": 4.009,
      "step": 24557
    },
    {
      "epoch": 0.24558,
      "grad_norm": 1.4551132309697592,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 24558
    },
    {
      "epoch": 0.24559,
      "grad_norm": 1.2664372475090542,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 24559
    },
    {
      "epoch": 0.2456,
      "grad_norm": 1.5680081647814643,
      "learning_rate": 0.003,
      "loss": 4.0847,
      "step": 24560
    },
    {
      "epoch": 0.24561,
      "grad_norm": 1.4986508065676625,
      "learning_rate": 0.003,
      "loss": 4.0229,
      "step": 24561
    },
    {
      "epoch": 0.24562,
      "grad_norm": 1.3175557421315478,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 24562
    },
    {
      "epoch": 0.24563,
      "grad_norm": 1.487003322296809,
      "learning_rate": 0.003,
      "loss": 4.0706,
      "step": 24563
    },
    {
      "epoch": 0.24564,
      "grad_norm": 0.9274034468608103,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 24564
    },
    {
      "epoch": 0.24565,
      "grad_norm": 1.315593903885781,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 24565
    },
    {
      "epoch": 0.24566,
      "grad_norm": 1.1380597690493108,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 24566
    },
    {
      "epoch": 0.24567,
      "grad_norm": 1.5286179953409904,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 24567
    },
    {
      "epoch": 0.24568,
      "grad_norm": 1.2748392172647982,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24568
    },
    {
      "epoch": 0.24569,
      "grad_norm": 1.5881659245536273,
      "learning_rate": 0.003,
      "loss": 4.0157,
      "step": 24569
    },
    {
      "epoch": 0.2457,
      "grad_norm": 1.1704966863521384,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 24570
    },
    {
      "epoch": 0.24571,
      "grad_norm": 1.363024976952242,
      "learning_rate": 0.003,
      "loss": 4.0729,
      "step": 24571
    },
    {
      "epoch": 0.24572,
      "grad_norm": 1.209518326800387,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 24572
    },
    {
      "epoch": 0.24573,
      "grad_norm": 1.5397765217767965,
      "learning_rate": 0.003,
      "loss": 4.0503,
      "step": 24573
    },
    {
      "epoch": 0.24574,
      "grad_norm": 1.134602315350047,
      "learning_rate": 0.003,
      "loss": 4.0659,
      "step": 24574
    },
    {
      "epoch": 0.24575,
      "grad_norm": 1.4678815697012149,
      "learning_rate": 0.003,
      "loss": 4.0337,
      "step": 24575
    },
    {
      "epoch": 0.24576,
      "grad_norm": 1.0790848780382025,
      "learning_rate": 0.003,
      "loss": 4.0397,
      "step": 24576
    },
    {
      "epoch": 0.24577,
      "grad_norm": 1.335404480150775,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 24577
    },
    {
      "epoch": 0.24578,
      "grad_norm": 1.3021074891682802,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 24578
    },
    {
      "epoch": 0.24579,
      "grad_norm": 1.2286809005795094,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 24579
    },
    {
      "epoch": 0.2458,
      "grad_norm": 1.4162283650362506,
      "learning_rate": 0.003,
      "loss": 4.0306,
      "step": 24580
    },
    {
      "epoch": 0.24581,
      "grad_norm": 1.2150062756548052,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 24581
    },
    {
      "epoch": 0.24582,
      "grad_norm": 1.2836573080150377,
      "learning_rate": 0.003,
      "loss": 4.0877,
      "step": 24582
    },
    {
      "epoch": 0.24583,
      "grad_norm": 1.1362060948012536,
      "learning_rate": 0.003,
      "loss": 4.0499,
      "step": 24583
    },
    {
      "epoch": 0.24584,
      "grad_norm": 1.3166619351385562,
      "learning_rate": 0.003,
      "loss": 4.0673,
      "step": 24584
    },
    {
      "epoch": 0.24585,
      "grad_norm": 1.361701905821797,
      "learning_rate": 0.003,
      "loss": 4.0785,
      "step": 24585
    },
    {
      "epoch": 0.24586,
      "grad_norm": 1.174692030608153,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 24586
    },
    {
      "epoch": 0.24587,
      "grad_norm": 1.5193122545143058,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 24587
    },
    {
      "epoch": 0.24588,
      "grad_norm": 1.1221785087614495,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 24588
    },
    {
      "epoch": 0.24589,
      "grad_norm": 1.4140066228993404,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 24589
    },
    {
      "epoch": 0.2459,
      "grad_norm": 1.2189373034345057,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 24590
    },
    {
      "epoch": 0.24591,
      "grad_norm": 1.3954985919171259,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 24591
    },
    {
      "epoch": 0.24592,
      "grad_norm": 1.2570197988187979,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 24592
    },
    {
      "epoch": 0.24593,
      "grad_norm": 1.3830657924088505,
      "learning_rate": 0.003,
      "loss": 4.0647,
      "step": 24593
    },
    {
      "epoch": 0.24594,
      "grad_norm": 1.3109099888904479,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 24594
    },
    {
      "epoch": 0.24595,
      "grad_norm": 1.4752542833426738,
      "learning_rate": 0.003,
      "loss": 4.0445,
      "step": 24595
    },
    {
      "epoch": 0.24596,
      "grad_norm": 1.8093942114634118,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 24596
    },
    {
      "epoch": 0.24597,
      "grad_norm": 1.1875163494228915,
      "learning_rate": 0.003,
      "loss": 4.0748,
      "step": 24597
    },
    {
      "epoch": 0.24598,
      "grad_norm": 1.5295106400673966,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 24598
    },
    {
      "epoch": 0.24599,
      "grad_norm": 1.5553621730584248,
      "learning_rate": 0.003,
      "loss": 4.023,
      "step": 24599
    },
    {
      "epoch": 0.246,
      "grad_norm": 1.0943708649938018,
      "learning_rate": 0.003,
      "loss": 4.0025,
      "step": 24600
    },
    {
      "epoch": 0.24601,
      "grad_norm": 1.5324238663763916,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 24601
    },
    {
      "epoch": 0.24602,
      "grad_norm": 0.9865929654013261,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 24602
    },
    {
      "epoch": 0.24603,
      "grad_norm": 1.39837091860186,
      "learning_rate": 0.003,
      "loss": 4.0231,
      "step": 24603
    },
    {
      "epoch": 0.24604,
      "grad_norm": 1.1674379021235621,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 24604
    },
    {
      "epoch": 0.24605,
      "grad_norm": 1.3303618331297282,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 24605
    },
    {
      "epoch": 0.24606,
      "grad_norm": 1.3426334514107259,
      "learning_rate": 0.003,
      "loss": 4.0248,
      "step": 24606
    },
    {
      "epoch": 0.24607,
      "grad_norm": 1.1819994896526633,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 24607
    },
    {
      "epoch": 0.24608,
      "grad_norm": 1.225025403617744,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24608
    },
    {
      "epoch": 0.24609,
      "grad_norm": 1.353660922034048,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 24609
    },
    {
      "epoch": 0.2461,
      "grad_norm": 1.4180331439397997,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 24610
    },
    {
      "epoch": 0.24611,
      "grad_norm": 1.391685917972673,
      "learning_rate": 0.003,
      "loss": 4.021,
      "step": 24611
    },
    {
      "epoch": 0.24612,
      "grad_norm": 1.1943647528055936,
      "learning_rate": 0.003,
      "loss": 4.0172,
      "step": 24612
    },
    {
      "epoch": 0.24613,
      "grad_norm": 1.3195618406423208,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 24613
    },
    {
      "epoch": 0.24614,
      "grad_norm": 1.270598821188231,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 24614
    },
    {
      "epoch": 0.24615,
      "grad_norm": 1.3185533091717765,
      "learning_rate": 0.003,
      "loss": 4.0394,
      "step": 24615
    },
    {
      "epoch": 0.24616,
      "grad_norm": 1.1593932524997574,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 24616
    },
    {
      "epoch": 0.24617,
      "grad_norm": 1.5655085440115326,
      "learning_rate": 0.003,
      "loss": 4.0447,
      "step": 24617
    },
    {
      "epoch": 0.24618,
      "grad_norm": 1.075335990727989,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 24618
    },
    {
      "epoch": 0.24619,
      "grad_norm": 1.420058777248943,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24619
    },
    {
      "epoch": 0.2462,
      "grad_norm": 1.0918268751673923,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 24620
    },
    {
      "epoch": 0.24621,
      "grad_norm": 1.976111999697177,
      "learning_rate": 0.003,
      "loss": 4.087,
      "step": 24621
    },
    {
      "epoch": 0.24622,
      "grad_norm": 1.0911559202404375,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 24622
    },
    {
      "epoch": 0.24623,
      "grad_norm": 1.4037741843675444,
      "learning_rate": 0.003,
      "loss": 4.0516,
      "step": 24623
    },
    {
      "epoch": 0.24624,
      "grad_norm": 1.255028535780465,
      "learning_rate": 0.003,
      "loss": 4.0495,
      "step": 24624
    },
    {
      "epoch": 0.24625,
      "grad_norm": 1.4018604150067269,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 24625
    },
    {
      "epoch": 0.24626,
      "grad_norm": 1.5939892138353882,
      "learning_rate": 0.003,
      "loss": 4.0321,
      "step": 24626
    },
    {
      "epoch": 0.24627,
      "grad_norm": 1.4351595313608372,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 24627
    },
    {
      "epoch": 0.24628,
      "grad_norm": 1.4204216618273355,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 24628
    },
    {
      "epoch": 0.24629,
      "grad_norm": 1.1369933346609602,
      "learning_rate": 0.003,
      "loss": 4.0398,
      "step": 24629
    },
    {
      "epoch": 0.2463,
      "grad_norm": 1.5131769063727587,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 24630
    },
    {
      "epoch": 0.24631,
      "grad_norm": 1.0898521553812368,
      "learning_rate": 0.003,
      "loss": 4.0432,
      "step": 24631
    },
    {
      "epoch": 0.24632,
      "grad_norm": 1.3970642840223215,
      "learning_rate": 0.003,
      "loss": 4.0246,
      "step": 24632
    },
    {
      "epoch": 0.24633,
      "grad_norm": 1.0579311195392256,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24633
    },
    {
      "epoch": 0.24634,
      "grad_norm": 1.3657986467401328,
      "learning_rate": 0.003,
      "loss": 4.0211,
      "step": 24634
    },
    {
      "epoch": 0.24635,
      "grad_norm": 1.109865105267184,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 24635
    },
    {
      "epoch": 0.24636,
      "grad_norm": 1.4600265981516272,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 24636
    },
    {
      "epoch": 0.24637,
      "grad_norm": 1.3169650620458038,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 24637
    },
    {
      "epoch": 0.24638,
      "grad_norm": 1.5109969429114762,
      "learning_rate": 0.003,
      "loss": 4.0373,
      "step": 24638
    },
    {
      "epoch": 0.24639,
      "grad_norm": 1.2036021203825946,
      "learning_rate": 0.003,
      "loss": 4.0237,
      "step": 24639
    },
    {
      "epoch": 0.2464,
      "grad_norm": 1.2882482932566781,
      "learning_rate": 0.003,
      "loss": 4.025,
      "step": 24640
    },
    {
      "epoch": 0.24641,
      "grad_norm": 1.6248550739869179,
      "learning_rate": 0.003,
      "loss": 4.0626,
      "step": 24641
    },
    {
      "epoch": 0.24642,
      "grad_norm": 1.0442128909171355,
      "learning_rate": 0.003,
      "loss": 4.0391,
      "step": 24642
    },
    {
      "epoch": 0.24643,
      "grad_norm": 1.393027503895784,
      "learning_rate": 0.003,
      "loss": 4.0736,
      "step": 24643
    },
    {
      "epoch": 0.24644,
      "grad_norm": 1.1759422390190883,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 24644
    },
    {
      "epoch": 0.24645,
      "grad_norm": 1.5185546369545304,
      "learning_rate": 0.003,
      "loss": 4.0521,
      "step": 24645
    },
    {
      "epoch": 0.24646,
      "grad_norm": 1.09837983903636,
      "learning_rate": 0.003,
      "loss": 4.036,
      "step": 24646
    },
    {
      "epoch": 0.24647,
      "grad_norm": 1.6195419550061216,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 24647
    },
    {
      "epoch": 0.24648,
      "grad_norm": 1.3013617301688172,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 24648
    },
    {
      "epoch": 0.24649,
      "grad_norm": 1.1932286571998207,
      "learning_rate": 0.003,
      "loss": 4.0371,
      "step": 24649
    },
    {
      "epoch": 0.2465,
      "grad_norm": 1.39804739285826,
      "learning_rate": 0.003,
      "loss": 4.0479,
      "step": 24650
    },
    {
      "epoch": 0.24651,
      "grad_norm": 1.291531026975577,
      "learning_rate": 0.003,
      "loss": 4.0242,
      "step": 24651
    },
    {
      "epoch": 0.24652,
      "grad_norm": 1.3042821706559988,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 24652
    },
    {
      "epoch": 0.24653,
      "grad_norm": 1.3248442248467729,
      "learning_rate": 0.003,
      "loss": 4.0257,
      "step": 24653
    },
    {
      "epoch": 0.24654,
      "grad_norm": 1.3082035128477907,
      "learning_rate": 0.003,
      "loss": 4.0693,
      "step": 24654
    },
    {
      "epoch": 0.24655,
      "grad_norm": 1.3693317044900468,
      "learning_rate": 0.003,
      "loss": 4.0863,
      "step": 24655
    },
    {
      "epoch": 0.24656,
      "grad_norm": 1.11185556858586,
      "learning_rate": 0.003,
      "loss": 4.0534,
      "step": 24656
    },
    {
      "epoch": 0.24657,
      "grad_norm": 1.6261556068405063,
      "learning_rate": 0.003,
      "loss": 4.0308,
      "step": 24657
    },
    {
      "epoch": 0.24658,
      "grad_norm": 1.2304751326220844,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 24658
    },
    {
      "epoch": 0.24659,
      "grad_norm": 1.4508123845516905,
      "learning_rate": 0.003,
      "loss": 4.0375,
      "step": 24659
    },
    {
      "epoch": 0.2466,
      "grad_norm": 1.2527596978161486,
      "learning_rate": 0.003,
      "loss": 4.0492,
      "step": 24660
    },
    {
      "epoch": 0.24661,
      "grad_norm": 1.4080833239751158,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 24661
    },
    {
      "epoch": 0.24662,
      "grad_norm": 1.1748121378591787,
      "learning_rate": 0.003,
      "loss": 4.0097,
      "step": 24662
    },
    {
      "epoch": 0.24663,
      "grad_norm": 1.148162313165084,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24663
    },
    {
      "epoch": 0.24664,
      "grad_norm": 1.5627436868044455,
      "learning_rate": 0.003,
      "loss": 4.0434,
      "step": 24664
    },
    {
      "epoch": 0.24665,
      "grad_norm": 1.0266064427870762,
      "learning_rate": 0.003,
      "loss": 4.0281,
      "step": 24665
    },
    {
      "epoch": 0.24666,
      "grad_norm": 1.3119867924575164,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24666
    },
    {
      "epoch": 0.24667,
      "grad_norm": 1.246639895630165,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 24667
    },
    {
      "epoch": 0.24668,
      "grad_norm": 1.8385608686779658,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 24668
    },
    {
      "epoch": 0.24669,
      "grad_norm": 1.1381394160669267,
      "learning_rate": 0.003,
      "loss": 4.0416,
      "step": 24669
    },
    {
      "epoch": 0.2467,
      "grad_norm": 1.2824021096641982,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 24670
    },
    {
      "epoch": 0.24671,
      "grad_norm": 1.1762549218653497,
      "learning_rate": 0.003,
      "loss": 4.0365,
      "step": 24671
    },
    {
      "epoch": 0.24672,
      "grad_norm": 1.58865933635969,
      "learning_rate": 0.003,
      "loss": 4.0859,
      "step": 24672
    },
    {
      "epoch": 0.24673,
      "grad_norm": 1.1840540412335352,
      "learning_rate": 0.003,
      "loss": 4.0315,
      "step": 24673
    },
    {
      "epoch": 0.24674,
      "grad_norm": 1.2995494641211922,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 24674
    },
    {
      "epoch": 0.24675,
      "grad_norm": 1.3030218988693136,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 24675
    },
    {
      "epoch": 0.24676,
      "grad_norm": 1.2629691373253202,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 24676
    },
    {
      "epoch": 0.24677,
      "grad_norm": 1.3937666364863655,
      "learning_rate": 0.003,
      "loss": 4.0654,
      "step": 24677
    },
    {
      "epoch": 0.24678,
      "grad_norm": 1.3413538812072792,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 24678
    },
    {
      "epoch": 0.24679,
      "grad_norm": 1.197981971325603,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 24679
    },
    {
      "epoch": 0.2468,
      "grad_norm": 1.3491724993718825,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 24680
    },
    {
      "epoch": 0.24681,
      "grad_norm": 1.426774027491637,
      "learning_rate": 0.003,
      "loss": 4.0292,
      "step": 24681
    },
    {
      "epoch": 0.24682,
      "grad_norm": 1.209217554220248,
      "learning_rate": 0.003,
      "loss": 4.0202,
      "step": 24682
    },
    {
      "epoch": 0.24683,
      "grad_norm": 1.4165296966409964,
      "learning_rate": 0.003,
      "loss": 4.0427,
      "step": 24683
    },
    {
      "epoch": 0.24684,
      "grad_norm": 1.1709265323177698,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24684
    },
    {
      "epoch": 0.24685,
      "grad_norm": 1.075528142048488,
      "learning_rate": 0.003,
      "loss": 4.0123,
      "step": 24685
    },
    {
      "epoch": 0.24686,
      "grad_norm": 1.4081802136054868,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 24686
    },
    {
      "epoch": 0.24687,
      "grad_norm": 1.1185911634221184,
      "learning_rate": 0.003,
      "loss": 4.058,
      "step": 24687
    },
    {
      "epoch": 0.24688,
      "grad_norm": 1.4922467097586183,
      "learning_rate": 0.003,
      "loss": 4.0543,
      "step": 24688
    },
    {
      "epoch": 0.24689,
      "grad_norm": 1.303196131951756,
      "learning_rate": 0.003,
      "loss": 4.0133,
      "step": 24689
    },
    {
      "epoch": 0.2469,
      "grad_norm": 1.1429592682731438,
      "learning_rate": 0.003,
      "loss": 4.0681,
      "step": 24690
    },
    {
      "epoch": 0.24691,
      "grad_norm": 1.3209312908754482,
      "learning_rate": 0.003,
      "loss": 4.0675,
      "step": 24691
    },
    {
      "epoch": 0.24692,
      "grad_norm": 1.248526223449253,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 24692
    },
    {
      "epoch": 0.24693,
      "grad_norm": 1.3123696700134033,
      "learning_rate": 0.003,
      "loss": 4.0602,
      "step": 24693
    },
    {
      "epoch": 0.24694,
      "grad_norm": 1.5610281502167933,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 24694
    },
    {
      "epoch": 0.24695,
      "grad_norm": 1.0662173736404557,
      "learning_rate": 0.003,
      "loss": 4.0283,
      "step": 24695
    },
    {
      "epoch": 0.24696,
      "grad_norm": 1.4153850277726499,
      "learning_rate": 0.003,
      "loss": 4.0317,
      "step": 24696
    },
    {
      "epoch": 0.24697,
      "grad_norm": 1.4243857413173024,
      "learning_rate": 0.003,
      "loss": 4.0366,
      "step": 24697
    },
    {
      "epoch": 0.24698,
      "grad_norm": 1.6706664425847921,
      "learning_rate": 0.003,
      "loss": 4.0509,
      "step": 24698
    },
    {
      "epoch": 0.24699,
      "grad_norm": 1.1272008388777834,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 24699
    },
    {
      "epoch": 0.247,
      "grad_norm": 1.315220519213806,
      "learning_rate": 0.003,
      "loss": 4.0459,
      "step": 24700
    },
    {
      "epoch": 0.24701,
      "grad_norm": 1.4036575572677676,
      "learning_rate": 0.003,
      "loss": 4.0278,
      "step": 24701
    },
    {
      "epoch": 0.24702,
      "grad_norm": 1.320689760719037,
      "learning_rate": 0.003,
      "loss": 4.0351,
      "step": 24702
    },
    {
      "epoch": 0.24703,
      "grad_norm": 1.3729646519522445,
      "learning_rate": 0.003,
      "loss": 4.0303,
      "step": 24703
    },
    {
      "epoch": 0.24704,
      "grad_norm": 1.4122543204723723,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 24704
    },
    {
      "epoch": 0.24705,
      "grad_norm": 1.1329785659810372,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 24705
    },
    {
      "epoch": 0.24706,
      "grad_norm": 1.225864799648413,
      "learning_rate": 0.003,
      "loss": 4.0589,
      "step": 24706
    },
    {
      "epoch": 0.24707,
      "grad_norm": 1.3888954628970043,
      "learning_rate": 0.003,
      "loss": 4.0709,
      "step": 24707
    },
    {
      "epoch": 0.24708,
      "grad_norm": 1.2574468237656262,
      "learning_rate": 0.003,
      "loss": 4.0563,
      "step": 24708
    },
    {
      "epoch": 0.24709,
      "grad_norm": 1.3557089101752695,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24709
    },
    {
      "epoch": 0.2471,
      "grad_norm": 1.5100492022687908,
      "learning_rate": 0.003,
      "loss": 4.0462,
      "step": 24710
    },
    {
      "epoch": 0.24711,
      "grad_norm": 1.1096014896358144,
      "learning_rate": 0.003,
      "loss": 4.0188,
      "step": 24711
    },
    {
      "epoch": 0.24712,
      "grad_norm": 1.3578662203165828,
      "learning_rate": 0.003,
      "loss": 4.0263,
      "step": 24712
    },
    {
      "epoch": 0.24713,
      "grad_norm": 1.2826864850854105,
      "learning_rate": 0.003,
      "loss": 4.0309,
      "step": 24713
    },
    {
      "epoch": 0.24714,
      "grad_norm": 1.2651306368814537,
      "learning_rate": 0.003,
      "loss": 4.0258,
      "step": 24714
    },
    {
      "epoch": 0.24715,
      "grad_norm": 1.3512547056840147,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 24715
    },
    {
      "epoch": 0.24716,
      "grad_norm": 1.1950572755603974,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 24716
    },
    {
      "epoch": 0.24717,
      "grad_norm": 1.430252233614018,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 24717
    },
    {
      "epoch": 0.24718,
      "grad_norm": 1.245590730430256,
      "learning_rate": 0.003,
      "loss": 4.057,
      "step": 24718
    },
    {
      "epoch": 0.24719,
      "grad_norm": 1.7860250976044554,
      "learning_rate": 0.003,
      "loss": 4.0361,
      "step": 24719
    },
    {
      "epoch": 0.2472,
      "grad_norm": 1.0308833507021609,
      "learning_rate": 0.003,
      "loss": 4.0299,
      "step": 24720
    },
    {
      "epoch": 0.24721,
      "grad_norm": 1.3425210027908412,
      "learning_rate": 0.003,
      "loss": 4.0634,
      "step": 24721
    },
    {
      "epoch": 0.24722,
      "grad_norm": 1.5274247626978565,
      "learning_rate": 0.003,
      "loss": 4.0501,
      "step": 24722
    },
    {
      "epoch": 0.24723,
      "grad_norm": 1.1421212200220046,
      "learning_rate": 0.003,
      "loss": 4.0098,
      "step": 24723
    },
    {
      "epoch": 0.24724,
      "grad_norm": 1.558193025652777,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 24724
    },
    {
      "epoch": 0.24725,
      "grad_norm": 1.0427942658773595,
      "learning_rate": 0.003,
      "loss": 4.0271,
      "step": 24725
    },
    {
      "epoch": 0.24726,
      "grad_norm": 1.5394102963297531,
      "learning_rate": 0.003,
      "loss": 4.0682,
      "step": 24726
    },
    {
      "epoch": 0.24727,
      "grad_norm": 1.260448997386808,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 24727
    },
    {
      "epoch": 0.24728,
      "grad_norm": 1.184134464797292,
      "learning_rate": 0.003,
      "loss": 4.0214,
      "step": 24728
    },
    {
      "epoch": 0.24729,
      "grad_norm": 1.2955163227837003,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 24729
    },
    {
      "epoch": 0.2473,
      "grad_norm": 1.341876772559969,
      "learning_rate": 0.003,
      "loss": 4.0286,
      "step": 24730
    },
    {
      "epoch": 0.24731,
      "grad_norm": 1.2552013840497218,
      "learning_rate": 0.003,
      "loss": 4.0734,
      "step": 24731
    },
    {
      "epoch": 0.24732,
      "grad_norm": 1.6944801308915869,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 24732
    },
    {
      "epoch": 0.24733,
      "grad_norm": 1.1967212144215253,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 24733
    },
    {
      "epoch": 0.24734,
      "grad_norm": 1.165790288801221,
      "learning_rate": 0.003,
      "loss": 4.0083,
      "step": 24734
    },
    {
      "epoch": 0.24735,
      "grad_norm": 1.5622013523275582,
      "learning_rate": 0.003,
      "loss": 4.07,
      "step": 24735
    },
    {
      "epoch": 0.24736,
      "grad_norm": 1.2390457894957734,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 24736
    },
    {
      "epoch": 0.24737,
      "grad_norm": 1.30194934702204,
      "learning_rate": 0.003,
      "loss": 4.0354,
      "step": 24737
    },
    {
      "epoch": 0.24738,
      "grad_norm": 1.1536400015172708,
      "learning_rate": 0.003,
      "loss": 4.0471,
      "step": 24738
    },
    {
      "epoch": 0.24739,
      "grad_norm": 1.4124285335637157,
      "learning_rate": 0.003,
      "loss": 4.0813,
      "step": 24739
    },
    {
      "epoch": 0.2474,
      "grad_norm": 1.0967050518465338,
      "learning_rate": 0.003,
      "loss": 4.0523,
      "step": 24740
    },
    {
      "epoch": 0.24741,
      "grad_norm": 1.4710657972621985,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 24741
    },
    {
      "epoch": 0.24742,
      "grad_norm": 1.2096606346118244,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 24742
    },
    {
      "epoch": 0.24743,
      "grad_norm": 1.3679647516220717,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 24743
    },
    {
      "epoch": 0.24744,
      "grad_norm": 1.1742150836353038,
      "learning_rate": 0.003,
      "loss": 4.0255,
      "step": 24744
    },
    {
      "epoch": 0.24745,
      "grad_norm": 1.2732545294816122,
      "learning_rate": 0.003,
      "loss": 4.0392,
      "step": 24745
    },
    {
      "epoch": 0.24746,
      "grad_norm": 1.5917479743270604,
      "learning_rate": 0.003,
      "loss": 4.0588,
      "step": 24746
    },
    {
      "epoch": 0.24747,
      "grad_norm": 1.1161440284078028,
      "learning_rate": 0.003,
      "loss": 4.0633,
      "step": 24747
    },
    {
      "epoch": 0.24748,
      "grad_norm": 1.4639040731628656,
      "learning_rate": 0.003,
      "loss": 4.0556,
      "step": 24748
    },
    {
      "epoch": 0.24749,
      "grad_norm": 1.4738704838118968,
      "learning_rate": 0.003,
      "loss": 4.0474,
      "step": 24749
    },
    {
      "epoch": 0.2475,
      "grad_norm": 1.5126924294911215,
      "learning_rate": 0.003,
      "loss": 4.0203,
      "step": 24750
    },
    {
      "epoch": 0.24751,
      "grad_norm": 1.1082978514104664,
      "learning_rate": 0.003,
      "loss": 4.0574,
      "step": 24751
    },
    {
      "epoch": 0.24752,
      "grad_norm": 1.438038418817806,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 24752
    },
    {
      "epoch": 0.24753,
      "grad_norm": 1.2730727211030077,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 24753
    },
    {
      "epoch": 0.24754,
      "grad_norm": 1.397641301636969,
      "learning_rate": 0.003,
      "loss": 4.0731,
      "step": 24754
    },
    {
      "epoch": 0.24755,
      "grad_norm": 1.248110866539072,
      "learning_rate": 0.003,
      "loss": 4.0012,
      "step": 24755
    },
    {
      "epoch": 0.24756,
      "grad_norm": 1.434642856028332,
      "learning_rate": 0.003,
      "loss": 4.0522,
      "step": 24756
    },
    {
      "epoch": 0.24757,
      "grad_norm": 1.3081070512561468,
      "learning_rate": 0.003,
      "loss": 4.024,
      "step": 24757
    },
    {
      "epoch": 0.24758,
      "grad_norm": 1.3937514744875894,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 24758
    },
    {
      "epoch": 0.24759,
      "grad_norm": 1.162035572462083,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 24759
    },
    {
      "epoch": 0.2476,
      "grad_norm": 1.3917216881671628,
      "learning_rate": 0.003,
      "loss": 4.0528,
      "step": 24760
    },
    {
      "epoch": 0.24761,
      "grad_norm": 1.294718613942674,
      "learning_rate": 0.003,
      "loss": 4.0324,
      "step": 24761
    },
    {
      "epoch": 0.24762,
      "grad_norm": 1.186068602184914,
      "learning_rate": 0.003,
      "loss": 4.0707,
      "step": 24762
    },
    {
      "epoch": 0.24763,
      "grad_norm": 1.488225379369762,
      "learning_rate": 0.003,
      "loss": 4.072,
      "step": 24763
    },
    {
      "epoch": 0.24764,
      "grad_norm": 1.1990694990406738,
      "learning_rate": 0.003,
      "loss": 4.0653,
      "step": 24764
    },
    {
      "epoch": 0.24765,
      "grad_norm": 1.4359569549121276,
      "learning_rate": 0.003,
      "loss": 4.0467,
      "step": 24765
    },
    {
      "epoch": 0.24766,
      "grad_norm": 1.3549635171675725,
      "learning_rate": 0.003,
      "loss": 4.0489,
      "step": 24766
    },
    {
      "epoch": 0.24767,
      "grad_norm": 1.4808283938961393,
      "learning_rate": 0.003,
      "loss": 4.0823,
      "step": 24767
    },
    {
      "epoch": 0.24768,
      "grad_norm": 1.2438624114965242,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 24768
    },
    {
      "epoch": 0.24769,
      "grad_norm": 1.1912376400445561,
      "learning_rate": 0.003,
      "loss": 4.0171,
      "step": 24769
    },
    {
      "epoch": 0.2477,
      "grad_norm": 1.2325990814215235,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 24770
    },
    {
      "epoch": 0.24771,
      "grad_norm": 1.2736129763514705,
      "learning_rate": 0.003,
      "loss": 4.0466,
      "step": 24771
    },
    {
      "epoch": 0.24772,
      "grad_norm": 1.465094437512808,
      "learning_rate": 0.003,
      "loss": 4.02,
      "step": 24772
    },
    {
      "epoch": 0.24773,
      "grad_norm": 1.2135453610233906,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 24773
    },
    {
      "epoch": 0.24774,
      "grad_norm": 1.3624481935093014,
      "learning_rate": 0.003,
      "loss": 4.0468,
      "step": 24774
    },
    {
      "epoch": 0.24775,
      "grad_norm": 1.1536811584715754,
      "learning_rate": 0.003,
      "loss": 4.0254,
      "step": 24775
    },
    {
      "epoch": 0.24776,
      "grad_norm": 1.3187340249875852,
      "learning_rate": 0.003,
      "loss": 4.0569,
      "step": 24776
    },
    {
      "epoch": 0.24777,
      "grad_norm": 1.233953781610818,
      "learning_rate": 0.003,
      "loss": 4.049,
      "step": 24777
    },
    {
      "epoch": 0.24778,
      "grad_norm": 1.3960283235642599,
      "learning_rate": 0.003,
      "loss": 4.0684,
      "step": 24778
    },
    {
      "epoch": 0.24779,
      "grad_norm": 1.3948062730493118,
      "learning_rate": 0.003,
      "loss": 4.0335,
      "step": 24779
    },
    {
      "epoch": 0.2478,
      "grad_norm": 1.5227610942988106,
      "learning_rate": 0.003,
      "loss": 4.0557,
      "step": 24780
    },
    {
      "epoch": 0.24781,
      "grad_norm": 1.325066226081347,
      "learning_rate": 0.003,
      "loss": 4.026,
      "step": 24781
    },
    {
      "epoch": 0.24782,
      "grad_norm": 1.2162613188969975,
      "learning_rate": 0.003,
      "loss": 4.0511,
      "step": 24782
    },
    {
      "epoch": 0.24783,
      "grad_norm": 1.3143009857887438,
      "learning_rate": 0.003,
      "loss": 4.065,
      "step": 24783
    },
    {
      "epoch": 0.24784,
      "grad_norm": 1.1747425297920604,
      "learning_rate": 0.003,
      "loss": 4.047,
      "step": 24784
    },
    {
      "epoch": 0.24785,
      "grad_norm": 1.2020442552266992,
      "learning_rate": 0.003,
      "loss": 4.0338,
      "step": 24785
    },
    {
      "epoch": 0.24786,
      "grad_norm": 1.4193470845232725,
      "learning_rate": 0.003,
      "loss": 4.0581,
      "step": 24786
    },
    {
      "epoch": 0.24787,
      "grad_norm": 1.0744574878557753,
      "learning_rate": 0.003,
      "loss": 4.0408,
      "step": 24787
    },
    {
      "epoch": 0.24788,
      "grad_norm": 1.3867147598561682,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 24788
    },
    {
      "epoch": 0.24789,
      "grad_norm": 1.1033279944956433,
      "learning_rate": 0.003,
      "loss": 4.041,
      "step": 24789
    },
    {
      "epoch": 0.2479,
      "grad_norm": 1.514456470926197,
      "learning_rate": 0.003,
      "loss": 4.0456,
      "step": 24790
    },
    {
      "epoch": 0.24791,
      "grad_norm": 1.1661299733164954,
      "learning_rate": 0.003,
      "loss": 4.035,
      "step": 24791
    },
    {
      "epoch": 0.24792,
      "grad_norm": 1.3674813529224923,
      "learning_rate": 0.003,
      "loss": 4.0343,
      "step": 24792
    },
    {
      "epoch": 0.24793,
      "grad_norm": 1.3447654072368525,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 24793
    },
    {
      "epoch": 0.24794,
      "grad_norm": 1.2024270036449556,
      "learning_rate": 0.003,
      "loss": 4.04,
      "step": 24794
    },
    {
      "epoch": 0.24795,
      "grad_norm": 1.189755279279851,
      "learning_rate": 0.003,
      "loss": 4.0484,
      "step": 24795
    },
    {
      "epoch": 0.24796,
      "grad_norm": 1.4394369789290944,
      "learning_rate": 0.003,
      "loss": 4.0596,
      "step": 24796
    },
    {
      "epoch": 0.24797,
      "grad_norm": 1.490508812769484,
      "learning_rate": 0.003,
      "loss": 4.0379,
      "step": 24797
    },
    {
      "epoch": 0.24798,
      "grad_norm": 1.1360240109791595,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 24798
    },
    {
      "epoch": 0.24799,
      "grad_norm": 1.3107246746499897,
      "learning_rate": 0.003,
      "loss": 4.0368,
      "step": 24799
    },
    {
      "epoch": 0.248,
      "grad_norm": 1.388135958393749,
      "learning_rate": 0.003,
      "loss": 4.0245,
      "step": 24800
    },
    {
      "epoch": 0.24801,
      "grad_norm": 1.4166670972679642,
      "learning_rate": 0.003,
      "loss": 4.0754,
      "step": 24801
    },
    {
      "epoch": 0.24802,
      "grad_norm": 1.3719663717740094,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 24802
    },
    {
      "epoch": 0.24803,
      "grad_norm": 1.315136630776163,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 24803
    },
    {
      "epoch": 0.24804,
      "grad_norm": 1.6254520770721155,
      "learning_rate": 0.003,
      "loss": 4.0716,
      "step": 24804
    },
    {
      "epoch": 0.24805,
      "grad_norm": 1.204621142480316,
      "learning_rate": 0.003,
      "loss": 4.017,
      "step": 24805
    },
    {
      "epoch": 0.24806,
      "grad_norm": 1.3101489215025623,
      "learning_rate": 0.003,
      "loss": 4.011,
      "step": 24806
    },
    {
      "epoch": 0.24807,
      "grad_norm": 1.2783580755497896,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 24807
    },
    {
      "epoch": 0.24808,
      "grad_norm": 1.345337236584261,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 24808
    },
    {
      "epoch": 0.24809,
      "grad_norm": 1.2315383657301913,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 24809
    },
    {
      "epoch": 0.2481,
      "grad_norm": 1.3277653672294916,
      "learning_rate": 0.003,
      "loss": 4.0238,
      "step": 24810
    },
    {
      "epoch": 0.24811,
      "grad_norm": 1.1690599936909545,
      "learning_rate": 0.003,
      "loss": 4.0346,
      "step": 24811
    },
    {
      "epoch": 0.24812,
      "grad_norm": 1.5045631353919042,
      "learning_rate": 0.003,
      "loss": 4.0875,
      "step": 24812
    },
    {
      "epoch": 0.24813,
      "grad_norm": 1.0267103629545606,
      "learning_rate": 0.003,
      "loss": 4.03,
      "step": 24813
    },
    {
      "epoch": 0.24814,
      "grad_norm": 1.3509188671929264,
      "learning_rate": 0.003,
      "loss": 4.0512,
      "step": 24814
    },
    {
      "epoch": 0.24815,
      "grad_norm": 1.033809952506082,
      "learning_rate": 0.003,
      "loss": 4.0196,
      "step": 24815
    },
    {
      "epoch": 0.24816,
      "grad_norm": 1.7073427245155273,
      "learning_rate": 0.003,
      "loss": 4.0508,
      "step": 24816
    },
    {
      "epoch": 0.24817,
      "grad_norm": 1.1447934399893742,
      "learning_rate": 0.003,
      "loss": 4.0536,
      "step": 24817
    },
    {
      "epoch": 0.24818,
      "grad_norm": 1.7777331993370546,
      "learning_rate": 0.003,
      "loss": 4.0583,
      "step": 24818
    },
    {
      "epoch": 0.24819,
      "grad_norm": 1.2187031506523225,
      "learning_rate": 0.003,
      "loss": 4.0332,
      "step": 24819
    },
    {
      "epoch": 0.2482,
      "grad_norm": 1.2791381097948606,
      "learning_rate": 0.003,
      "loss": 4.0812,
      "step": 24820
    },
    {
      "epoch": 0.24821,
      "grad_norm": 1.3223645942575406,
      "learning_rate": 0.003,
      "loss": 4.0796,
      "step": 24821
    },
    {
      "epoch": 0.24822,
      "grad_norm": 1.3462386833969726,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 24822
    },
    {
      "epoch": 0.24823,
      "grad_norm": 1.3254529953182204,
      "learning_rate": 0.003,
      "loss": 4.0561,
      "step": 24823
    },
    {
      "epoch": 0.24824,
      "grad_norm": 1.3873215194984942,
      "learning_rate": 0.003,
      "loss": 4.014,
      "step": 24824
    },
    {
      "epoch": 0.24825,
      "grad_norm": 1.2892530199315535,
      "learning_rate": 0.003,
      "loss": 4.0469,
      "step": 24825
    },
    {
      "epoch": 0.24826,
      "grad_norm": 1.1606606625653142,
      "learning_rate": 0.003,
      "loss": 4.0678,
      "step": 24826
    },
    {
      "epoch": 0.24827,
      "grad_norm": 1.3272815149363186,
      "learning_rate": 0.003,
      "loss": 4.0362,
      "step": 24827
    },
    {
      "epoch": 0.24828,
      "grad_norm": 1.2169734939715162,
      "learning_rate": 0.003,
      "loss": 4.0401,
      "step": 24828
    },
    {
      "epoch": 0.24829,
      "grad_norm": 1.321292976815381,
      "learning_rate": 0.003,
      "loss": 4.0765,
      "step": 24829
    },
    {
      "epoch": 0.2483,
      "grad_norm": 1.2275146962478736,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 24830
    },
    {
      "epoch": 0.24831,
      "grad_norm": 1.2849327916822382,
      "learning_rate": 0.003,
      "loss": 4.0568,
      "step": 24831
    },
    {
      "epoch": 0.24832,
      "grad_norm": 1.2552342176496158,
      "learning_rate": 0.003,
      "loss": 4.0328,
      "step": 24832
    },
    {
      "epoch": 0.24833,
      "grad_norm": 1.1850142780196402,
      "learning_rate": 0.003,
      "loss": 4.0297,
      "step": 24833
    },
    {
      "epoch": 0.24834,
      "grad_norm": 1.4761905541094922,
      "learning_rate": 0.003,
      "loss": 4.0507,
      "step": 24834
    },
    {
      "epoch": 0.24835,
      "grad_norm": 0.9952552845547482,
      "learning_rate": 0.003,
      "loss": 4.0838,
      "step": 24835
    },
    {
      "epoch": 0.24836,
      "grad_norm": 1.64567562436426,
      "learning_rate": 0.003,
      "loss": 4.0759,
      "step": 24836
    },
    {
      "epoch": 0.24837,
      "grad_norm": 1.0335218112244615,
      "learning_rate": 0.003,
      "loss": 4.0591,
      "step": 24837
    },
    {
      "epoch": 0.24838,
      "grad_norm": 1.456751886992233,
      "learning_rate": 0.003,
      "loss": 4.068,
      "step": 24838
    },
    {
      "epoch": 0.24839,
      "grad_norm": 1.370694060841814,
      "learning_rate": 0.003,
      "loss": 4.0472,
      "step": 24839
    },
    {
      "epoch": 0.2484,
      "grad_norm": 1.6010495367342046,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 24840
    },
    {
      "epoch": 0.24841,
      "grad_norm": 1.2585243376857214,
      "learning_rate": 0.003,
      "loss": 4.061,
      "step": 24841
    },
    {
      "epoch": 0.24842,
      "grad_norm": 1.3955169206604339,
      "learning_rate": 0.003,
      "loss": 4.0374,
      "step": 24842
    },
    {
      "epoch": 0.24843,
      "grad_norm": 1.217558330392425,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 24843
    },
    {
      "epoch": 0.24844,
      "grad_norm": 1.3320087226292747,
      "learning_rate": 0.003,
      "loss": 4.0601,
      "step": 24844
    },
    {
      "epoch": 0.24845,
      "grad_norm": 1.2662195145008068,
      "learning_rate": 0.003,
      "loss": 4.0597,
      "step": 24845
    },
    {
      "epoch": 0.24846,
      "grad_norm": 1.6459588163912817,
      "learning_rate": 0.003,
      "loss": 4.0541,
      "step": 24846
    },
    {
      "epoch": 0.24847,
      "grad_norm": 1.0615972540018532,
      "learning_rate": 0.003,
      "loss": 4.034,
      "step": 24847
    },
    {
      "epoch": 0.24848,
      "grad_norm": 1.4247782708807473,
      "learning_rate": 0.003,
      "loss": 4.0545,
      "step": 24848
    },
    {
      "epoch": 0.24849,
      "grad_norm": 1.404094115799113,
      "learning_rate": 0.003,
      "loss": 4.0744,
      "step": 24849
    },
    {
      "epoch": 0.2485,
      "grad_norm": 1.1321383446283486,
      "learning_rate": 0.003,
      "loss": 4.038,
      "step": 24850
    },
    {
      "epoch": 0.24851,
      "grad_norm": 1.4268603954689754,
      "learning_rate": 0.003,
      "loss": 4.0403,
      "step": 24851
    },
    {
      "epoch": 0.24852,
      "grad_norm": 1.124554039505304,
      "learning_rate": 0.003,
      "loss": 4.0584,
      "step": 24852
    },
    {
      "epoch": 0.24853,
      "grad_norm": 1.5391706100709552,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 24853
    },
    {
      "epoch": 0.24854,
      "grad_norm": 1.345324769236744,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 24854
    },
    {
      "epoch": 0.24855,
      "grad_norm": 1.2178827084696286,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 24855
    },
    {
      "epoch": 0.24856,
      "grad_norm": 1.3985756046679478,
      "learning_rate": 0.003,
      "loss": 4.0618,
      "step": 24856
    },
    {
      "epoch": 0.24857,
      "grad_norm": 1.282094905809099,
      "learning_rate": 0.003,
      "loss": 4.0323,
      "step": 24857
    },
    {
      "epoch": 0.24858,
      "grad_norm": 1.3971841829725187,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 24858
    },
    {
      "epoch": 0.24859,
      "grad_norm": 1.0167270092121614,
      "learning_rate": 0.003,
      "loss": 4.029,
      "step": 24859
    },
    {
      "epoch": 0.2486,
      "grad_norm": 1.6410253455301005,
      "learning_rate": 0.003,
      "loss": 4.0628,
      "step": 24860
    },
    {
      "epoch": 0.24861,
      "grad_norm": 1.0947343488654815,
      "learning_rate": 0.003,
      "loss": 4.0496,
      "step": 24861
    },
    {
      "epoch": 0.24862,
      "grad_norm": 1.615653705581511,
      "learning_rate": 0.003,
      "loss": 4.0453,
      "step": 24862
    },
    {
      "epoch": 0.24863,
      "grad_norm": 1.0480602943795017,
      "learning_rate": 0.003,
      "loss": 4.0575,
      "step": 24863
    },
    {
      "epoch": 0.24864,
      "grad_norm": 1.4340938238238872,
      "learning_rate": 0.003,
      "loss": 4.0439,
      "step": 24864
    },
    {
      "epoch": 0.24865,
      "grad_norm": 1.256553088722349,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 24865
    },
    {
      "epoch": 0.24866,
      "grad_norm": 1.2796771910035982,
      "learning_rate": 0.003,
      "loss": 4.0465,
      "step": 24866
    },
    {
      "epoch": 0.24867,
      "grad_norm": 1.4318073566017118,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24867
    },
    {
      "epoch": 0.24868,
      "grad_norm": 1.3578843653158852,
      "learning_rate": 0.003,
      "loss": 4.0424,
      "step": 24868
    },
    {
      "epoch": 0.24869,
      "grad_norm": 1.2312214246564268,
      "learning_rate": 0.003,
      "loss": 4.027,
      "step": 24869
    },
    {
      "epoch": 0.2487,
      "grad_norm": 1.443613645179922,
      "learning_rate": 0.003,
      "loss": 4.0755,
      "step": 24870
    },
    {
      "epoch": 0.24871,
      "grad_norm": 1.1716001114947323,
      "learning_rate": 0.003,
      "loss": 4.053,
      "step": 24871
    },
    {
      "epoch": 0.24872,
      "grad_norm": 1.25469613093655,
      "learning_rate": 0.003,
      "loss": 4.0758,
      "step": 24872
    },
    {
      "epoch": 0.24873,
      "grad_norm": 1.1976399321957636,
      "learning_rate": 0.003,
      "loss": 4.0348,
      "step": 24873
    },
    {
      "epoch": 0.24874,
      "grad_norm": 1.4825872385087249,
      "learning_rate": 0.003,
      "loss": 4.0674,
      "step": 24874
    },
    {
      "epoch": 0.24875,
      "grad_norm": 1.199075633660232,
      "learning_rate": 0.003,
      "loss": 4.0442,
      "step": 24875
    },
    {
      "epoch": 0.24876,
      "grad_norm": 1.3294412459496947,
      "learning_rate": 0.003,
      "loss": 4.0491,
      "step": 24876
    },
    {
      "epoch": 0.24877,
      "grad_norm": 1.1899160828251503,
      "learning_rate": 0.003,
      "loss": 4.0858,
      "step": 24877
    },
    {
      "epoch": 0.24878,
      "grad_norm": 1.3507263116966315,
      "learning_rate": 0.003,
      "loss": 4.0269,
      "step": 24878
    },
    {
      "epoch": 0.24879,
      "grad_norm": 1.1752668344798443,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 24879
    },
    {
      "epoch": 0.2488,
      "grad_norm": 1.365871832238357,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 24880
    },
    {
      "epoch": 0.24881,
      "grad_norm": 1.1864351338647452,
      "learning_rate": 0.003,
      "loss": 4.0418,
      "step": 24881
    },
    {
      "epoch": 0.24882,
      "grad_norm": 1.8130414723760673,
      "learning_rate": 0.003,
      "loss": 4.0345,
      "step": 24882
    },
    {
      "epoch": 0.24883,
      "grad_norm": 1.1902322356464166,
      "learning_rate": 0.003,
      "loss": 4.0443,
      "step": 24883
    },
    {
      "epoch": 0.24884,
      "grad_norm": 1.3805386263469854,
      "learning_rate": 0.003,
      "loss": 4.0708,
      "step": 24884
    },
    {
      "epoch": 0.24885,
      "grad_norm": 1.393028605181782,
      "learning_rate": 0.003,
      "loss": 4.0701,
      "step": 24885
    },
    {
      "epoch": 0.24886,
      "grad_norm": 1.1816888359306001,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 24886
    },
    {
      "epoch": 0.24887,
      "grad_norm": 1.2593770053397053,
      "learning_rate": 0.003,
      "loss": 4.0282,
      "step": 24887
    },
    {
      "epoch": 0.24888,
      "grad_norm": 1.2623398076025991,
      "learning_rate": 0.003,
      "loss": 4.0369,
      "step": 24888
    },
    {
      "epoch": 0.24889,
      "grad_norm": 1.4081637875533586,
      "learning_rate": 0.003,
      "loss": 4.0476,
      "step": 24889
    },
    {
      "epoch": 0.2489,
      "grad_norm": 1.2347770438980452,
      "learning_rate": 0.003,
      "loss": 4.0259,
      "step": 24890
    },
    {
      "epoch": 0.24891,
      "grad_norm": 1.3143360459998792,
      "learning_rate": 0.003,
      "loss": 4.0535,
      "step": 24891
    },
    {
      "epoch": 0.24892,
      "grad_norm": 1.2505351261334239,
      "learning_rate": 0.003,
      "loss": 4.0871,
      "step": 24892
    },
    {
      "epoch": 0.24893,
      "grad_norm": 1.4881107895539392,
      "learning_rate": 0.003,
      "loss": 4.0542,
      "step": 24893
    },
    {
      "epoch": 0.24894,
      "grad_norm": 1.1980400185654467,
      "learning_rate": 0.003,
      "loss": 4.0272,
      "step": 24894
    },
    {
      "epoch": 0.24895,
      "grad_norm": 1.3963607941149216,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 24895
    },
    {
      "epoch": 0.24896,
      "grad_norm": 1.1261507696117585,
      "learning_rate": 0.003,
      "loss": 4.0565,
      "step": 24896
    },
    {
      "epoch": 0.24897,
      "grad_norm": 1.4303821433547546,
      "learning_rate": 0.003,
      "loss": 4.0799,
      "step": 24897
    },
    {
      "epoch": 0.24898,
      "grad_norm": 1.106842255482435,
      "learning_rate": 0.003,
      "loss": 4.0562,
      "step": 24898
    },
    {
      "epoch": 0.24899,
      "grad_norm": 1.5781518443465556,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 24899
    },
    {
      "epoch": 0.249,
      "grad_norm": 1.1324549165897573,
      "learning_rate": 0.003,
      "loss": 4.0539,
      "step": 24900
    },
    {
      "epoch": 0.24901,
      "grad_norm": 1.4005635818984645,
      "learning_rate": 0.003,
      "loss": 4.0265,
      "step": 24901
    },
    {
      "epoch": 0.24902,
      "grad_norm": 1.186576276095947,
      "learning_rate": 0.003,
      "loss": 4.0437,
      "step": 24902
    },
    {
      "epoch": 0.24903,
      "grad_norm": 1.4884892104138732,
      "learning_rate": 0.003,
      "loss": 4.0518,
      "step": 24903
    },
    {
      "epoch": 0.24904,
      "grad_norm": 1.1401012143422864,
      "learning_rate": 0.003,
      "loss": 4.0331,
      "step": 24904
    },
    {
      "epoch": 0.24905,
      "grad_norm": 1.5654247514891317,
      "learning_rate": 0.003,
      "loss": 4.078,
      "step": 24905
    },
    {
      "epoch": 0.24906,
      "grad_norm": 1.159083492034224,
      "learning_rate": 0.003,
      "loss": 4.0531,
      "step": 24906
    },
    {
      "epoch": 0.24907,
      "grad_norm": 1.3331641174505202,
      "learning_rate": 0.003,
      "loss": 4.0431,
      "step": 24907
    },
    {
      "epoch": 0.24908,
      "grad_norm": 1.3235734104050785,
      "learning_rate": 0.003,
      "loss": 4.0686,
      "step": 24908
    },
    {
      "epoch": 0.24909,
      "grad_norm": 1.266198004793703,
      "learning_rate": 0.003,
      "loss": 4.0586,
      "step": 24909
    },
    {
      "epoch": 0.2491,
      "grad_norm": 1.3622049963928764,
      "learning_rate": 0.003,
      "loss": 4.039,
      "step": 24910
    },
    {
      "epoch": 0.24911,
      "grad_norm": 1.2935211647872877,
      "learning_rate": 0.003,
      "loss": 4.0713,
      "step": 24911
    },
    {
      "epoch": 0.24912,
      "grad_norm": 1.2522251838013183,
      "learning_rate": 0.003,
      "loss": 4.0567,
      "step": 24912
    },
    {
      "epoch": 0.24913,
      "grad_norm": 1.3145815048689953,
      "learning_rate": 0.003,
      "loss": 4.0576,
      "step": 24913
    },
    {
      "epoch": 0.24914,
      "grad_norm": 1.3048849262888975,
      "learning_rate": 0.003,
      "loss": 4.0615,
      "step": 24914
    },
    {
      "epoch": 0.24915,
      "grad_norm": 1.3990876087624526,
      "learning_rate": 0.003,
      "loss": 4.0803,
      "step": 24915
    },
    {
      "epoch": 0.24916,
      "grad_norm": 1.125601860247761,
      "learning_rate": 0.003,
      "loss": 4.056,
      "step": 24916
    },
    {
      "epoch": 0.24917,
      "grad_norm": 1.4314018088372251,
      "learning_rate": 0.003,
      "loss": 4.0519,
      "step": 24917
    },
    {
      "epoch": 0.24918,
      "grad_norm": 1.2916457830616057,
      "learning_rate": 0.003,
      "loss": 4.0381,
      "step": 24918
    },
    {
      "epoch": 0.24919,
      "grad_norm": 1.383588221573143,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 24919
    },
    {
      "epoch": 0.2492,
      "grad_norm": 1.3577897857730004,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 24920
    },
    {
      "epoch": 0.24921,
      "grad_norm": 1.1757574360808953,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 24921
    },
    {
      "epoch": 0.24922,
      "grad_norm": 1.7279469446662432,
      "learning_rate": 0.003,
      "loss": 4.0451,
      "step": 24922
    },
    {
      "epoch": 0.24923,
      "grad_norm": 1.2510772742299967,
      "learning_rate": 0.003,
      "loss": 4.0455,
      "step": 24923
    },
    {
      "epoch": 0.24924,
      "grad_norm": 1.316096105159205,
      "learning_rate": 0.003,
      "loss": 4.0404,
      "step": 24924
    },
    {
      "epoch": 0.24925,
      "grad_norm": 1.3815618594288337,
      "learning_rate": 0.003,
      "loss": 4.0421,
      "step": 24925
    },
    {
      "epoch": 0.24926,
      "grad_norm": 1.2221481336481006,
      "learning_rate": 0.003,
      "loss": 4.0414,
      "step": 24926
    },
    {
      "epoch": 0.24927,
      "grad_norm": 1.50187363948413,
      "learning_rate": 0.003,
      "loss": 4.0488,
      "step": 24927
    },
    {
      "epoch": 0.24928,
      "grad_norm": 1.275336787306216,
      "learning_rate": 0.003,
      "loss": 4.0564,
      "step": 24928
    },
    {
      "epoch": 0.24929,
      "grad_norm": 1.0147941597378278,
      "learning_rate": 0.003,
      "loss": 4.0606,
      "step": 24929
    },
    {
      "epoch": 0.2493,
      "grad_norm": 1.4789979646221407,
      "learning_rate": 0.003,
      "loss": 4.0262,
      "step": 24930
    },
    {
      "epoch": 0.24931,
      "grad_norm": 1.1891010042765089,
      "learning_rate": 0.003,
      "loss": 4.0722,
      "step": 24931
    },
    {
      "epoch": 0.24932,
      "grad_norm": 1.491651679136415,
      "learning_rate": 0.003,
      "loss": 4.0514,
      "step": 24932
    },
    {
      "epoch": 0.24933,
      "grad_norm": 1.265124354694425,
      "learning_rate": 0.003,
      "loss": 4.0612,
      "step": 24933
    },
    {
      "epoch": 0.24934,
      "grad_norm": 1.4084685637239533,
      "learning_rate": 0.003,
      "loss": 4.0656,
      "step": 24934
    },
    {
      "epoch": 0.24935,
      "grad_norm": 1.202236967976963,
      "learning_rate": 0.003,
      "loss": 4.0598,
      "step": 24935
    },
    {
      "epoch": 0.24936,
      "grad_norm": 1.2136212026763542,
      "learning_rate": 0.003,
      "loss": 4.0438,
      "step": 24936
    },
    {
      "epoch": 0.24937,
      "grad_norm": 1.559004736445429,
      "learning_rate": 0.003,
      "loss": 4.0571,
      "step": 24937
    },
    {
      "epoch": 0.24938,
      "grad_norm": 1.35275156518635,
      "learning_rate": 0.003,
      "loss": 4.0529,
      "step": 24938
    },
    {
      "epoch": 0.24939,
      "grad_norm": 1.5015669338305977,
      "learning_rate": 0.003,
      "loss": 4.0554,
      "step": 24939
    },
    {
      "epoch": 0.2494,
      "grad_norm": 1.275156545022829,
      "learning_rate": 0.003,
      "loss": 4.0497,
      "step": 24940
    },
    {
      "epoch": 0.24941,
      "grad_norm": 1.2182848171525187,
      "learning_rate": 0.003,
      "loss": 4.0356,
      "step": 24941
    },
    {
      "epoch": 0.24942,
      "grad_norm": 1.403902010995316,
      "learning_rate": 0.003,
      "loss": 4.0532,
      "step": 24942
    },
    {
      "epoch": 0.24943,
      "grad_norm": 1.2468410086238597,
      "learning_rate": 0.003,
      "loss": 4.0319,
      "step": 24943
    },
    {
      "epoch": 0.24944,
      "grad_norm": 1.2301364873403982,
      "learning_rate": 0.003,
      "loss": 4.019,
      "step": 24944
    },
    {
      "epoch": 0.24945,
      "grad_norm": 1.2033359033910367,
      "learning_rate": 0.003,
      "loss": 4.013,
      "step": 24945
    },
    {
      "epoch": 0.24946,
      "grad_norm": 1.4592038702804437,
      "learning_rate": 0.003,
      "loss": 4.0444,
      "step": 24946
    },
    {
      "epoch": 0.24947,
      "grad_norm": 1.1334231896485036,
      "learning_rate": 0.003,
      "loss": 4.0329,
      "step": 24947
    },
    {
      "epoch": 0.24948,
      "grad_norm": 1.4415079109978626,
      "learning_rate": 0.003,
      "loss": 4.0688,
      "step": 24948
    },
    {
      "epoch": 0.24949,
      "grad_norm": 1.17614349289862,
      "learning_rate": 0.003,
      "loss": 4.0143,
      "step": 24949
    },
    {
      "epoch": 0.2495,
      "grad_norm": 1.5593115471186048,
      "learning_rate": 0.003,
      "loss": 4.0692,
      "step": 24950
    },
    {
      "epoch": 0.24951,
      "grad_norm": 0.9283938653658199,
      "learning_rate": 0.003,
      "loss": 4.0428,
      "step": 24951
    },
    {
      "epoch": 0.24952,
      "grad_norm": 1.5458796456523693,
      "learning_rate": 0.003,
      "loss": 4.0307,
      "step": 24952
    },
    {
      "epoch": 0.24953,
      "grad_norm": 1.1037503801836566,
      "learning_rate": 0.003,
      "loss": 4.0273,
      "step": 24953
    },
    {
      "epoch": 0.24954,
      "grad_norm": 1.4507951213683754,
      "learning_rate": 0.003,
      "loss": 4.0333,
      "step": 24954
    },
    {
      "epoch": 0.24955,
      "grad_norm": 1.217547714662238,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 24955
    },
    {
      "epoch": 0.24956,
      "grad_norm": 1.4072451690922527,
      "learning_rate": 0.003,
      "loss": 4.0538,
      "step": 24956
    },
    {
      "epoch": 0.24957,
      "grad_norm": 1.0918494147694893,
      "learning_rate": 0.003,
      "loss": 4.0649,
      "step": 24957
    },
    {
      "epoch": 0.24958,
      "grad_norm": 1.9321266883903285,
      "learning_rate": 0.003,
      "loss": 4.0669,
      "step": 24958
    },
    {
      "epoch": 0.24959,
      "grad_norm": 1.1152484527119353,
      "learning_rate": 0.003,
      "loss": 4.028,
      "step": 24959
    },
    {
      "epoch": 0.2496,
      "grad_norm": 1.3112365470042384,
      "learning_rate": 0.003,
      "loss": 4.0486,
      "step": 24960
    },
    {
      "epoch": 0.24961,
      "grad_norm": 1.3683398032590752,
      "learning_rate": 0.003,
      "loss": 4.0344,
      "step": 24961
    },
    {
      "epoch": 0.24962,
      "grad_norm": 1.3857230642207905,
      "learning_rate": 0.003,
      "loss": 4.0629,
      "step": 24962
    },
    {
      "epoch": 0.24963,
      "grad_norm": 1.2940292539319926,
      "learning_rate": 0.003,
      "loss": 4.0593,
      "step": 24963
    },
    {
      "epoch": 0.24964,
      "grad_norm": 1.1667678815727338,
      "learning_rate": 0.003,
      "loss": 4.0746,
      "step": 24964
    },
    {
      "epoch": 0.24965,
      "grad_norm": 1.3250197851198933,
      "learning_rate": 0.003,
      "loss": 4.062,
      "step": 24965
    },
    {
      "epoch": 0.24966,
      "grad_norm": 1.2424920919445346,
      "learning_rate": 0.003,
      "loss": 4.0703,
      "step": 24966
    },
    {
      "epoch": 0.24967,
      "grad_norm": 1.1263743998023235,
      "learning_rate": 0.003,
      "loss": 4.0711,
      "step": 24967
    },
    {
      "epoch": 0.24968,
      "grad_norm": 1.4848371770715436,
      "learning_rate": 0.003,
      "loss": 4.0387,
      "step": 24968
    },
    {
      "epoch": 0.24969,
      "grad_norm": 1.0306700634064905,
      "learning_rate": 0.003,
      "loss": 4.0461,
      "step": 24969
    },
    {
      "epoch": 0.2497,
      "grad_norm": 1.5267199933623845,
      "learning_rate": 0.003,
      "loss": 4.0301,
      "step": 24970
    },
    {
      "epoch": 0.24971,
      "grad_norm": 1.639320215013145,
      "learning_rate": 0.003,
      "loss": 4.0715,
      "step": 24971
    },
    {
      "epoch": 0.24972,
      "grad_norm": 1.1525468375552217,
      "learning_rate": 0.003,
      "loss": 4.0637,
      "step": 24972
    },
    {
      "epoch": 0.24973,
      "grad_norm": 1.7626246097754725,
      "learning_rate": 0.003,
      "loss": 4.0635,
      "step": 24973
    },
    {
      "epoch": 0.24974,
      "grad_norm": 1.3275200504636788,
      "learning_rate": 0.003,
      "loss": 4.0655,
      "step": 24974
    },
    {
      "epoch": 0.24975,
      "grad_norm": 1.3945586805684143,
      "learning_rate": 0.003,
      "loss": 4.043,
      "step": 24975
    },
    {
      "epoch": 0.24976,
      "grad_norm": 1.3400437810999533,
      "learning_rate": 0.003,
      "loss": 4.0694,
      "step": 24976
    },
    {
      "epoch": 0.24977,
      "grad_norm": 1.2333577154890487,
      "learning_rate": 0.003,
      "loss": 4.0517,
      "step": 24977
    },
    {
      "epoch": 0.24978,
      "grad_norm": 1.3397728435877232,
      "learning_rate": 0.003,
      "loss": 4.0395,
      "step": 24978
    },
    {
      "epoch": 0.24979,
      "grad_norm": 1.4886406015872247,
      "learning_rate": 0.003,
      "loss": 4.0762,
      "step": 24979
    },
    {
      "epoch": 0.2498,
      "grad_norm": 1.311555594354757,
      "learning_rate": 0.003,
      "loss": 4.0182,
      "step": 24980
    },
    {
      "epoch": 0.24981,
      "grad_norm": 1.4097119802725253,
      "learning_rate": 0.003,
      "loss": 4.0367,
      "step": 24981
    },
    {
      "epoch": 0.24982,
      "grad_norm": 1.257969179716512,
      "learning_rate": 0.003,
      "loss": 4.037,
      "step": 24982
    },
    {
      "epoch": 0.24983,
      "grad_norm": 1.528138306068266,
      "learning_rate": 0.003,
      "loss": 4.0239,
      "step": 24983
    },
    {
      "epoch": 0.24984,
      "grad_norm": 1.1447639613460119,
      "learning_rate": 0.003,
      "loss": 3.9934,
      "step": 24984
    },
    {
      "epoch": 0.24985,
      "grad_norm": 1.3336920987534133,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 24985
    },
    {
      "epoch": 0.24986,
      "grad_norm": 1.3190428493878157,
      "learning_rate": 0.003,
      "loss": 4.0399,
      "step": 24986
    },
    {
      "epoch": 0.24987,
      "grad_norm": 1.3301088479267904,
      "learning_rate": 0.003,
      "loss": 4.0463,
      "step": 24987
    },
    {
      "epoch": 0.24988,
      "grad_norm": 1.1570015010963344,
      "learning_rate": 0.003,
      "loss": 4.0594,
      "step": 24988
    },
    {
      "epoch": 0.24989,
      "grad_norm": 1.6213691303224982,
      "learning_rate": 0.003,
      "loss": 4.0505,
      "step": 24989
    },
    {
      "epoch": 0.2499,
      "grad_norm": 1.255825660263306,
      "learning_rate": 0.003,
      "loss": 4.0347,
      "step": 24990
    },
    {
      "epoch": 0.24991,
      "grad_norm": 1.588967416487042,
      "learning_rate": 0.003,
      "loss": 4.0475,
      "step": 24991
    },
    {
      "epoch": 0.24992,
      "grad_norm": 1.054545737037509,
      "learning_rate": 0.003,
      "loss": 4.0698,
      "step": 24992
    },
    {
      "epoch": 0.24993,
      "grad_norm": 1.433433725581734,
      "learning_rate": 0.003,
      "loss": 4.0524,
      "step": 24993
    },
    {
      "epoch": 0.24994,
      "grad_norm": 1.205648888491351,
      "learning_rate": 0.003,
      "loss": 4.0353,
      "step": 24994
    },
    {
      "epoch": 0.24995,
      "grad_norm": 1.4135515530513658,
      "learning_rate": 0.003,
      "loss": 4.0595,
      "step": 24995
    },
    {
      "epoch": 0.24996,
      "grad_norm": 1.3707149443458984,
      "learning_rate": 0.003,
      "loss": 4.0485,
      "step": 24996
    },
    {
      "epoch": 0.24997,
      "grad_norm": 1.3957772683443745,
      "learning_rate": 0.003,
      "loss": 4.0334,
      "step": 24997
    },
    {
      "epoch": 0.24998,
      "grad_norm": 1.4058385202482575,
      "learning_rate": 0.003,
      "loss": 4.0643,
      "step": 24998
    },
    {
      "epoch": 0.24999,
      "grad_norm": 1.1533823492711095,
      "learning_rate": 0.003,
      "loss": 4.0609,
      "step": 24999
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.5495313955319767,
      "learning_rate": 0.003,
      "loss": 4.06,
      "step": 25000
    }
  ],
  "logging_steps": 1,
  "max_steps": 100000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.910910713856e+17,
  "train_batch_size": 1024,
  "trial_name": null,
  "trial_params": null
}