|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.236421725239617, |
|
"eval_steps": 500, |
|
"global_step": 1400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001597444089456869, |
|
"grad_norm": 128.6180419921875, |
|
"learning_rate": 0.0, |
|
"loss": 1.9747, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003194888178913738, |
|
"grad_norm": 127.44181060791016, |
|
"learning_rate": 7.628243530057963e-08, |
|
"loss": 1.9176, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004792332268370607, |
|
"grad_norm": 130.29476928710938, |
|
"learning_rate": 1.2090479941510652e-07, |
|
"loss": 1.9397, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.006389776357827476, |
|
"grad_norm": 128.3740234375, |
|
"learning_rate": 1.5256487060115926e-07, |
|
"loss": 1.9046, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.007987220447284345, |
|
"grad_norm": 136.06153869628906, |
|
"learning_rate": 1.7712232967084336e-07, |
|
"loss": 1.947, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009584664536741214, |
|
"grad_norm": 134.7310333251953, |
|
"learning_rate": 1.9718723471568615e-07, |
|
"loss": 1.9161, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.011182108626198083, |
|
"grad_norm": 136.66058349609375, |
|
"learning_rate": 2.14151870207623e-07, |
|
"loss": 1.779, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.012779552715654952, |
|
"grad_norm": 142.6785888671875, |
|
"learning_rate": 2.288473059017389e-07, |
|
"loss": 1.7793, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01437699680511182, |
|
"grad_norm": 139.76708984375, |
|
"learning_rate": 2.4180959883021303e-07, |
|
"loss": 1.7003, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01597444089456869, |
|
"grad_norm": 147.7816162109375, |
|
"learning_rate": 2.53404764971423e-07, |
|
"loss": 1.742, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01757188498402556, |
|
"grad_norm": 159.0968780517578, |
|
"learning_rate": 2.638938686254791e-07, |
|
"loss": 1.6079, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.019169329073482427, |
|
"grad_norm": 138.03765869140625, |
|
"learning_rate": 2.7346967001626583e-07, |
|
"loss": 1.1859, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.020766773162939296, |
|
"grad_norm": 130.79318237304688, |
|
"learning_rate": 2.8227855338279304e-07, |
|
"loss": 1.1544, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.022364217252396165, |
|
"grad_norm": 128.29359436035156, |
|
"learning_rate": 2.904343055082026e-07, |
|
"loss": 1.0924, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.023961661341853034, |
|
"grad_norm": 118.21534729003906, |
|
"learning_rate": 2.9802712908594985e-07, |
|
"loss": 1.0143, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.025559105431309903, |
|
"grad_norm": 110.80689239501953, |
|
"learning_rate": 3.051297412023185e-07, |
|
"loss": 0.941, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.027156549520766772, |
|
"grad_norm": 112.3552474975586, |
|
"learning_rate": 3.1180161973120243e-07, |
|
"loss": 0.9826, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02875399361022364, |
|
"grad_norm": 59.84565353393555, |
|
"learning_rate": 3.1809203413079266e-07, |
|
"loss": 0.6646, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03035143769968051, |
|
"grad_norm": 61.63838195800781, |
|
"learning_rate": 3.2404225570581245e-07, |
|
"loss": 0.535, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03194888178913738, |
|
"grad_norm": 57.68476486206055, |
|
"learning_rate": 3.2968720027200265e-07, |
|
"loss": 0.451, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03354632587859425, |
|
"grad_norm": 58.70195388793945, |
|
"learning_rate": 3.350566696227295e-07, |
|
"loss": 0.4398, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.03514376996805112, |
|
"grad_norm": 60.67943572998047, |
|
"learning_rate": 3.401763039260588e-07, |
|
"loss": 0.3542, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.036741214057507986, |
|
"grad_norm": 54.30119705200195, |
|
"learning_rate": 3.450683222410826e-07, |
|
"loss": 0.3362, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.038338658146964855, |
|
"grad_norm": 27.211286544799805, |
|
"learning_rate": 3.4975210531684546e-07, |
|
"loss": 0.3206, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.039936102236421724, |
|
"grad_norm": 20.16046905517578, |
|
"learning_rate": 3.542446593416867e-07, |
|
"loss": 0.2747, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04153354632587859, |
|
"grad_norm": 18.191621780395508, |
|
"learning_rate": 3.585609886833727e-07, |
|
"loss": 0.2485, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.04313099041533546, |
|
"grad_norm": 17.691118240356445, |
|
"learning_rate": 3.627143982453195e-07, |
|
"loss": 0.2113, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.04472843450479233, |
|
"grad_norm": 16.775856018066406, |
|
"learning_rate": 3.6671674080878223e-07, |
|
"loss": 0.2395, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0463258785942492, |
|
"grad_norm": 11.414810180664062, |
|
"learning_rate": 3.7057862095226456e-07, |
|
"loss": 0.1754, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04792332268370607, |
|
"grad_norm": 11.763614654541016, |
|
"learning_rate": 3.7430956438652954e-07, |
|
"loss": 0.171, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04952076677316294, |
|
"grad_norm": 11.317436218261719, |
|
"learning_rate": 3.7791815951345716e-07, |
|
"loss": 0.1304, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.051118210862619806, |
|
"grad_norm": 11.866049766540527, |
|
"learning_rate": 3.814121765028982e-07, |
|
"loss": 0.1429, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.052715654952076675, |
|
"grad_norm": 7.949328899383545, |
|
"learning_rate": 3.8479866804058563e-07, |
|
"loss": 0.1008, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.054313099041533544, |
|
"grad_norm": 6.994161605834961, |
|
"learning_rate": 3.880840550317821e-07, |
|
"loss": 0.0818, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.05591054313099041, |
|
"grad_norm": 8.409013748168945, |
|
"learning_rate": 3.912741998784663e-07, |
|
"loss": 0.1117, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.05750798722044728, |
|
"grad_norm": 5.039752006530762, |
|
"learning_rate": 3.943744694313723e-07, |
|
"loss": 0.0769, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05910543130990415, |
|
"grad_norm": 5.305444717407227, |
|
"learning_rate": 3.973897893149772e-07, |
|
"loss": 0.0547, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.06070287539936102, |
|
"grad_norm": 6.901062488555908, |
|
"learning_rate": 4.003246910063921e-07, |
|
"loss": 0.0888, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.06230031948881789, |
|
"grad_norm": 5.630770683288574, |
|
"learning_rate": 4.031833527978995e-07, |
|
"loss": 0.0664, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.06389776357827476, |
|
"grad_norm": 5.812962532043457, |
|
"learning_rate": 4.059696355725823e-07, |
|
"loss": 0.0643, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06549520766773163, |
|
"grad_norm": 4.684011936187744, |
|
"learning_rate": 4.0868711416176974e-07, |
|
"loss": 0.0563, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0670926517571885, |
|
"grad_norm": 5.2407050132751465, |
|
"learning_rate": 4.113391049233091e-07, |
|
"loss": 0.0546, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.06869009584664537, |
|
"grad_norm": 5.062054634094238, |
|
"learning_rate": 4.139286900743784e-07, |
|
"loss": 0.0488, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.07028753993610223, |
|
"grad_norm": 4.678827285766602, |
|
"learning_rate": 4.1645873922663837e-07, |
|
"loss": 0.039, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.07188498402555911, |
|
"grad_norm": 4.474348068237305, |
|
"learning_rate": 4.1893192850105637e-07, |
|
"loss": 0.0327, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07348242811501597, |
|
"grad_norm": 5.580406665802002, |
|
"learning_rate": 4.213507575416622e-07, |
|
"loss": 0.0265, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.07507987220447285, |
|
"grad_norm": 4.145895957946777, |
|
"learning_rate": 4.2371756469942035e-07, |
|
"loss": 0.034, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.07667731629392971, |
|
"grad_norm": 2.864013671875, |
|
"learning_rate": 4.260345406174251e-07, |
|
"loss": 0.0195, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.07827476038338659, |
|
"grad_norm": 3.931077718734741, |
|
"learning_rate": 4.28303740415246e-07, |
|
"loss": 0.028, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.07987220447284345, |
|
"grad_norm": 3.2536861896514893, |
|
"learning_rate": 4.305270946422664e-07, |
|
"loss": 0.0167, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08146964856230032, |
|
"grad_norm": 2.361112356185913, |
|
"learning_rate": 4.3270641914630895e-07, |
|
"loss": 0.0137, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.08306709265175719, |
|
"grad_norm": 3.177391529083252, |
|
"learning_rate": 4.3484342398395235e-07, |
|
"loss": 0.0168, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.08466453674121406, |
|
"grad_norm": 3.647589921951294, |
|
"learning_rate": 4.3693972148208397e-07, |
|
"loss": 0.0171, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.08626198083067092, |
|
"grad_norm": 3.1123926639556885, |
|
"learning_rate": 4.389968335458992e-07, |
|
"loss": 0.0096, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0878594249201278, |
|
"grad_norm": 3.218924045562744, |
|
"learning_rate": 4.4101619829632255e-07, |
|
"loss": 0.0128, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.08945686900958466, |
|
"grad_norm": 3.422558307647705, |
|
"learning_rate": 4.429991761093619e-07, |
|
"loss": 0.0075, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.09105431309904154, |
|
"grad_norm": 4.4138288497924805, |
|
"learning_rate": 4.44947055120919e-07, |
|
"loss": 0.0106, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0926517571884984, |
|
"grad_norm": 4.1317219734191895, |
|
"learning_rate": 4.4686105625284414e-07, |
|
"loss": 0.0147, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.09424920127795527, |
|
"grad_norm": 1.8419156074523926, |
|
"learning_rate": 4.4874233780934923e-07, |
|
"loss": 0.002, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.09584664536741214, |
|
"grad_norm": 5.771890640258789, |
|
"learning_rate": 4.5059199968710917e-07, |
|
"loss": 0.0132, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09744408945686901, |
|
"grad_norm": 3.2066822052001953, |
|
"learning_rate": 4.5241108723737287e-07, |
|
"loss": 0.0121, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.09904153354632587, |
|
"grad_norm": 2.5449273586273193, |
|
"learning_rate": 4.5420059481403685e-07, |
|
"loss": 0.0112, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.10063897763578275, |
|
"grad_norm": 1.833320140838623, |
|
"learning_rate": 4.55961469037836e-07, |
|
"loss": 0.0057, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.10223642172523961, |
|
"grad_norm": 0.9052779674530029, |
|
"learning_rate": 4.576946118034778e-07, |
|
"loss": 0.0029, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.10383386581469649, |
|
"grad_norm": 1.7721703052520752, |
|
"learning_rate": 4.594008830536363e-07, |
|
"loss": 0.0073, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.10543130990415335, |
|
"grad_norm": 1.3743584156036377, |
|
"learning_rate": 4.6108110334116526e-07, |
|
"loss": 0.0042, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.10702875399361023, |
|
"grad_norm": 5.354936599731445, |
|
"learning_rate": 4.627360561986405e-07, |
|
"loss": 0.0067, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.10862619808306709, |
|
"grad_norm": 0.8433341979980469, |
|
"learning_rate": 4.6436649033236175e-07, |
|
"loss": 0.0036, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.11022364217252396, |
|
"grad_norm": 1.8066205978393555, |
|
"learning_rate": 4.6597312165618915e-07, |
|
"loss": 0.0065, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.11182108626198083, |
|
"grad_norm": 2.4936585426330566, |
|
"learning_rate": 4.6755663517904604e-07, |
|
"loss": 0.013, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1134185303514377, |
|
"grad_norm": 1.3142043352127075, |
|
"learning_rate": 4.691176867585419e-07, |
|
"loss": 0.0106, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.11501597444089456, |
|
"grad_norm": 1.3923885822296143, |
|
"learning_rate": 4.70656904731952e-07, |
|
"loss": 0.0033, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.11661341853035144, |
|
"grad_norm": 1.4490530490875244, |
|
"learning_rate": 4.721748914347038e-07, |
|
"loss": 0.0079, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1182108626198083, |
|
"grad_norm": 0.8172028660774231, |
|
"learning_rate": 4.736722246155569e-07, |
|
"loss": 0.0039, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.11980830670926518, |
|
"grad_norm": 1.627395510673523, |
|
"learning_rate": 4.751494587567932e-07, |
|
"loss": 0.0038, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.12140575079872204, |
|
"grad_norm": 1.3568391799926758, |
|
"learning_rate": 4.7660712630697176e-07, |
|
"loss": 0.0067, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.12300319488817892, |
|
"grad_norm": 1.5196893215179443, |
|
"learning_rate": 4.780457388331021e-07, |
|
"loss": 0.0064, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.12460063897763578, |
|
"grad_norm": 1.7342876195907593, |
|
"learning_rate": 4.794657880984791e-07, |
|
"loss": 0.0112, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.12619808306709265, |
|
"grad_norm": 1.1238106489181519, |
|
"learning_rate": 4.808677470718594e-07, |
|
"loss": 0.008, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 0.6927244663238525, |
|
"learning_rate": 4.822520708731619e-07, |
|
"loss": 0.0028, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12939297124600638, |
|
"grad_norm": 0.710464596748352, |
|
"learning_rate": 4.836191976604261e-07, |
|
"loss": 0.0046, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.13099041533546327, |
|
"grad_norm": 1.29885995388031, |
|
"learning_rate": 4.849695494623494e-07, |
|
"loss": 0.0028, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.13258785942492013, |
|
"grad_norm": 0.7128888964653015, |
|
"learning_rate": 4.863035329603659e-07, |
|
"loss": 0.0017, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.134185303514377, |
|
"grad_norm": 1.3970448970794678, |
|
"learning_rate": 4.876215402238887e-07, |
|
"loss": 0.007, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.13578274760383385, |
|
"grad_norm": 1.8840289115905762, |
|
"learning_rate": 4.889239494020458e-07, |
|
"loss": 0.0073, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.13738019169329074, |
|
"grad_norm": 0.9983999729156494, |
|
"learning_rate": 4.902111253749581e-07, |
|
"loss": 0.0008, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.1389776357827476, |
|
"grad_norm": 1.001712441444397, |
|
"learning_rate": 4.91483420367371e-07, |
|
"loss": 0.0045, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.14057507987220447, |
|
"grad_norm": 0.8022336959838867, |
|
"learning_rate": 4.927411745272181e-07, |
|
"loss": 0.005, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.14217252396166133, |
|
"grad_norm": 1.9497357606887817, |
|
"learning_rate": 4.939847164714948e-07, |
|
"loss": 0.0132, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.14376996805111822, |
|
"grad_norm": 0.4873954951763153, |
|
"learning_rate": 4.952143638016361e-07, |
|
"loss": 0.0036, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14536741214057508, |
|
"grad_norm": 1.6238665580749512, |
|
"learning_rate": 4.96430423590416e-07, |
|
"loss": 0.0079, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.14696485623003194, |
|
"grad_norm": 2.2722995281219482, |
|
"learning_rate": 4.976331928422418e-07, |
|
"loss": 0.0112, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1485623003194888, |
|
"grad_norm": 1.2474795579910278, |
|
"learning_rate": 4.988229589285637e-07, |
|
"loss": 0.0063, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1501597444089457, |
|
"grad_norm": 1.0009785890579224, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0085, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.15175718849840256, |
|
"grad_norm": 0.6343518495559692, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.15335463258785942, |
|
"grad_norm": 0.4515174329280853, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.15495207667731628, |
|
"grad_norm": 0.7897706627845764, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0057, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.15654952076677317, |
|
"grad_norm": 1.2340161800384521, |
|
"learning_rate": 5e-07, |
|
"loss": 0.006, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.15814696485623003, |
|
"grad_norm": 0.5955281257629395, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1597444089456869, |
|
"grad_norm": 1.140373706817627, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0094, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16134185303514376, |
|
"grad_norm": 0.5656710267066956, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0049, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.16293929712460065, |
|
"grad_norm": 1.3329702615737915, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0102, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.1645367412140575, |
|
"grad_norm": 0.42556625604629517, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.16613418530351437, |
|
"grad_norm": 0.6244504451751709, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.16773162939297126, |
|
"grad_norm": 0.6920517086982727, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.16932907348242812, |
|
"grad_norm": 0.660217821598053, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.17092651757188498, |
|
"grad_norm": 1.0536694526672363, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.17252396166134185, |
|
"grad_norm": 0.862185537815094, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.17412140575079874, |
|
"grad_norm": 0.33221954107284546, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.1757188498402556, |
|
"grad_norm": 3.744462013244629, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0047, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17731629392971246, |
|
"grad_norm": 1.6666382551193237, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0067, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.17891373801916932, |
|
"grad_norm": 0.3559662103652954, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1805111821086262, |
|
"grad_norm": 0.3662293255329132, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.18210862619808307, |
|
"grad_norm": 0.49407243728637695, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.18370607028753994, |
|
"grad_norm": 1.0715949535369873, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0054, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1853035143769968, |
|
"grad_norm": 0.618974506855011, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.1869009584664537, |
|
"grad_norm": 1.6139345169067383, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0089, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.18849840255591055, |
|
"grad_norm": 0.9241402745246887, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.1900958466453674, |
|
"grad_norm": 0.7571372389793396, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.19169329073482427, |
|
"grad_norm": 0.5674700736999512, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19329073482428116, |
|
"grad_norm": 2.3434317111968994, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0113, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.19488817891373802, |
|
"grad_norm": 0.5285120010375977, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.1964856230031949, |
|
"grad_norm": 0.3353559076786041, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.19808306709265175, |
|
"grad_norm": 0.6090297698974609, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0052, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.19968051118210864, |
|
"grad_norm": 0.6662172079086304, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2012779552715655, |
|
"grad_norm": 0.6109351515769958, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.20287539936102236, |
|
"grad_norm": 1.2161667346954346, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0077, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.20447284345047922, |
|
"grad_norm": 0.5375782251358032, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.20607028753993611, |
|
"grad_norm": 0.8255560398101807, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.20766773162939298, |
|
"grad_norm": 1.2899589538574219, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0073, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.20926517571884984, |
|
"grad_norm": 0.4597526490688324, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2108626198083067, |
|
"grad_norm": 0.2904457151889801, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2124600638977636, |
|
"grad_norm": 0.2778950035572052, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.21405750798722045, |
|
"grad_norm": 0.45779383182525635, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.21565495207667731, |
|
"grad_norm": 1.7134373188018799, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0073, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.21725239616613418, |
|
"grad_norm": 0.8295688033103943, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.21884984025559107, |
|
"grad_norm": 1.5231693983078003, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0087, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.22044728434504793, |
|
"grad_norm": 0.6921976804733276, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2220447284345048, |
|
"grad_norm": 0.613254964351654, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.22364217252396165, |
|
"grad_norm": 0.9046704769134521, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22523961661341854, |
|
"grad_norm": 0.827433705329895, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.2268370607028754, |
|
"grad_norm": 0.42438846826553345, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.22843450479233227, |
|
"grad_norm": 0.6912664771080017, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.23003194888178913, |
|
"grad_norm": 0.6359020471572876, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.23162939297124602, |
|
"grad_norm": 0.45392975211143494, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.23322683706070288, |
|
"grad_norm": 0.2925805151462555, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.23482428115015974, |
|
"grad_norm": 0.6216511130332947, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2364217252396166, |
|
"grad_norm": 1.4257566928863525, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0049, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.2380191693290735, |
|
"grad_norm": 0.5238153338432312, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.23961661341853036, |
|
"grad_norm": 0.4455612301826477, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24121405750798722, |
|
"grad_norm": 0.23266546428203583, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.24281150159744408, |
|
"grad_norm": 0.3799627125263214, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.24440894568690097, |
|
"grad_norm": 0.48497480154037476, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.24600638977635783, |
|
"grad_norm": 0.6147518157958984, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.2476038338658147, |
|
"grad_norm": 0.6456011533737183, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.24920127795527156, |
|
"grad_norm": 0.5384286642074585, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.2507987220447284, |
|
"grad_norm": 0.8868528008460999, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2523961661341853, |
|
"grad_norm": 0.25091347098350525, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.2539936102236422, |
|
"grad_norm": 0.5789719820022583, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 1.5586203336715698, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2571884984025559, |
|
"grad_norm": 0.705531895160675, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.25878594249201275, |
|
"grad_norm": 0.7628393173217773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.26038338658146964, |
|
"grad_norm": 0.8703659176826477, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.26198083067092653, |
|
"grad_norm": 0.3005010783672333, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.26357827476038337, |
|
"grad_norm": 0.19402694702148438, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.26517571884984026, |
|
"grad_norm": 2.271894931793213, |
|
"learning_rate": 5e-07, |
|
"loss": 0.006, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.26677316293929715, |
|
"grad_norm": 0.3555186092853546, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.268370607028754, |
|
"grad_norm": 1.9556201696395874, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.26996805111821087, |
|
"grad_norm": 1.7738531827926636, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0072, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.2715654952076677, |
|
"grad_norm": 2.2895989418029785, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0049, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2731629392971246, |
|
"grad_norm": 1.7725859880447388, |
|
"learning_rate": 5e-07, |
|
"loss": 0.009, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.2747603833865815, |
|
"grad_norm": 0.5722166895866394, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2763578274760383, |
|
"grad_norm": 1.063871145248413, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.2779552715654952, |
|
"grad_norm": 0.3389774560928345, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.2795527156549521, |
|
"grad_norm": 0.45647579431533813, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.28115015974440893, |
|
"grad_norm": 0.7398928999900818, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.2827476038338658, |
|
"grad_norm": 0.7850615382194519, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.28434504792332266, |
|
"grad_norm": 3.63692045211792, |
|
"learning_rate": 5e-07, |
|
"loss": 0.006, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.28594249201277955, |
|
"grad_norm": 0.44707363843917847, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.28753993610223644, |
|
"grad_norm": 0.9111068248748779, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.28913738019169327, |
|
"grad_norm": 0.6590073704719543, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.29073482428115016, |
|
"grad_norm": 0.5553420782089233, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.29233226837060705, |
|
"grad_norm": 0.47674331068992615, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.2939297124600639, |
|
"grad_norm": 0.29876044392585754, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.2955271565495208, |
|
"grad_norm": 0.6842983365058899, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.2971246006389776, |
|
"grad_norm": 0.4935712516307831, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2987220447284345, |
|
"grad_norm": 1.2657763957977295, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0076, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3003194888178914, |
|
"grad_norm": 0.48839613795280457, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3019169329073482, |
|
"grad_norm": 1.063694715499878, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0037, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.3035143769968051, |
|
"grad_norm": 0.7980586290359497, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.305111821086262, |
|
"grad_norm": 0.45484256744384766, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.30670926517571884, |
|
"grad_norm": 1.688433051109314, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0056, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3083067092651757, |
|
"grad_norm": 0.7312079071998596, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0037, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.30990415335463256, |
|
"grad_norm": 0.4801974594593048, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.31150159744408945, |
|
"grad_norm": 0.4754345118999481, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0058, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.31309904153354634, |
|
"grad_norm": 2.1157238483428955, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0091, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.3146964856230032, |
|
"grad_norm": 0.6343661546707153, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0037, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.31629392971246006, |
|
"grad_norm": 0.5099918842315674, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.31789137380191695, |
|
"grad_norm": 0.333951860666275, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.3194888178913738, |
|
"grad_norm": 0.7018607258796692, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3210862619808307, |
|
"grad_norm": 0.6244291067123413, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3226837060702875, |
|
"grad_norm": 0.8400171399116516, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.3242811501597444, |
|
"grad_norm": 1.1659806966781616, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0047, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.3258785942492013, |
|
"grad_norm": 0.9981520175933838, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0056, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.3274760383386581, |
|
"grad_norm": 0.32153695821762085, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.329073482428115, |
|
"grad_norm": 3.8695359230041504, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.3306709265175719, |
|
"grad_norm": 0.6529371738433838, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.33226837060702874, |
|
"grad_norm": 0.7385216951370239, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0048, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.33386581469648563, |
|
"grad_norm": 0.2696060836315155, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.3354632587859425, |
|
"grad_norm": 2.1431872844696045, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0104, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.33706070287539935, |
|
"grad_norm": 0.45158419013023376, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.33865814696485624, |
|
"grad_norm": 0.28861358761787415, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.3402555910543131, |
|
"grad_norm": 0.8536441326141357, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.34185303514376997, |
|
"grad_norm": 0.3299103081226349, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.34345047923322686, |
|
"grad_norm": 0.6404473185539246, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3450479233226837, |
|
"grad_norm": 0.2772029638290405, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.3466453674121406, |
|
"grad_norm": 3.0106074810028076, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0051, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.34824281150159747, |
|
"grad_norm": 1.7244184017181396, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0066, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.3498402555910543, |
|
"grad_norm": 0.25184711813926697, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.3514376996805112, |
|
"grad_norm": 0.6831691861152649, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.35303514376996803, |
|
"grad_norm": 0.309230774641037, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.3546325878594249, |
|
"grad_norm": 0.5623671412467957, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3562300319488818, |
|
"grad_norm": 0.31381672620773315, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.35782747603833864, |
|
"grad_norm": 0.6246241927146912, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.35942492012779553, |
|
"grad_norm": 0.9443689584732056, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0056, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3610223642172524, |
|
"grad_norm": 0.8880484104156494, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.36261980830670926, |
|
"grad_norm": 0.9324328899383545, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.36421725239616615, |
|
"grad_norm": 0.7231195569038391, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.365814696485623, |
|
"grad_norm": 0.5298541188240051, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0047, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.36741214057507987, |
|
"grad_norm": 1.5357270240783691, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0054, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.36900958466453676, |
|
"grad_norm": 0.4374171495437622, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.3706070287539936, |
|
"grad_norm": 0.2250860631465912, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3722044728434505, |
|
"grad_norm": 1.8229856491088867, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.3738019169329074, |
|
"grad_norm": 1.0502337217330933, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.3753993610223642, |
|
"grad_norm": 1.1811085939407349, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.3769968051118211, |
|
"grad_norm": 0.5202884674072266, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.37859424920127793, |
|
"grad_norm": 0.29556524753570557, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.3801916932907348, |
|
"grad_norm": 0.7253068685531616, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.3817891373801917, |
|
"grad_norm": 0.9315080642700195, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 0.5284143090248108, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.38498402555910544, |
|
"grad_norm": 0.15872539579868317, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.3865814696485623, |
|
"grad_norm": 0.3570381999015808, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.38817891373801916, |
|
"grad_norm": 1.1983716487884521, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.38977635782747605, |
|
"grad_norm": 0.4511491656303406, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.3913738019169329, |
|
"grad_norm": 0.21462838351726532, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3929712460063898, |
|
"grad_norm": 3.4422619342803955, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.39456869009584666, |
|
"grad_norm": 2.029860496520996, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.3961661341853035, |
|
"grad_norm": 0.4887297749519348, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.3977635782747604, |
|
"grad_norm": 3.5348706245422363, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.3993610223642173, |
|
"grad_norm": 0.12261940538883209, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4009584664536741, |
|
"grad_norm": 1.2506657838821411, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.402555910543131, |
|
"grad_norm": 1.385932445526123, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0071, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.40415335463258784, |
|
"grad_norm": 0.6065950393676758, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.4057507987220447, |
|
"grad_norm": 0.3198873698711395, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.4073482428115016, |
|
"grad_norm": 0.21003501117229462, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.40894568690095845, |
|
"grad_norm": 0.70599764585495, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.41054313099041534, |
|
"grad_norm": 0.27071261405944824, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.41214057507987223, |
|
"grad_norm": 0.4544181525707245, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.41373801916932906, |
|
"grad_norm": 0.3489130437374115, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.41533546325878595, |
|
"grad_norm": 2.232909679412842, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4169329073482428, |
|
"grad_norm": 0.24653400480747223, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.4185303514376997, |
|
"grad_norm": 0.70169597864151, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.42012779552715657, |
|
"grad_norm": 0.26590874791145325, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.4217252396166134, |
|
"grad_norm": 1.3357137441635132, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.4233226837060703, |
|
"grad_norm": 0.17348721623420715, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.4249201277955272, |
|
"grad_norm": 0.21971674263477325, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.426517571884984, |
|
"grad_norm": 0.3568759262561798, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.4281150159744409, |
|
"grad_norm": 0.22301608324050903, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.42971246006389774, |
|
"grad_norm": 0.2780276834964752, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.43130990415335463, |
|
"grad_norm": 0.399155855178833, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4329073482428115, |
|
"grad_norm": 0.35904622077941895, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.43450479233226835, |
|
"grad_norm": 0.14762169122695923, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.43610223642172524, |
|
"grad_norm": 0.26977628469467163, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.43769968051118213, |
|
"grad_norm": 0.9291149377822876, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.43929712460063897, |
|
"grad_norm": 0.6625785827636719, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.44089456869009586, |
|
"grad_norm": 0.8231006860733032, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.4424920127795527, |
|
"grad_norm": 0.44783130288124084, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.4440894568690096, |
|
"grad_norm": 0.8134899139404297, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.44568690095846647, |
|
"grad_norm": 0.18782544136047363, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.4472843450479233, |
|
"grad_norm": 2.3220651149749756, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0057, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4488817891373802, |
|
"grad_norm": 3.4821548461914062, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.4504792332268371, |
|
"grad_norm": 1.2853020429611206, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.4520766773162939, |
|
"grad_norm": 2.967637777328491, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.4536741214057508, |
|
"grad_norm": 1.1847063302993774, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0066, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.45527156549520764, |
|
"grad_norm": 0.6564436554908752, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.45686900958466453, |
|
"grad_norm": 1.7592343091964722, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.4584664536741214, |
|
"grad_norm": 0.21900290250778198, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.46006389776357826, |
|
"grad_norm": 0.31253165006637573, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.46166134185303515, |
|
"grad_norm": 0.7273770570755005, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.46325878594249204, |
|
"grad_norm": 0.9572885036468506, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.46485623003194887, |
|
"grad_norm": 4.609229564666748, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0105, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.46645367412140576, |
|
"grad_norm": 0.7994480729103088, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.4680511182108626, |
|
"grad_norm": 0.512992262840271, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4696485623003195, |
|
"grad_norm": 0.7132886052131653, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.4712460063897764, |
|
"grad_norm": 0.4453645348548889, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4728434504792332, |
|
"grad_norm": 0.32878679037094116, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.4744408945686901, |
|
"grad_norm": 0.4557294249534607, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.476038338658147, |
|
"grad_norm": 0.7930683493614197, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.4776357827476038, |
|
"grad_norm": 0.16142797470092773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.4792332268370607, |
|
"grad_norm": 0.16838717460632324, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.48083067092651754, |
|
"grad_norm": 1.0744272470474243, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.48242811501597443, |
|
"grad_norm": 1.177935004234314, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4840255591054313, |
|
"grad_norm": 0.33150434494018555, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.48562300319488816, |
|
"grad_norm": 0.14007702469825745, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.48722044728434505, |
|
"grad_norm": 0.23931963741779327, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.48881789137380194, |
|
"grad_norm": 1.721514344215393, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.4904153354632588, |
|
"grad_norm": 1.3192071914672852, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0095, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.49201277955271566, |
|
"grad_norm": 6.429077625274658, |
|
"learning_rate": 5e-07, |
|
"loss": 0.011, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4936102236421725, |
|
"grad_norm": 0.919262707233429, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0066, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.4952076677316294, |
|
"grad_norm": 0.6547032594680786, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4968051118210863, |
|
"grad_norm": 0.487663209438324, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.4984025559105431, |
|
"grad_norm": 0.33844229578971863, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.686505138874054, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.5015974440894568, |
|
"grad_norm": 0.2250794619321823, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.5031948881789138, |
|
"grad_norm": 0.40359318256378174, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5047923322683706, |
|
"grad_norm": 0.3062373697757721, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.5063897763578274, |
|
"grad_norm": 0.3083527386188507, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.5079872204472844, |
|
"grad_norm": 0.4790252447128296, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.5095846645367412, |
|
"grad_norm": 0.4106828570365906, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 0.5916593670845032, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.512779552715655, |
|
"grad_norm": 0.29794517159461975, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.5143769968051118, |
|
"grad_norm": 0.3094945251941681, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.5159744408945687, |
|
"grad_norm": 0.8037744760513306, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.5175718849840255, |
|
"grad_norm": 1.0918713808059692, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.5191693290734825, |
|
"grad_norm": 0.1457972228527069, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5207667731629393, |
|
"grad_norm": 0.9666080474853516, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.5223642172523961, |
|
"grad_norm": 0.5684059262275696, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.5239616613418531, |
|
"grad_norm": 0.5098831057548523, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.5255591054313099, |
|
"grad_norm": 0.23925842344760895, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.5271565495207667, |
|
"grad_norm": 0.17939390242099762, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5287539936102237, |
|
"grad_norm": 0.16178075969219208, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.5303514376996805, |
|
"grad_norm": 0.671220064163208, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.5319488817891374, |
|
"grad_norm": 0.816387414932251, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.5335463258785943, |
|
"grad_norm": 0.24421027302742004, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.5351437699680511, |
|
"grad_norm": 0.557515025138855, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.536741214057508, |
|
"grad_norm": 1.0557501316070557, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.5383386581469649, |
|
"grad_norm": 0.959538459777832, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.5399361022364217, |
|
"grad_norm": 0.3274182677268982, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.5415335463258786, |
|
"grad_norm": 0.6471118927001953, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.5431309904153354, |
|
"grad_norm": 0.38304996490478516, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5447284345047924, |
|
"grad_norm": 1.0141305923461914, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.5463258785942492, |
|
"grad_norm": 0.9558109641075134, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.547923322683706, |
|
"grad_norm": 0.20122268795967102, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.549520766773163, |
|
"grad_norm": 0.4421844780445099, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0037, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.5511182108626198, |
|
"grad_norm": 0.4365698993206024, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5527156549520766, |
|
"grad_norm": 1.578316330909729, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0053, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.5543130990415336, |
|
"grad_norm": 0.21290868520736694, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.5559105431309904, |
|
"grad_norm": 0.6181979179382324, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.5575079872204473, |
|
"grad_norm": 0.20121750235557556, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.5591054313099042, |
|
"grad_norm": 0.5325523018836975, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.560702875399361, |
|
"grad_norm": 0.6079647541046143, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0052, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.5623003194888179, |
|
"grad_norm": 0.2826935946941376, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.5638977635782748, |
|
"grad_norm": 0.15892595052719116, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.5654952076677316, |
|
"grad_norm": 0.3881969749927521, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.5670926517571885, |
|
"grad_norm": 0.3390671908855438, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5686900958466453, |
|
"grad_norm": 0.2904258668422699, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.5702875399361023, |
|
"grad_norm": 0.46645137667655945, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.5718849840255591, |
|
"grad_norm": 0.6814522743225098, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.5734824281150159, |
|
"grad_norm": 0.9146270751953125, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.5750798722044729, |
|
"grad_norm": 0.21922031044960022, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5766773162939297, |
|
"grad_norm": 0.939836859703064, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.5782747603833865, |
|
"grad_norm": 0.6049763560295105, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.5798722044728435, |
|
"grad_norm": 0.25285854935646057, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.5814696485623003, |
|
"grad_norm": 0.3022539019584656, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.5830670926517572, |
|
"grad_norm": 0.25078529119491577, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.5846645367412141, |
|
"grad_norm": 0.3507131040096283, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.5862619808306709, |
|
"grad_norm": 0.6294477581977844, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0058, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5878594249201278, |
|
"grad_norm": 0.48274868726730347, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5894568690095847, |
|
"grad_norm": 0.39043542742729187, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5910543130990416, |
|
"grad_norm": 0.19649553298950195, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5926517571884984, |
|
"grad_norm": 0.12885907292366028, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5942492012779552, |
|
"grad_norm": 0.1488872468471527, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.5958466453674122, |
|
"grad_norm": 0.3566787540912628, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.597444089456869, |
|
"grad_norm": 0.986865758895874, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0047, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5990415335463258, |
|
"grad_norm": 0.1579206883907318, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6006389776357828, |
|
"grad_norm": 0.13746462762355804, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.6022364217252396, |
|
"grad_norm": 0.25995907187461853, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6038338658146964, |
|
"grad_norm": 0.38791167736053467, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6054313099041534, |
|
"grad_norm": 1.6591804027557373, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6070287539936102, |
|
"grad_norm": 0.11244110763072968, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6086261980830671, |
|
"grad_norm": 0.28090763092041016, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.610223642172524, |
|
"grad_norm": 0.8654282689094543, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6118210862619808, |
|
"grad_norm": 0.3197280764579773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6134185303514377, |
|
"grad_norm": 0.13376931846141815, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6150159744408946, |
|
"grad_norm": 0.3771628439426422, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6166134185303515, |
|
"grad_norm": 3.0465352535247803, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0057, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6182108626198083, |
|
"grad_norm": 0.6134064793586731, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6198083067092651, |
|
"grad_norm": 1.984861969947815, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6214057507987221, |
|
"grad_norm": 0.7715848684310913, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6230031948881789, |
|
"grad_norm": 1.4465832710266113, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6246006389776357, |
|
"grad_norm": 0.4437814950942993, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.6261980830670927, |
|
"grad_norm": 0.7803134322166443, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6277955271565495, |
|
"grad_norm": 0.4495057761669159, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.6293929712460063, |
|
"grad_norm": 0.9235338568687439, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0062, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.6309904153354633, |
|
"grad_norm": 1.1879374980926514, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6325878594249201, |
|
"grad_norm": 0.22155530750751495, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.634185303514377, |
|
"grad_norm": 0.6639124155044556, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6357827476038339, |
|
"grad_norm": 0.24812071025371552, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.6373801916932907, |
|
"grad_norm": 0.24182091653347015, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 0.25194039940834045, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 400 |
|
}, |
|
{
"epoch": 0.6405750798722045,
"grad_norm": 1.6056197881698608,
"learning_rate": 5e-07,
"loss": 0.006,
"step": 401
},
{
"epoch": 0.6421725239616614,
"grad_norm": 3.634866952896118,
"learning_rate": 5e-07,
"loss": 0.0048,
"step": 402
},
{
"epoch": 0.6437699680511182,
"grad_norm": 1.0231901407241821,
"learning_rate": 5e-07,
"loss": 0.0038,
"step": 403
},
{
"epoch": 0.645367412140575,
"grad_norm": 0.27629023790359497,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 404
},
{
"epoch": 0.646964856230032,
"grad_norm": 0.1949998289346695,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 405
},
{
"epoch": 0.6485623003194888,
"grad_norm": 0.34071701765060425,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 406
},
{
"epoch": 0.6501597444089456,
"grad_norm": 0.21671850979328156,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 407
},
{
"epoch": 0.6517571884984026,
"grad_norm": 0.27686911821365356,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 408
},
{
"epoch": 0.6533546325878594,
"grad_norm": 0.3393467366695404,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 409
},
{
"epoch": 0.6549520766773163,
"grad_norm": 0.5887823700904846,
"learning_rate": 5e-07,
"loss": 0.004,
"step": 410
},
{
"epoch": 0.6565495207667732,
"grad_norm": 0.22454427182674408,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 411
},
{
"epoch": 0.65814696485623,
"grad_norm": 0.13433396816253662,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 412
},
{
"epoch": 0.6597444089456869,
"grad_norm": 0.14173242449760437,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 413
},
{
"epoch": 0.6613418530351438,
"grad_norm": 0.30982744693756104,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 414
},
{
"epoch": 0.6629392971246006,
"grad_norm": 0.47411805391311646,
"learning_rate": 5e-07,
"loss": 0.004,
"step": 415
},
{
"epoch": 0.6645367412140575,
"grad_norm": 0.19577734172344208,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 416
},
{
"epoch": 0.6661341853035144,
"grad_norm": 0.6344949007034302,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 417
},
{
"epoch": 0.6677316293929713,
"grad_norm": 0.45841068029403687,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 418
},
{
"epoch": 0.6693290734824281,
"grad_norm": 0.4367104768753052,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 419
},
{
"epoch": 0.670926517571885,
"grad_norm": 0.363915354013443,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 420
},
{
"epoch": 0.6725239616613419,
"grad_norm": 0.18759390711784363,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 421
},
{
"epoch": 0.6741214057507987,
"grad_norm": 0.15988245606422424,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 422
},
{
"epoch": 0.6757188498402555,
"grad_norm": 0.1806800663471222,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 423
},
{
"epoch": 0.6773162939297125,
"grad_norm": 0.21876202523708344,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 424
},
{
"epoch": 0.6789137380191693,
"grad_norm": 0.13135991990566254,
"learning_rate": 5e-07,
"loss": 0.0005,
"step": 425
},
{
"epoch": 0.6805111821086262,
"grad_norm": 1.3671129941940308,
"learning_rate": 5e-07,
"loss": 0.0039,
"step": 426
},
{
"epoch": 0.6821086261980831,
"grad_norm": 0.1489122062921524,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 427
},
{
"epoch": 0.6837060702875399,
"grad_norm": 0.17866690456867218,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 428
},
{
"epoch": 0.6853035143769968,
"grad_norm": 0.655277669429779,
"learning_rate": 5e-07,
"loss": 0.0045,
"step": 429
},
{
"epoch": 0.6869009584664537,
"grad_norm": 0.17315036058425903,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 430
},
{
"epoch": 0.6884984025559105,
"grad_norm": 0.20632481575012207,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 431
},
{
"epoch": 0.6900958466453674,
"grad_norm": 0.2345188409090042,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 432
},
{
"epoch": 0.6916932907348243,
"grad_norm": 3.234980583190918,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 433
},
{
"epoch": 0.6932907348242812,
"grad_norm": 0.5852277874946594,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 434
},
{
"epoch": 0.694888178913738,
"grad_norm": 2.7778074741363525,
"learning_rate": 5e-07,
"loss": 0.0047,
"step": 435
},
{
"epoch": 0.6964856230031949,
"grad_norm": 0.13274647295475006,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 436
},
{
"epoch": 0.6980830670926518,
"grad_norm": 0.3051118850708008,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 437
},
{
"epoch": 0.6996805111821086,
"grad_norm": 0.1823217123746872,
"learning_rate": 5e-07,
"loss": 0.0034,
"step": 438
},
{
"epoch": 0.7012779552715654,
"grad_norm": 0.24809595942497253,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 439
},
{
"epoch": 0.7028753993610224,
"grad_norm": 0.191731795668602,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 440
},
{
"epoch": 0.7044728434504792,
"grad_norm": 0.32459452748298645,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 441
},
{
"epoch": 0.7060702875399361,
"grad_norm": 0.46944916248321533,
"learning_rate": 5e-07,
"loss": 0.0037,
"step": 442
},
{
"epoch": 0.707667731629393,
"grad_norm": 0.28335535526275635,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 443
},
{
"epoch": 0.7092651757188498,
"grad_norm": 0.4613920748233795,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 444
},
{
"epoch": 0.7108626198083067,
"grad_norm": 0.25062379240989685,
"learning_rate": 5e-07,
"loss": 0.0037,
"step": 445
},
{
"epoch": 0.7124600638977636,
"grad_norm": 0.18467797338962555,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 446
},
{
"epoch": 0.7140575079872205,
"grad_norm": 0.11972685158252716,
"learning_rate": 5e-07,
"loss": 0.0007,
"step": 447
},
{
"epoch": 0.7156549520766773,
"grad_norm": 0.42692673206329346,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 448
},
{
"epoch": 0.7172523961661342,
"grad_norm": 0.24261529743671417,
"learning_rate": 5e-07,
"loss": 0.0036,
"step": 449
},
{
"epoch": 0.7188498402555911,
"grad_norm": 0.5654745101928711,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 450
},
|
{
"epoch": 0.7204472843450479,
"grad_norm": 1.1742912530899048,
"learning_rate": 5e-07,
"loss": 0.005,
"step": 451
},
{
"epoch": 0.7220447284345048,
"grad_norm": 0.4380282163619995,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 452
},
{
"epoch": 0.7236421725239617,
"grad_norm": 0.10869846493005753,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 453
},
{
"epoch": 0.7252396166134185,
"grad_norm": 0.24093355238437653,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 454
},
{
"epoch": 0.7268370607028753,
"grad_norm": 0.10973268747329712,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 455
},
{
"epoch": 0.7284345047923323,
"grad_norm": 0.3785342276096344,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 456
},
{
"epoch": 0.7300319488817891,
"grad_norm": 0.9914066791534424,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 457
},
{
"epoch": 0.731629392971246,
"grad_norm": 0.4453500211238861,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 458
},
{
"epoch": 0.7332268370607029,
"grad_norm": 0.2674676477909088,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 459
},
{
"epoch": 0.7348242811501597,
"grad_norm": 0.9725813269615173,
"learning_rate": 5e-07,
"loss": 0.0039,
"step": 460
},
{
"epoch": 0.7364217252396166,
"grad_norm": 0.14569059014320374,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 461
},
{
"epoch": 0.7380191693290735,
"grad_norm": 0.25622859597206116,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 462
},
{
"epoch": 0.7396166134185304,
"grad_norm": 0.33373990654945374,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 463
},
{
"epoch": 0.7412140575079872,
"grad_norm": 0.28780269622802734,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 464
},
{
"epoch": 0.7428115015974441,
"grad_norm": 0.3068907856941223,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 465
},
{
"epoch": 0.744408945686901,
"grad_norm": 0.1721941977739334,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 466
},
{
"epoch": 0.7460063897763578,
"grad_norm": 0.08544081449508667,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 467
},
{
"epoch": 0.7476038338658147,
"grad_norm": 0.5271486043930054,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 468
},
{
"epoch": 0.7492012779552716,
"grad_norm": 1.1334803104400635,
"learning_rate": 5e-07,
"loss": 0.0045,
"step": 469
},
{
"epoch": 0.7507987220447284,
"grad_norm": 0.7498302459716797,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 470
},
{
"epoch": 0.7523961661341853,
"grad_norm": 0.2573578357696533,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 471
},
{
"epoch": 0.7539936102236422,
"grad_norm": 0.658050537109375,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 472
},
{
"epoch": 0.755591054313099,
"grad_norm": 0.9222865104675293,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 473
},
{
"epoch": 0.7571884984025559,
"grad_norm": 0.15267683565616608,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 474
},
{
"epoch": 0.7587859424920128,
"grad_norm": 1.6186271905899048,
"learning_rate": 5e-07,
"loss": 0.0042,
"step": 475
},
{
"epoch": 0.7603833865814696,
"grad_norm": 0.27657464146614075,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 476
},
{
"epoch": 0.7619808306709265,
"grad_norm": 1.7262461185455322,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 477
},
{
"epoch": 0.7635782747603834,
"grad_norm": 0.2438318431377411,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 478
},
{
"epoch": 0.7651757188498403,
"grad_norm": 1.9847776889801025,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 479
},
{
"epoch": 0.7667731629392971,
"grad_norm": 1.081231713294983,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 480
},
{
"epoch": 0.768370607028754,
"grad_norm": 0.23922041058540344,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 481
},
{
"epoch": 0.7699680511182109,
"grad_norm": 0.9737673401832581,
"learning_rate": 5e-07,
"loss": 0.0065,
"step": 482
},
{
"epoch": 0.7715654952076677,
"grad_norm": 0.14736409485340118,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 483
},
{
"epoch": 0.7731629392971247,
"grad_norm": 0.2849075198173523,
"learning_rate": 5e-07,
"loss": 0.0034,
"step": 484
},
{
"epoch": 0.7747603833865815,
"grad_norm": 0.7391708493232727,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 485
},
{
"epoch": 0.7763578274760383,
"grad_norm": 0.24760426580905914,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 486
},
{
"epoch": 0.7779552715654952,
"grad_norm": 0.1888248175382614,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 487
},
{
"epoch": 0.7795527156549521,
"grad_norm": 0.27008160948753357,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 488
},
{
"epoch": 0.7811501597444089,
"grad_norm": 0.17157332599163055,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 489
},
{
"epoch": 0.7827476038338658,
"grad_norm": 0.2907833755016327,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 490
},
{
"epoch": 0.7843450479233227,
"grad_norm": 0.4900013506412506,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 491
},
{
"epoch": 0.7859424920127795,
"grad_norm": 0.1433720886707306,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 492
},
{
"epoch": 0.7875399361022364,
"grad_norm": 0.09725961834192276,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 493
},
{
"epoch": 0.7891373801916933,
"grad_norm": 0.8984055519104004,
"learning_rate": 5e-07,
"loss": 0.0048,
"step": 494
},
{
"epoch": 0.7907348242811502,
"grad_norm": 0.22446835041046143,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 495
},
{
"epoch": 0.792332268370607,
"grad_norm": 0.2916003167629242,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 496
},
{
"epoch": 0.7939297124600639,
"grad_norm": 0.42406606674194336,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 497
},
{
"epoch": 0.7955271565495208,
"grad_norm": 0.8635863661766052,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 498
},
{
"epoch": 0.7971246006389776,
"grad_norm": 0.4940405488014221,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 499
},
{
"epoch": 0.7987220447284346,
"grad_norm": 0.6001198291778564,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 500
},
|
{
"epoch": 0.8003194888178914,
"grad_norm": 0.4194888174533844,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 501
},
{
"epoch": 0.8019169329073482,
"grad_norm": 0.1558961719274521,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 502
},
{
"epoch": 0.8035143769968051,
"grad_norm": 0.1700737476348877,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 503
},
{
"epoch": 0.805111821086262,
"grad_norm": 0.15276680886745453,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 504
},
{
"epoch": 0.8067092651757188,
"grad_norm": 0.21490108966827393,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 505
},
{
"epoch": 0.8083067092651757,
"grad_norm": 0.48414719104766846,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 506
},
{
"epoch": 0.8099041533546326,
"grad_norm": 0.14232124388217926,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 507
},
{
"epoch": 0.8115015974440895,
"grad_norm": 0.13643090426921844,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 508
},
{
"epoch": 0.8130990415335463,
"grad_norm": 0.4378473162651062,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 509
},
{
"epoch": 0.8146964856230032,
"grad_norm": 0.5300367474555969,
"learning_rate": 5e-07,
"loss": 0.0036,
"step": 510
},
{
"epoch": 0.8162939297124601,
"grad_norm": 0.3394503593444824,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 511
},
{
"epoch": 0.8178913738019169,
"grad_norm": 0.15926502645015717,
"learning_rate": 5e-07,
"loss": 0.0007,
"step": 512
},
{
"epoch": 0.8194888178913738,
"grad_norm": 0.14627420902252197,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 513
},
{
"epoch": 0.8210862619808307,
"grad_norm": 0.2294456660747528,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 514
},
{
"epoch": 0.8226837060702875,
"grad_norm": 0.2830236554145813,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 515
},
{
"epoch": 0.8242811501597445,
"grad_norm": 0.49337977170944214,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 516
},
{
"epoch": 0.8258785942492013,
"grad_norm": 0.12382300198078156,
"learning_rate": 5e-07,
"loss": 0.0007,
"step": 517
},
{
"epoch": 0.8274760383386581,
"grad_norm": 0.5072153210639954,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 518
},
{
"epoch": 0.829073482428115,
"grad_norm": 0.6549058556556702,
"learning_rate": 5e-07,
"loss": 0.0044,
"step": 519
},
{
"epoch": 0.8306709265175719,
"grad_norm": 0.21236664056777954,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 520
},
{
"epoch": 0.8322683706070287,
"grad_norm": 0.6704789996147156,
"learning_rate": 5e-07,
"loss": 0.0038,
"step": 521
},
{
"epoch": 0.8338658146964856,
"grad_norm": 0.1254369020462036,
"learning_rate": 5e-07,
"loss": 0.0007,
"step": 522
},
{
"epoch": 0.8354632587859425,
"grad_norm": 2.3178699016571045,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 523
},
{
"epoch": 0.8370607028753994,
"grad_norm": 3.5792298316955566,
"learning_rate": 5e-07,
"loss": 0.0038,
"step": 524
},
{
"epoch": 0.8386581469648562,
"grad_norm": 0.7073982357978821,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 525
},
{
"epoch": 0.8402555910543131,
"grad_norm": 0.5759369134902954,
"learning_rate": 5e-07,
"loss": 0.0035,
"step": 526
},
{
"epoch": 0.84185303514377,
"grad_norm": 1.6669633388519287,
"learning_rate": 5e-07,
"loss": 0.0043,
"step": 527
},
{
"epoch": 0.8434504792332268,
"grad_norm": 0.6380763649940491,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 528
},
{
"epoch": 0.8450479233226837,
"grad_norm": 0.19805264472961426,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 529
},
{
"epoch": 0.8466453674121406,
"grad_norm": 0.6069849133491516,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 530
},
{
"epoch": 0.8482428115015974,
"grad_norm": 0.1687164306640625,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 531
},
{
"epoch": 0.8498402555910544,
"grad_norm": 8.362351417541504,
"learning_rate": 5e-07,
"loss": 0.0054,
"step": 532
},
{
"epoch": 0.8514376996805112,
"grad_norm": 0.2093094438314438,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 533
},
{
"epoch": 0.853035143769968,
"grad_norm": 0.34365883469581604,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 534
},
{
"epoch": 0.854632587859425,
"grad_norm": 0.12474914640188217,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 535
},
{
"epoch": 0.8562300319488818,
"grad_norm": 0.510906994342804,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 536
},
{
"epoch": 0.8578274760383386,
"grad_norm": 0.587397038936615,
"learning_rate": 5e-07,
"loss": 0.0036,
"step": 537
},
{
"epoch": 0.8594249201277955,
"grad_norm": 0.844524621963501,
"learning_rate": 5e-07,
"loss": 0.0052,
"step": 538
},
{
"epoch": 0.8610223642172524,
"grad_norm": 0.13176505267620087,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 539
},
{
"epoch": 0.8626198083067093,
"grad_norm": 0.16751867532730103,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 540
},
{
"epoch": 0.8642172523961661,
"grad_norm": 0.41158822178840637,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 541
},
{
"epoch": 0.865814696485623,
"grad_norm": 0.12536601722240448,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 542
},
{
"epoch": 0.8674121405750799,
"grad_norm": 0.08342156559228897,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 543
},
{
"epoch": 0.8690095846645367,
"grad_norm": 1.5413963794708252,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 544
},
{
"epoch": 0.8706070287539937,
"grad_norm": 0.24465155601501465,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 545
},
{
"epoch": 0.8722044728434505,
"grad_norm": 0.1531795710325241,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 546
},
{
"epoch": 0.8738019169329073,
"grad_norm": 0.1711510270833969,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 547
},
{
"epoch": 0.8753993610223643,
"grad_norm": 0.10270589590072632,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 548
},
{
"epoch": 0.8769968051118211,
"grad_norm": 0.6564199328422546,
"learning_rate": 5e-07,
"loss": 0.004,
"step": 549
},
{
"epoch": 0.8785942492012779,
"grad_norm": 0.15586939454078674,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 550
},
|
{
"epoch": 0.8801916932907349,
"grad_norm": 0.11600361764431,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 551
},
{
"epoch": 0.8817891373801917,
"grad_norm": 0.10974587500095367,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 552
},
{
"epoch": 0.8833865814696485,
"grad_norm": 0.2567409574985504,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 553
},
{
"epoch": 0.8849840255591054,
"grad_norm": 0.27306219935417175,
"learning_rate": 5e-07,
"loss": 0.0043,
"step": 554
},
{
"epoch": 0.8865814696485623,
"grad_norm": 0.3767613470554352,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 555
},
{
"epoch": 0.8881789137380192,
"grad_norm": 1.1817584037780762,
"learning_rate": 5e-07,
"loss": 0.0057,
"step": 556
},
{
"epoch": 0.889776357827476,
"grad_norm": 0.3905037045478821,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 557
},
{
"epoch": 0.8913738019169329,
"grad_norm": 0.23763906955718994,
"learning_rate": 5e-07,
"loss": 0.0006,
"step": 558
},
{
"epoch": 0.8929712460063898,
"grad_norm": 0.3837161362171173,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 559
},
{
"epoch": 0.8945686900958466,
"grad_norm": 0.6008383631706238,
"learning_rate": 5e-07,
"loss": 0.0041,
"step": 560
},
{
"epoch": 0.8961661341853036,
"grad_norm": 0.1975802332162857,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 561
},
{
"epoch": 0.8977635782747604,
"grad_norm": 0.15433986485004425,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 562
},
{
"epoch": 0.8993610223642172,
"grad_norm": 0.4065248966217041,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 563
},
{
"epoch": 0.9009584664536742,
"grad_norm": 0.25533992052078247,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 564
},
{
"epoch": 0.902555910543131,
"grad_norm": 0.46665945649147034,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 565
},
{
"epoch": 0.9041533546325878,
"grad_norm": 0.16390737891197205,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 566
},
{
"epoch": 0.9057507987220448,
"grad_norm": 0.06191791966557503,
"learning_rate": 5e-07,
"loss": 0.0005,
"step": 567
},
{
"epoch": 0.9073482428115016,
"grad_norm": 1.692974328994751,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 568
},
{
"epoch": 0.9089456869009584,
"grad_norm": 0.21359291672706604,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 569
},
{
"epoch": 0.9105431309904153,
"grad_norm": 0.09417448937892914,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 570
},
{
"epoch": 0.9121405750798722,
"grad_norm": 0.22416774928569794,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 571
},
{
"epoch": 0.9137380191693291,
"grad_norm": 0.15862461924552917,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 572
},
{
"epoch": 0.9153354632587859,
"grad_norm": 0.325225293636322,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 573
},
{
"epoch": 0.9169329073482428,
"grad_norm": 0.4235207140445709,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 574
},
{
"epoch": 0.9185303514376997,
"grad_norm": 0.17048902809619904,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 575
},
{
"epoch": 0.9201277955271565,
"grad_norm": 0.28779876232147217,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 576
},
{
"epoch": 0.9217252396166135,
"grad_norm": 0.16058605909347534,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 577
},
{
"epoch": 0.9233226837060703,
"grad_norm": 0.08717779070138931,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 578
},
{
"epoch": 0.9249201277955271,
"grad_norm": 0.852260172367096,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 579
},
{
"epoch": 0.9265175718849841,
"grad_norm": 0.34097403287887573,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 580
},
{
"epoch": 0.9281150159744409,
"grad_norm": 0.3408906161785126,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 581
},
{
"epoch": 0.9297124600638977,
"grad_norm": 0.4731082618236542,
"learning_rate": 5e-07,
"loss": 0.0037,
"step": 582
},
{
"epoch": 0.9313099041533547,
"grad_norm": 0.49347537755966187,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 583
},
{
"epoch": 0.9329073482428115,
"grad_norm": 0.17723320424556732,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 584
},
{
"epoch": 0.9345047923322684,
"grad_norm": 0.16061758995056152,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 585
},
{
"epoch": 0.9361022364217252,
"grad_norm": 0.11341333389282227,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 586
},
{
"epoch": 0.9376996805111821,
"grad_norm": 0.12614861130714417,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 587
},
{
"epoch": 0.939297124600639,
"grad_norm": 0.1919727921485901,
"learning_rate": 5e-07,
"loss": 0.0033,
"step": 588
},
{
"epoch": 0.9408945686900958,
"grad_norm": 0.5518249273300171,
"learning_rate": 5e-07,
"loss": 0.0034,
"step": 589
},
{
"epoch": 0.9424920127795527,
"grad_norm": 0.11890781670808792,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 590
},
{
"epoch": 0.9440894568690096,
"grad_norm": 0.5765843391418457,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 591
},
{
"epoch": 0.9456869009584664,
"grad_norm": 0.17196467518806458,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 592
},
{
"epoch": 0.9472843450479234,
"grad_norm": 0.2513171136379242,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 593
},
{
"epoch": 0.9488817891373802,
"grad_norm": 1.7397063970565796,
"learning_rate": 5e-07,
"loss": 0.0108,
"step": 594
},
{
"epoch": 0.950479233226837,
"grad_norm": 0.2406977415084839,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 595
},
{
"epoch": 0.952076677316294,
"grad_norm": 0.23791703581809998,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 596
},
{
"epoch": 0.9536741214057508,
"grad_norm": 0.22479400038719177,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 597
},
{
"epoch": 0.9552715654952076,
"grad_norm": 0.23164299130439758,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 598
},
{
"epoch": 0.9568690095846646,
"grad_norm": 0.1600014865398407,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 599
},
{
"epoch": 0.9584664536741214,
"grad_norm": 0.2001635879278183,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 600
},
|
{
"epoch": 0.9600638977635783,
"grad_norm": 0.2946987450122833,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 601
},
{
"epoch": 0.9616613418530351,
"grad_norm": 0.2080981582403183,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 602
},
{
"epoch": 0.963258785942492,
"grad_norm": 0.6526065468788147,
"learning_rate": 5e-07,
"loss": 0.0035,
"step": 603
},
{
"epoch": 0.9648562300319489,
"grad_norm": 0.3620392382144928,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 604
},
{
"epoch": 0.9664536741214057,
"grad_norm": 0.21057778596878052,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 605
},
{
"epoch": 0.9680511182108626,
"grad_norm": 0.11293834447860718,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 606
},
{
"epoch": 0.9696485623003195,
"grad_norm": 1.2960716485977173,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 607
},
{
"epoch": 0.9712460063897763,
"grad_norm": 0.1437765210866928,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 608
},
{
"epoch": 0.9728434504792333,
"grad_norm": 0.6358578205108643,
"learning_rate": 5e-07,
"loss": 0.0049,
"step": 609
},
{
"epoch": 0.9744408945686901,
"grad_norm": 0.14403465390205383,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 610
},
{
"epoch": 0.9760383386581469,
"grad_norm": 0.4725677967071533,
"learning_rate": 5e-07,
"loss": 0.0045,
"step": 611
},
{
"epoch": 0.9776357827476039,
"grad_norm": 0.8543768525123596,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 612
},
{
"epoch": 0.9792332268370607,
"grad_norm": 0.27371206879615784,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 613
},
{
"epoch": 0.9808306709265175,
"grad_norm": 0.23279932141304016,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 614
},
{
"epoch": 0.9824281150159745,
"grad_norm": 0.8952144384384155,
"learning_rate": 5e-07,
"loss": 0.0037,
"step": 615
},
{
"epoch": 0.9840255591054313,
"grad_norm": 0.11720918118953705,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 616
},
{
"epoch": 0.9856230031948882,
"grad_norm": 0.9963851571083069,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 617
},
{
"epoch": 0.987220447284345,
"grad_norm": 0.08971522003412247,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 618
},
{
"epoch": 0.9888178913738019,
"grad_norm": 0.1214229092001915,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 619
},
{
"epoch": 0.9904153354632588,
"grad_norm": 0.2924940288066864,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 620
},
{
"epoch": 0.9920127795527156,
"grad_norm": 1.189788579940796,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 621
},
{
"epoch": 0.9936102236421726,
"grad_norm": 0.5802843570709229,
"learning_rate": 5e-07,
"loss": 0.0044,
"step": 622
},
{
"epoch": 0.9952076677316294,
"grad_norm": 0.11823207885026932,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 623
},
{
"epoch": 0.9968051118210862,
"grad_norm": 0.16288286447525024,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 624
},
{
"epoch": 0.9984025559105432,
"grad_norm": 0.13317349553108215,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 625
},
{
"epoch": 1.0,
"grad_norm": 0.1848553717136383,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 626
},
|
{
"epoch": 1.0015974440894568,
"grad_norm": 0.5851624608039856,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 627
},
{
"epoch": 1.0031948881789137,
"grad_norm": 0.09685046225786209,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 628
},
{
"epoch": 1.0047923322683705,
"grad_norm": 0.11342930048704147,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 629
},
{
"epoch": 1.0063897763578276,
"grad_norm": 0.46440932154655457,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 630
},
{
"epoch": 1.0079872204472844,
"grad_norm": 0.19794267416000366,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 631
},
{
"epoch": 1.0095846645367412,
"grad_norm": 0.3643769919872284,
"learning_rate": 5e-07,
"loss": 0.0041,
"step": 632
},
{
"epoch": 1.011182108626198,
"grad_norm": 0.08710943907499313,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 633
},
{
"epoch": 1.012779552715655,
"grad_norm": 0.4915333688259125,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 634
},
{
"epoch": 1.0143769968051117,
"grad_norm": 0.23040416836738586,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 635
},
{
"epoch": 1.0159744408945688,
"grad_norm": 0.24800904095172882,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 636
},
{
"epoch": 1.0175718849840256,
"grad_norm": 0.29919004440307617,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 637
},
{
"epoch": 1.0191693290734825,
"grad_norm": 0.11212212592363358,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 638
},
{
"epoch": 1.0207667731629393,
"grad_norm": 0.16430723667144775,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 639
},
{
"epoch": 1.0223642172523961,
"grad_norm": 0.32202014327049255,
"learning_rate": 5e-07,
"loss": 0.0046,
"step": 640
},
{
"epoch": 1.023961661341853,
"grad_norm": 0.3237399458885193,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 641
},
{
"epoch": 1.0255591054313098,
"grad_norm": 0.1953740268945694,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 642
},
{
"epoch": 1.0271565495207668,
"grad_norm": 0.12679028511047363,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 643
},
{
"epoch": 1.0287539936102237,
"grad_norm": 0.10133963078260422,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 644
},
{
"epoch": 1.0303514376996805,
"grad_norm": 0.0973275899887085,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 645
},
{
"epoch": 1.0319488817891374,
"grad_norm": 0.5217294096946716,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 646
},
{
"epoch": 1.0335463258785942,
"grad_norm": 0.15706409513950348,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 647
},
{
"epoch": 1.035143769968051,
"grad_norm": 0.10915224999189377,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 648
},
{
"epoch": 1.036741214057508,
"grad_norm": 0.4759727716445923,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 649
},
{
"epoch": 1.038338658146965,
"grad_norm": 0.11390835046768188,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 650
},
|
{
"epoch": 1.0399361022364217,
"grad_norm": 1.093390703201294,
"learning_rate": 5e-07,
"loss": 0.0046,
"step": 651
},
{
"epoch": 1.0415335463258786,
"grad_norm": 0.07437872141599655,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 652
},
{
"epoch": 1.0431309904153354,
"grad_norm": 0.21833601593971252,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 653
},
{
"epoch": 1.0447284345047922,
"grad_norm": 0.21499225497245789,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 654
},
{
"epoch": 1.0463258785942493,
"grad_norm": 0.12159962207078934,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 655
},
{
"epoch": 1.0479233226837061,
"grad_norm": 0.3396832346916199,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 656
},
{
"epoch": 1.049520766773163,
"grad_norm": 0.21546514332294464,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 657
},
{
"epoch": 1.0511182108626198,
"grad_norm": 0.19969788193702698,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 658
},
{
"epoch": 1.0527156549520766,
"grad_norm": 0.08988461643457413,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 659
},
{
"epoch": 1.0543130990415335,
"grad_norm": 0.26418858766555786,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 660
},
{
"epoch": 1.0559105431309903,
"grad_norm": 0.2093944102525711,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 661
},
{
"epoch": 1.0575079872204474,
"grad_norm": 0.36498188972473145,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 662
},
{
"epoch": 1.0591054313099042,
"grad_norm": 0.07945135235786438,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 663
},
{
"epoch": 1.060702875399361,
"grad_norm": 0.12991008162498474,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 664
},
{
"epoch": 1.0623003194888179,
"grad_norm": 0.2589464783668518,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 665
},
{
"epoch": 1.0638977635782747,
"grad_norm": 0.10802029818296432,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 666
},
{
"epoch": 1.0654952076677315,
"grad_norm": 0.4823663532733917,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 667
},
{
"epoch": 1.0670926517571886,
"grad_norm": 0.44493567943573,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 668
},
{
"epoch": 1.0686900958466454,
"grad_norm": 0.23464570939540863,
"learning_rate": 5e-07,
"loss": 0.0031,
"step": 669
},
{
"epoch": 1.0702875399361023,
"grad_norm": 0.28489264845848083,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 670
},
{
"epoch": 1.071884984025559,
"grad_norm": 0.2952616214752197,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 671
},
{
"epoch": 1.073482428115016,
"grad_norm": 0.20882096886634827,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 672
},
{
"epoch": 1.0750798722044728,
"grad_norm": 0.12549445033073425,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 673
},
{
"epoch": 1.0766773162939298,
"grad_norm": 0.22115099430084229,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 674
},
{
"epoch": 1.0782747603833867,
"grad_norm": 0.15710076689720154,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 675
},
{
"epoch": 1.0798722044728435,
"grad_norm": 0.15250109136104584,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 676
},
{
"epoch": 1.0814696485623003,
"grad_norm": 0.1936073750257492,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 677
},
{
"epoch": 1.0830670926517572,
"grad_norm": 0.08779247850179672,
"learning_rate": 5e-07,
"loss": 0.0006,
"step": 678
},
{
"epoch": 1.084664536741214,
"grad_norm": 0.1437022089958191,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 679
},
{
"epoch": 1.0862619808306708,
"grad_norm": 0.2850930392742157,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 680
},
{
"epoch": 1.0878594249201279,
"grad_norm": 0.3779478967189789,
"learning_rate": 5e-07,
"loss": 0.0051,
"step": 681
},
{
"epoch": 1.0894568690095847,
"grad_norm": 0.20855233073234558,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 682
},
{
"epoch": 1.0910543130990416,
"grad_norm": 0.11478544771671295,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 683
},
{
"epoch": 1.0926517571884984,
"grad_norm": 0.3283560276031494,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 684
},
{
"epoch": 1.0942492012779552,
"grad_norm": 0.35279855132102966,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 685
},
{
"epoch": 1.095846645367412,
"grad_norm": 0.166551411151886,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 686
},
{
"epoch": 1.097444089456869,
"grad_norm": 0.10785111039876938,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 687
},
{
"epoch": 1.099041533546326,
"grad_norm": 0.1150643453001976,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 688
},
{
"epoch": 1.1006389776357828,
"grad_norm": 0.10543517768383026,
"learning_rate": 5e-07,
"loss": 0.0019,
"step": 689
},
{
"epoch": 1.1022364217252396,
"grad_norm": 0.12300563603639603,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 690
},
{
"epoch": 1.1038338658146964,
"grad_norm": 0.15775950253009796,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 691
},
{
"epoch": 1.1054313099041533,
"grad_norm": 0.052068062126636505,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 692
},
{
"epoch": 1.1070287539936103,
"grad_norm": 0.1371004432439804,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 693
},
{
"epoch": 1.1086261980830672,
"grad_norm": 0.11871246248483658,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 694
},
{
"epoch": 1.110223642172524,
"grad_norm": 0.106499083340168,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 695
},
{
"epoch": 1.1118210862619808,
"grad_norm": 0.05005495250225067,
"learning_rate": 5e-07,
"loss": 0.001,
"step": 696
},
{
"epoch": 1.1134185303514377,
"grad_norm": 0.6656812429428101,
"learning_rate": 5e-07,
"loss": 0.0036,
"step": 697
},
{
"epoch": 1.1150159744408945,
"grad_norm": 0.14445379376411438,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 698
},
{
"epoch": 1.1166134185303513,
"grad_norm": 0.268867164850235,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 699
},
{
"epoch": 1.1182108626198084,
"grad_norm": 0.16358579695224762,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 700
},
|
{
"epoch": 1.1198083067092652,
"grad_norm": 0.11881609261035919,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 701
},
{
"epoch": 1.121405750798722,
"grad_norm": 0.10697460919618607,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 702
},
{
"epoch": 1.123003194888179,
"grad_norm": 0.1314931958913803,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 703
},
{
"epoch": 1.1246006389776357,
"grad_norm": 0.13953609764575958,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 704
},
{
"epoch": 1.1261980830670926,
"grad_norm": 0.30124762654304504,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 705
},
{
"epoch": 1.1277955271565494,
"grad_norm": 0.2047244906425476,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 706
},
{
"epoch": 1.1293929712460065,
"grad_norm": 0.28825926780700684,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 707
},
{
"epoch": 1.1309904153354633,
"grad_norm": 0.1030370369553566,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 708
},
{
"epoch": 1.1325878594249201,
"grad_norm": 0.08416575193405151,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 709
},
{
"epoch": 1.134185303514377,
"grad_norm": 0.6813434958457947,
"learning_rate": 5e-07,
"loss": 0.0046,
"step": 710
},
{
"epoch": 1.1357827476038338,
"grad_norm": 0.7300125360488892,
"learning_rate": 5e-07,
"loss": 0.0022,
"step": 711
},
{
"epoch": 1.1373801916932909,
"grad_norm": 0.21185138821601868,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 712
},
{
"epoch": 1.1389776357827477,
"grad_norm": 0.3095090389251709,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 713
},
{
"epoch": 1.1405750798722045,
"grad_norm": 0.13913819193840027,
"learning_rate": 5e-07,
"loss": 0.0016,
"step": 714
},
{
"epoch": 1.1421725239616614,
"grad_norm": 0.2016637921333313,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 715
},
{
"epoch": 1.1437699680511182,
"grad_norm": 0.12468226253986359,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 716
},
{
"epoch": 1.145367412140575,
"grad_norm": 0.1510785073041916,
"learning_rate": 5e-07,
"loss": 0.002,
"step": 717
},
{
"epoch": 1.1469648562300319,
"grad_norm": 0.11004221439361572,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 718
},
{
"epoch": 1.148562300319489,
"grad_norm": 0.12669166922569275,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 719
},
{
"epoch": 1.1501597444089458,
"grad_norm": 0.11249173432588577,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 720
},
{
"epoch": 1.1517571884984026,
"grad_norm": 0.21114954352378845,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 721
},
{
"epoch": 1.1533546325878594,
"grad_norm": 0.3067387342453003,
"learning_rate": 5e-07,
"loss": 0.0037,
"step": 722
},
{
"epoch": 1.1549520766773163,
"grad_norm": 0.11942733824253082,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 723
},
{
"epoch": 1.156549520766773,
"grad_norm": 0.2498818337917328,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 724
},
{
"epoch": 1.15814696485623,
"grad_norm": 0.589302122592926,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 725
},
{
"epoch": 1.159744408945687,
"grad_norm": 0.11040360480546951,
"learning_rate": 5e-07,
"loss": 0.0015,
"step": 726
},
{
"epoch": 1.1613418530351438,
"grad_norm": 0.1865539252758026,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 727
},
{
"epoch": 1.1629392971246006,
"grad_norm": 0.09886899590492249,
"learning_rate": 5e-07,
"loss": 0.0025,
"step": 728
},
{
"epoch": 1.1645367412140575,
"grad_norm": 0.10603392124176025,
"learning_rate": 5e-07,
"loss": 0.0012,
"step": 729
},
{
"epoch": 1.1661341853035143,
"grad_norm": 0.17114412784576416,
"learning_rate": 5e-07,
"loss": 0.0034,
"step": 730
},
{
"epoch": 1.1677316293929714,
"grad_norm": 0.11507996916770935,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 731
},
{
"epoch": 1.1693290734824282,
"grad_norm": 0.10117539763450623,
"learning_rate": 5e-07,
"loss": 0.001,
"step": 732
},
{
"epoch": 1.170926517571885,
"grad_norm": 0.18606479465961456,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 733
},
{
"epoch": 1.1725239616613419,
"grad_norm": 0.1562090963125229,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 734
},
{
"epoch": 1.1741214057507987,
"grad_norm": 0.2511482834815979,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 735
},
{
"epoch": 1.1757188498402555,
"grad_norm": 0.131776362657547,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 736
},
{
"epoch": 1.1773162939297124,
"grad_norm": 0.2350272536277771,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 737
},
{
"epoch": 1.1789137380191694,
"grad_norm": 0.5341865420341492,
"learning_rate": 5e-07,
"loss": 0.0045,
"step": 738
},
{
"epoch": 1.1805111821086263,
"grad_norm": 0.18602542579174042,
"learning_rate": 5e-07,
"loss": 0.0028,
"step": 739
},
{
"epoch": 1.182108626198083,
"grad_norm": 0.08645334094762802,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 740
},
{
"epoch": 1.18370607028754,
"grad_norm": 0.1708468645811081,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 741
},
{
"epoch": 1.1853035143769968,
"grad_norm": 0.11233728379011154,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 742
},
{
"epoch": 1.1869009584664536,
"grad_norm": 0.20240649580955505,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 743
},
{
"epoch": 1.1884984025559104,
"grad_norm": 0.14982427656650543,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 744
},
{
"epoch": 1.1900958466453675,
"grad_norm": 0.13709725439548492,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 745
},
{
"epoch": 1.1916932907348243,
"grad_norm": 0.18215787410736084,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 746
},
{
"epoch": 1.1932907348242812,
"grad_norm": 0.12986968457698822,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 747
},
{
"epoch": 1.194888178913738,
"grad_norm": 0.10665430873632431,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 748
},
{
"epoch": 1.1964856230031948,
"grad_norm": 0.11768273264169693,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 749
},
{
"epoch": 1.1980830670926517,
"grad_norm": 0.6945746541023254,
"learning_rate": 5e-07,
"loss": 0.0008,
"step": 750
},
|
{
"epoch": 1.1996805111821087,
"grad_norm": 0.25220727920532227,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 751
},
{
"epoch": 1.2012779552715656,
"grad_norm": 0.07079087197780609,
"learning_rate": 5e-07,
"loss": 0.0005,
"step": 752
},
{
"epoch": 1.2028753993610224,
"grad_norm": 0.16933268308639526,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 753
},
{
"epoch": 1.2044728434504792,
"grad_norm": 0.15669192373752594,
"learning_rate": 5e-07,
"loss": 0.001,
"step": 754
},
{
"epoch": 1.206070287539936,
"grad_norm": 6.026957035064697,
"learning_rate": 5e-07,
"loss": 0.0035,
"step": 755
},
{
"epoch": 1.207667731629393,
"grad_norm": 0.5998969674110413,
"learning_rate": 5e-07,
"loss": 0.0039,
"step": 756
},
{
"epoch": 1.20926517571885,
"grad_norm": 0.06143517419695854,
"learning_rate": 5e-07,
"loss": 0.001,
"step": 757
},
{
"epoch": 1.2108626198083068,
"grad_norm": 0.2319423258304596,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 758
},
{
"epoch": 1.2124600638977636,
"grad_norm": 0.701130211353302,
"learning_rate": 5e-07,
"loss": 0.0056,
"step": 759
},
{
"epoch": 1.2140575079872205,
"grad_norm": 0.3482913076877594,
"learning_rate": 5e-07,
"loss": 0.0029,
"step": 760
},
{
"epoch": 1.2156549520766773,
"grad_norm": 0.21112686395645142,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 761
},
{
"epoch": 1.2172523961661341,
"grad_norm": 5.14704704284668,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 762
},
{
"epoch": 1.218849840255591,
"grad_norm": 0.41259634494781494,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 763
},
{
"epoch": 1.220447284345048,
"grad_norm": 0.2882460951805115,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 764
},
{
"epoch": 1.2220447284345048,
"grad_norm": 0.22736166417598724,
"learning_rate": 5e-07,
"loss": 0.0026,
"step": 765
},
{
"epoch": 1.2236421725239617,
"grad_norm": 0.2859097719192505,
"learning_rate": 5e-07,
"loss": 0.0023,
"step": 766
},
{
"epoch": 1.2252396166134185,
"grad_norm": 1.8469651937484741,
"learning_rate": 5e-07,
"loss": 0.0032,
"step": 767
},
{
"epoch": 1.2268370607028753,
"grad_norm": 0.15771770477294922,
"learning_rate": 5e-07,
"loss": 0.0007,
"step": 768
},
{
"epoch": 1.2284345047923322,
"grad_norm": 0.10112886875867844,
"learning_rate": 5e-07,
"loss": 0.001,
"step": 769
},
{
"epoch": 1.230031948881789,
"grad_norm": 0.7125353813171387,
"learning_rate": 5e-07,
"loss": 0.003,
"step": 770
},
{
"epoch": 1.231629392971246,
"grad_norm": 0.10188297927379608,
"learning_rate": 5e-07,
"loss": 0.0009,
"step": 771
},
{
"epoch": 1.233226837060703,
"grad_norm": 0.9515169262886047,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 772
},
{
"epoch": 1.2348242811501597,
"grad_norm": 0.06836796551942825,
"learning_rate": 5e-07,
"loss": 0.0011,
"step": 773
},
{
"epoch": 1.2364217252396166,
"grad_norm": 0.13632221519947052,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 774
},
{
"epoch": 1.2380191693290734,
"grad_norm": 0.572127103805542,
"learning_rate": 5e-07,
"loss": 0.0027,
"step": 775
},
{
"epoch": 1.2396166134185305,
"grad_norm": 0.11243536323308945,
"learning_rate": 5e-07,
"loss": 0.0024,
"step": 776
},
{
"epoch": 1.2412140575079873,
"grad_norm": 0.47509658336639404,
"learning_rate": 5e-07,
"loss": 0.0013,
"step": 777
},
{
"epoch": 1.2428115015974441,
"grad_norm": 3.768028974533081,
"learning_rate": 5e-07,
"loss": 0.0039,
"step": 778
},
{
"epoch": 1.244408945686901,
"grad_norm": 0.346608966588974,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 779
},
{
"epoch": 1.2460063897763578,
"grad_norm": 0.23147884011268616,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 780
},
{
"epoch": 1.2476038338658146,
"grad_norm": 0.14679104089736938,
"learning_rate": 5e-07,
"loss": 0.0021,
"step": 781
},
{
"epoch": 1.2492012779552715,
"grad_norm": 0.23181918263435364,
"learning_rate": 5e-07,
"loss": 0.0014,
"step": 782
},
{
"epoch": 1.2507987220447285,
"grad_norm": 0.16475149989128113,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 783
},
{
"epoch": 1.2523961661341854,
"grad_norm": 0.08807860314846039,
"learning_rate": 5e-07,
"loss": 0.0017,
"step": 784
},
{
"epoch": 1.2539936102236422,
"grad_norm": 0.16376349329948425,
"learning_rate": 5e-07,
"loss": 0.0018,
"step": 785
},
{
"epoch": 1.255591054313099,
"grad_norm": 0.19524359703063965,
"learning_rate": 5e-07,
|
"loss": 0.0016, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.2571884984025559, |
|
"grad_norm": 0.06623630225658417, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.2587859424920127, |
|
"grad_norm": 0.03929225727915764, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0003, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.2603833865814695, |
|
"grad_norm": 0.11669357866048813, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.2619808306709266, |
|
"grad_norm": 0.00831072498112917, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.2635782747603834, |
|
"grad_norm": 0.13926006853580475, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.2651757188498403, |
|
"grad_norm": 4.567328929901123, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.266773162939297, |
|
"grad_norm": 1.018916368484497, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0067, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.268370607028754, |
|
"grad_norm": 0.2978529632091522, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.269968051118211, |
|
"grad_norm": 0.5247751474380493, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.2715654952076676, |
|
"grad_norm": 0.06592018157243729, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.2731629392971247, |
|
"grad_norm": 0.1426411271095276, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.2747603833865815, |
|
"grad_norm": 0.14098455011844635, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.2763578274760383, |
|
"grad_norm": 0.11731888353824615, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 0.8000310063362122, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.279552715654952, |
|
"grad_norm": 0.8283488154411316, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.281150159744409, |
|
"grad_norm": 0.6855292916297913, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.2827476038338659, |
|
"grad_norm": 0.12922891974449158, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.2843450479233227, |
|
"grad_norm": 0.8001663088798523, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0055, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.2859424920127795, |
|
"grad_norm": 0.09436812996864319, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.2875399361022364, |
|
"grad_norm": 0.3059661388397217, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.2891373801916932, |
|
"grad_norm": 0.22149628400802612, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.29073482428115, |
|
"grad_norm": 0.1476268321275711, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.292332268370607, |
|
"grad_norm": 0.11144911497831345, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.293929712460064, |
|
"grad_norm": 0.2204524427652359, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.2955271565495208, |
|
"grad_norm": 0.07029737532138824, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.2971246006389776, |
|
"grad_norm": 0.09939467906951904, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.2987220447284344, |
|
"grad_norm": 0.1769058108329773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.3003194888178915, |
|
"grad_norm": 0.3592926561832428, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.3019169329073481, |
|
"grad_norm": 0.2275543361902237, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.3035143769968052, |
|
"grad_norm": 0.07945606857538223, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.305111821086262, |
|
"grad_norm": 0.08711861073970795, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.3067092651757188, |
|
"grad_norm": 0.28836479783058167, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.3083067092651757, |
|
"grad_norm": 0.08159520477056503, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.3099041533546325, |
|
"grad_norm": 0.07466080784797668, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.3115015974440896, |
|
"grad_norm": 0.5451674461364746, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.3130990415335464, |
|
"grad_norm": 0.14322105050086975, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.3146964856230032, |
|
"grad_norm": 0.12718692421913147, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.31629392971246, |
|
"grad_norm": 0.13329675793647766, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.317891373801917, |
|
"grad_norm": 0.22774100303649902, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.3194888178913737, |
|
"grad_norm": 0.8411527276039124, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.3210862619808306, |
|
"grad_norm": 0.18316972255706787, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.3226837060702876, |
|
"grad_norm": 0.07081547379493713, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.3242811501597445, |
|
"grad_norm": 0.3037576973438263, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.3258785942492013, |
|
"grad_norm": 0.10193005204200745, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3274760383386581, |
|
"grad_norm": 0.10602962970733643, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.329073482428115, |
|
"grad_norm": 0.2908097505569458, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.330670926517572, |
|
"grad_norm": 0.3724125027656555, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.3322683706070286, |
|
"grad_norm": 0.11064999550580978, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.3338658146964857, |
|
"grad_norm": 0.228192538022995, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.3354632587859425, |
|
"grad_norm": 0.7090662717819214, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.3370607028753994, |
|
"grad_norm": 0.14330582320690155, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.3386581469648562, |
|
"grad_norm": 0.22641916573047638, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.340255591054313, |
|
"grad_norm": 0.21619798243045807, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.34185303514377, |
|
"grad_norm": 0.10568831115961075, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.343450479233227, |
|
"grad_norm": 0.1081472709774971, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.3450479233226837, |
|
"grad_norm": 0.10540477186441422, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.3466453674121406, |
|
"grad_norm": 0.11737114936113358, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.3482428115015974, |
|
"grad_norm": 0.17713451385498047, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.3498402555910542, |
|
"grad_norm": 0.2664765417575836, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.351437699680511, |
|
"grad_norm": 0.11203097552061081, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.3530351437699681, |
|
"grad_norm": 0.24115419387817383, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.354632587859425, |
|
"grad_norm": 0.13034223020076752, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.3562300319488818, |
|
"grad_norm": 1.2168869972229004, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.3578274760383386, |
|
"grad_norm": 0.04777536913752556, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0005, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3594249201277955, |
|
"grad_norm": 0.14492247998714447, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.3610223642172525, |
|
"grad_norm": 0.11650210618972778, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.3626198083067091, |
|
"grad_norm": 0.13418716192245483, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.3642172523961662, |
|
"grad_norm": 1.2964794635772705, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.365814696485623, |
|
"grad_norm": 0.17427192628383636, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.3674121405750799, |
|
"grad_norm": 0.8252497911453247, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0049, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.3690095846645367, |
|
"grad_norm": 0.3598852753639221, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.3706070287539935, |
|
"grad_norm": 0.23161448538303375, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.3722044728434506, |
|
"grad_norm": 0.17750093340873718, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.3738019169329074, |
|
"grad_norm": 0.07043975591659546, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.3753993610223643, |
|
"grad_norm": 0.06552740186452866, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.376996805111821, |
|
"grad_norm": 0.167598158121109, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.378594249201278, |
|
"grad_norm": 0.8627744913101196, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.3801916932907348, |
|
"grad_norm": 0.23403562605381012, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.3817891373801916, |
|
"grad_norm": 1.3149168491363525, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.3833865814696487, |
|
"grad_norm": 0.16277176141738892, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.3849840255591055, |
|
"grad_norm": 0.1406593918800354, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.3865814696485623, |
|
"grad_norm": 0.24820701777935028, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.3881789137380192, |
|
"grad_norm": 0.10332539677619934, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.389776357827476, |
|
"grad_norm": 0.16070209443569183, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.3913738019169328, |
|
"grad_norm": 0.16537906229496002, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.3929712460063897, |
|
"grad_norm": 0.2067955881357193, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.3945686900958467, |
|
"grad_norm": 0.1595505326986313, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.3961661341853036, |
|
"grad_norm": 0.7071959972381592, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.3977635782747604, |
|
"grad_norm": 0.08548534661531448, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0005, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.3993610223642172, |
|
"grad_norm": 1.5511912107467651, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.400958466453674, |
|
"grad_norm": 0.622124969959259, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.4025559105431311, |
|
"grad_norm": 3.853699207305908, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0054, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.4041533546325877, |
|
"grad_norm": 0.07736141234636307, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 0.2597537338733673, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.4073482428115016, |
|
"grad_norm": 0.09090615063905716, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.4089456869009584, |
|
"grad_norm": 0.4754495620727539, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.4105431309904153, |
|
"grad_norm": 0.4713440537452698, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.4121405750798721, |
|
"grad_norm": 0.3018096387386322, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.4137380191693292, |
|
"grad_norm": 0.21582446992397308, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.415335463258786, |
|
"grad_norm": 0.28057220578193665, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.4169329073482428, |
|
"grad_norm": 0.25230610370635986, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.4185303514376997, |
|
"grad_norm": 0.3251979947090149, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.4201277955271565, |
|
"grad_norm": 1.2839736938476562, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0045, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.4217252396166133, |
|
"grad_norm": 0.6521425247192383, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4233226837060702, |
|
"grad_norm": 0.19354148209095, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.4249201277955272, |
|
"grad_norm": 0.26791778206825256, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.426517571884984, |
|
"grad_norm": 0.9978561997413635, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.428115015974441, |
|
"grad_norm": 1.0886672735214233, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.4297124600638977, |
|
"grad_norm": 0.1294880360364914, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.4313099041533546, |
|
"grad_norm": 0.1127467229962349, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.4329073482428116, |
|
"grad_norm": 0.14215292036533356, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.4345047923322682, |
|
"grad_norm": 0.11055054515600204, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.4361022364217253, |
|
"grad_norm": 0.8130955100059509, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.4376996805111821, |
|
"grad_norm": 0.6208978295326233, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.439297124600639, |
|
"grad_norm": 0.10077293962240219, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.4408945686900958, |
|
"grad_norm": 0.15302683413028717, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.4424920127795526, |
|
"grad_norm": 0.2711629867553711, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.4440894568690097, |
|
"grad_norm": 0.6740185022354126, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.4456869009584665, |
|
"grad_norm": 0.09207923710346222, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.4472843450479234, |
|
"grad_norm": 0.13100707530975342, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.4488817891373802, |
|
"grad_norm": 0.22404153645038605, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.450479233226837, |
|
"grad_norm": 0.19576634466648102, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.4520766773162939, |
|
"grad_norm": 0.3745575547218323, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.4536741214057507, |
|
"grad_norm": 0.3619185984134674, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0049, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.4552715654952078, |
|
"grad_norm": 0.11963102966547012, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.4568690095846646, |
|
"grad_norm": 0.6312240958213806, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.4584664536741214, |
|
"grad_norm": 0.44361862540245056, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.4600638977635783, |
|
"grad_norm": 0.40353521704673767, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.461661341853035, |
|
"grad_norm": 0.14119647443294525, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.4632587859424921, |
|
"grad_norm": 0.07256528735160828, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.4648562300319488, |
|
"grad_norm": 0.4794658422470093, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.4664536741214058, |
|
"grad_norm": 0.2629548907279968, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.4680511182108626, |
|
"grad_norm": 0.6776370406150818, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.4696485623003195, |
|
"grad_norm": 0.14644776284694672, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.4712460063897763, |
|
"grad_norm": 0.09278970211744308, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.4728434504792332, |
|
"grad_norm": 0.08292179554700851, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.4744408945686902, |
|
"grad_norm": 0.9622533321380615, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.476038338658147, |
|
"grad_norm": 0.17527416348457336, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.4776357827476039, |
|
"grad_norm": 0.07049129158258438, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.4792332268370607, |
|
"grad_norm": 0.07232940942049026, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.4808306709265175, |
|
"grad_norm": 0.06436185538768768, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.4824281150159744, |
|
"grad_norm": 0.24670840799808502, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.4840255591054312, |
|
"grad_norm": 0.7605392932891846, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.4856230031948883, |
|
"grad_norm": 0.06602557003498077, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.487220447284345, |
|
"grad_norm": 0.15429018437862396, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.488817891373802, |
|
"grad_norm": 0.155389204621315, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.4904153354632588, |
|
"grad_norm": 0.3137715458869934, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.4920127795527156, |
|
"grad_norm": 0.13918346166610718, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.4936102236421724, |
|
"grad_norm": 0.161741241812706, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.4952076677316293, |
|
"grad_norm": 0.2871551513671875, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.4968051118210863, |
|
"grad_norm": 0.3078411817550659, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.4984025559105432, |
|
"grad_norm": 0.1792358011007309, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.08885855227708817, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.5015974440894568, |
|
"grad_norm": 0.10388941317796707, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5031948881789137, |
|
"grad_norm": 0.1879977583885193, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.5047923322683707, |
|
"grad_norm": 0.08109968155622482, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.5063897763578273, |
|
"grad_norm": 0.14684544503688812, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.5079872204472844, |
|
"grad_norm": 0.250420480966568, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.5095846645367412, |
|
"grad_norm": 0.0925571396946907, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.511182108626198, |
|
"grad_norm": 0.1837424635887146, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.5127795527156551, |
|
"grad_norm": 0.17538028955459595, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.5143769968051117, |
|
"grad_norm": 0.08512210845947266, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.5159744408945688, |
|
"grad_norm": 0.08973109722137451, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.5175718849840254, |
|
"grad_norm": 0.1241975948214531, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5191693290734825, |
|
"grad_norm": 3.0702695846557617, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.5207667731629393, |
|
"grad_norm": 0.07310276478528976, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.5223642172523961, |
|
"grad_norm": 0.2920989692211151, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.5239616613418532, |
|
"grad_norm": 0.5082775950431824, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.5255591054313098, |
|
"grad_norm": 0.1549331098794937, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.5271565495207668, |
|
"grad_norm": 0.1703399121761322, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.5287539936102237, |
|
"grad_norm": 0.21957652270793915, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.5303514376996805, |
|
"grad_norm": 0.3131657838821411, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.5319488817891374, |
|
"grad_norm": 0.3629818260669708, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 0.2978671193122864, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.5351437699680512, |
|
"grad_norm": 0.26085761189460754, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.5367412140575079, |
|
"grad_norm": 0.13067355751991272, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.538338658146965, |
|
"grad_norm": 1.0939770936965942, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0075, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.5399361022364217, |
|
"grad_norm": 0.4009106755256653, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.5415335463258786, |
|
"grad_norm": 0.69194495677948, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0071, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.5431309904153354, |
|
"grad_norm": 0.15541456639766693, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.5447284345047922, |
|
"grad_norm": 0.1687278002500534, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.5463258785942493, |
|
"grad_norm": 0.08637956529855728, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0004, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.547923322683706, |
|
"grad_norm": 0.49107223749160767, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.549520766773163, |
|
"grad_norm": 0.40780824422836304, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.5511182108626198, |
|
"grad_norm": 0.14083029329776764, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.5527156549520766, |
|
"grad_norm": 0.1509598046541214, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.5543130990415337, |
|
"grad_norm": 0.22124463319778442, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.5559105431309903, |
|
"grad_norm": 0.29571980237960815, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.5575079872204474, |
|
"grad_norm": 0.12444217503070831, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.5591054313099042, |
|
"grad_norm": 0.19605819880962372, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.560702875399361, |
|
"grad_norm": 0.36710816621780396, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.5623003194888179, |
|
"grad_norm": 0.4394298493862152, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.5638977635782747, |
|
"grad_norm": 3.3451671600341797, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.5654952076677318, |
|
"grad_norm": 0.0950264111161232, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0004, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.5670926517571884, |
|
"grad_norm": 0.5253363847732544, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 1.5686900958466454, |
|
"grad_norm": 0.08567991107702255, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 1.5702875399361023, |
|
"grad_norm": 0.21886466443538666, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 1.571884984025559, |
|
"grad_norm": 0.12182791531085968, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 1.573482428115016, |
|
"grad_norm": 0.10883834213018417, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.5750798722044728, |
|
"grad_norm": 0.2287680059671402, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 1.5766773162939298, |
|
"grad_norm": 0.7266914248466492, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 1.5782747603833864, |
|
"grad_norm": 0.11474244296550751, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 1.5798722044728435, |
|
"grad_norm": 0.23904937505722046, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 1.5814696485623003, |
|
"grad_norm": 0.055184707045555115, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.5830670926517572, |
|
"grad_norm": 0.22353950142860413, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 1.5846645367412142, |
|
"grad_norm": 0.19132664799690247, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 1.5862619808306708, |
|
"grad_norm": 0.2778601348400116, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 1.5878594249201279, |
|
"grad_norm": 0.05275161191821098, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 1.5894568690095847, |
|
"grad_norm": 0.2460019290447235, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.5910543130990416, |
|
"grad_norm": 0.3010371923446655, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 1.5926517571884984, |
|
"grad_norm": 0.10904088616371155, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 1.5942492012779552, |
|
"grad_norm": 0.05686507746577263, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 1.5958466453674123, |
|
"grad_norm": 0.20314623415470123, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 1.5974440894568689, |
|
"grad_norm": 1.588057279586792, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.599041533546326, |
|
"grad_norm": 0.48463931679725647, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 1.6006389776357828, |
|
"grad_norm": 0.6624436974525452, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.6022364217252396, |
|
"grad_norm": 0.14726407825946808, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 1.6038338658146964, |
|
"grad_norm": 0.7131458520889282, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.6054313099041533, |
|
"grad_norm": 0.46038198471069336, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.6070287539936103, |
|
"grad_norm": 0.2227628529071808, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.608626198083067, |
|
"grad_norm": 0.13341805338859558, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 1.610223642172524, |
|
"grad_norm": 0.07892493903636932, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.6118210862619808, |
|
"grad_norm": 0.12278909236192703, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 1.6134185303514377, |
|
"grad_norm": 0.5151563882827759, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.6150159744408947, |
|
"grad_norm": 0.23579691350460052, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 1.6166134185303513, |
|
"grad_norm": 0.05294935405254364, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.6182108626198084, |
|
"grad_norm": 0.12360315769910812, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 1.619808306709265, |
|
"grad_norm": 0.35527291893959045, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.621405750798722, |
|
"grad_norm": 0.25084754824638367, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.623003194888179, |
|
"grad_norm": 0.13500788807868958, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.6246006389776357, |
|
"grad_norm": 0.17942559719085693, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 1.6261980830670928, |
|
"grad_norm": 0.30760741233825684, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.6277955271565494, |
|
"grad_norm": 0.12108216434717178, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 1.6293929712460065, |
|
"grad_norm": 0.36486342549324036, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6309904153354633, |
|
"grad_norm": 0.21493099629878998, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 1.6325878594249201, |
|
"grad_norm": 0.2136039435863495, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.634185303514377, |
|
"grad_norm": 0.19852106273174286, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 1.6357827476038338, |
|
"grad_norm": 0.07942666113376617, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.6373801916932909, |
|
"grad_norm": 0.14488880336284637, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.6389776357827475, |
|
"grad_norm": 0.07575450092554092, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.6405750798722045, |
|
"grad_norm": 0.18136127293109894, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 1.6421725239616614, |
|
"grad_norm": 0.09243427962064743, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.6437699680511182, |
|
"grad_norm": 0.15622855722904205, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 1.645367412140575, |
|
"grad_norm": 0.20699208974838257, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.6469648562300319, |
|
"grad_norm": 0.17613987624645233, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 1.648562300319489, |
|
"grad_norm": 0.10338564217090607, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.6501597444089455, |
|
"grad_norm": 0.07882916927337646, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 1.6517571884984026, |
|
"grad_norm": 0.14468686282634735, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.6533546325878594, |
|
"grad_norm": 0.33097293972969055, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0037, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.6549520766773163, |
|
"grad_norm": 0.46003833413124084, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.6565495207667733, |
|
"grad_norm": 0.38343682885169983, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 1.65814696485623, |
|
"grad_norm": 0.22435209155082703, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.659744408945687, |
|
"grad_norm": 0.10138783603906631, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 0.42318132519721985, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.6629392971246006, |
|
"grad_norm": 0.10508925467729568, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 1.6645367412140575, |
|
"grad_norm": 0.3548614978790283, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.6661341853035143, |
|
"grad_norm": 0.22214291989803314, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 1.6677316293929714, |
|
"grad_norm": 0.23011279106140137, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.669329073482428, |
|
"grad_norm": 0.08691424876451492, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.670926517571885, |
|
"grad_norm": 0.6495136618614197, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.6725239616613419, |
|
"grad_norm": 0.20477008819580078, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 1.6741214057507987, |
|
"grad_norm": 0.3025433123111725, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 1.6757188498402555, |
|
"grad_norm": 0.1731894165277481, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 1.6773162939297124, |
|
"grad_norm": 0.06371040642261505, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.6789137380191694, |
|
"grad_norm": 0.14911451935768127, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 1.680511182108626, |
|
"grad_norm": 0.09349862486124039, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 1.682108626198083, |
|
"grad_norm": 0.21330159902572632, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 1.68370607028754, |
|
"grad_norm": 0.13860629498958588, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 1.6853035143769968, |
|
"grad_norm": 0.1365477740764618, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.6869009584664538, |
|
"grad_norm": 0.17503461241722107, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 1.6884984025559104, |
|
"grad_norm": 0.111385278403759, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.6900958466453675, |
|
"grad_norm": 0.06377507001161575, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 1.6916932907348243, |
|
"grad_norm": 0.0641062781214714, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 1.6932907348242812, |
|
"grad_norm": 0.09682592004537582, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.694888178913738, |
|
"grad_norm": 0.1440698206424713, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.6964856230031948, |
|
"grad_norm": 0.04743053764104843, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 1.6980830670926519, |
|
"grad_norm": 0.18199731409549713, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 1.6996805111821085, |
|
"grad_norm": 0.053704094141721725, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.7012779552715656, |
|
"grad_norm": 0.10925064235925674, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.7028753993610224, |
|
"grad_norm": 0.09475322812795639, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0005, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 1.7044728434504792, |
|
"grad_norm": 0.14424358308315277, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 1.706070287539936, |
|
"grad_norm": 0.07309069484472275, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 1.707667731629393, |
|
"grad_norm": 0.17733772099018097, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 1.70926517571885, |
|
"grad_norm": 0.9875695705413818, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0086, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7108626198083066, |
|
"grad_norm": 0.408796101808548, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 1.7124600638977636, |
|
"grad_norm": 0.17582127451896667, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 1.7140575079872205, |
|
"grad_norm": 0.1303548812866211, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 1.7156549520766773, |
|
"grad_norm": 0.10945620387792587, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 1.7172523961661343, |
|
"grad_norm": 0.07630528509616852, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.718849840255591, |
|
"grad_norm": 0.07517647743225098, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 1.720447284345048, |
|
"grad_norm": 0.0669432058930397, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 1.7220447284345048, |
|
"grad_norm": 0.1877792626619339, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 1.7236421725239617, |
|
"grad_norm": 0.04336528107523918, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 1.7252396166134185, |
|
"grad_norm": 0.06632347404956818, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7268370607028753, |
|
"grad_norm": 0.09459614008665085, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 1.7284345047923324, |
|
"grad_norm": 0.5509624481201172, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 1.730031948881789, |
|
"grad_norm": 0.2771284878253937, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 1.731629392971246, |
|
"grad_norm": 0.05486772954463959, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 1.733226837060703, |
|
"grad_norm": 0.2757831811904907, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.7348242811501597, |
|
"grad_norm": 0.04002672806382179, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 1.7364217252396166, |
|
"grad_norm": 0.06649244576692581, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0003, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 1.7380191693290734, |
|
"grad_norm": 0.11669522523880005, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 1.7396166134185305, |
|
"grad_norm": 0.12358154356479645, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 1.741214057507987, |
|
"grad_norm": 0.0829407200217247, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.7428115015974441, |
|
"grad_norm": 0.09355901926755905, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 1.744408945686901, |
|
"grad_norm": 0.0956120640039444, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.7460063897763578, |
|
"grad_norm": 0.4703699052333832, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0055, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 1.7476038338658149, |
|
"grad_norm": 0.3924386203289032, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 1.7492012779552715, |
|
"grad_norm": 0.16191145777702332, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.7507987220447285, |
|
"grad_norm": 0.06510366499423981, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 1.7523961661341851, |
|
"grad_norm": 0.19621287286281586, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 1.7539936102236422, |
|
"grad_norm": 0.1870349496603012, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 1.755591054313099, |
|
"grad_norm": 0.14898639917373657, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 1.7571884984025559, |
|
"grad_norm": 0.15484674274921417, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.758785942492013, |
|
"grad_norm": 0.1420629471540451, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 1.7603833865814695, |
|
"grad_norm": 0.12516824901103973, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 1.7619808306709266, |
|
"grad_norm": 0.09688828140497208, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 1.7635782747603834, |
|
"grad_norm": 0.09412126988172531, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 1.7651757188498403, |
|
"grad_norm": 1.9070310592651367, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.766773162939297, |
|
"grad_norm": 0.16800744831562042, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 1.768370607028754, |
|
"grad_norm": 0.10442492365837097, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 1.769968051118211, |
|
"grad_norm": 0.16451716423034668, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 1.7715654952076676, |
|
"grad_norm": 0.8484131097793579, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 1.7731629392971247, |
|
"grad_norm": 0.3258044719696045, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.7747603833865815, |
|
"grad_norm": 0.17745210230350494, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 1.7763578274760383, |
|
"grad_norm": 0.07777510583400726, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 1.7779552715654952, |
|
"grad_norm": 0.08588631451129913, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 1.779552715654952, |
|
"grad_norm": 0.6624420881271362, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 1.781150159744409, |
|
"grad_norm": 0.11576593667268753, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.7827476038338657, |
|
"grad_norm": 0.859341561794281, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 1.7843450479233227, |
|
"grad_norm": 0.10600177943706512, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 1.7859424920127795, |
|
"grad_norm": 0.14881321787834167, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 1.7875399361022364, |
|
"grad_norm": 0.05584081634879112, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 0.06437966227531433, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.79073482428115, |
|
"grad_norm": 0.2528286874294281, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 1.792332268370607, |
|
"grad_norm": 0.18848662078380585, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 1.793929712460064, |
|
"grad_norm": 0.14006567001342773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 1.7955271565495208, |
|
"grad_norm": 0.07045773416757584, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 1.7971246006389776, |
|
"grad_norm": 0.0998934954404831, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.7987220447284344, |
|
"grad_norm": 0.09063876420259476, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 1.8003194888178915, |
|
"grad_norm": 0.2747619152069092, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 1.8019169329073481, |
|
"grad_norm": 0.0671829804778099, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 1.8035143769968052, |
|
"grad_norm": 0.23768068850040436, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 1.805111821086262, |
|
"grad_norm": 0.14285339415073395, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8067092651757188, |
|
"grad_norm": 3.77717661857605, |
|
"learning_rate": 5e-07, |
|
"loss": 0.005, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 1.8083067092651757, |
|
"grad_norm": 0.24507032334804535, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 1.8099041533546325, |
|
"grad_norm": 0.15166565775871277, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 1.8115015974440896, |
|
"grad_norm": 0.1067359670996666, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 1.8130990415335462, |
|
"grad_norm": 0.18612955510616302, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.8146964856230032, |
|
"grad_norm": 0.17414437234401703, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 1.81629392971246, |
|
"grad_norm": 0.3064412474632263, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 1.817891373801917, |
|
"grad_norm": 0.071932353079319, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 1.819488817891374, |
|
"grad_norm": 0.1119665578007698, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 1.8210862619808306, |
|
"grad_norm": 0.20450662076473236, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8226837060702876, |
|
"grad_norm": 0.13636766374111176, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 1.8242811501597445, |
|
"grad_norm": 0.07295648753643036, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 1.8258785942492013, |
|
"grad_norm": 0.17274846136569977, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 1.8274760383386581, |
|
"grad_norm": 0.14178737998008728, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 1.829073482428115, |
|
"grad_norm": 0.2096203863620758, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.830670926517572, |
|
"grad_norm": 0.09562284499406815, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 1.8322683706070286, |
|
"grad_norm": 0.10589710623025894, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 1.8338658146964857, |
|
"grad_norm": 0.2623519003391266, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 1.8354632587859425, |
|
"grad_norm": 0.06434428691864014, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 1.8370607028753994, |
|
"grad_norm": 0.31623294949531555, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8386581469648562, |
|
"grad_norm": 0.11535608768463135, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 1.840255591054313, |
|
"grad_norm": 0.25482088327407837, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 1.84185303514377, |
|
"grad_norm": 0.1523568332195282, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 1.8434504792332267, |
|
"grad_norm": 0.09644993394613266, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 1.8450479233226837, |
|
"grad_norm": 0.30026814341545105, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.8466453674121406, |
|
"grad_norm": 5.034646511077881, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 1.8482428115015974, |
|
"grad_norm": 0.1875925064086914, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 1.8498402555910545, |
|
"grad_norm": 0.15650227665901184, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 1.851437699680511, |
|
"grad_norm": 0.17901848256587982, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 1.8530351437699681, |
|
"grad_norm": 0.27782970666885376, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.854632587859425, |
|
"grad_norm": 0.11091984808444977, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 1.8562300319488818, |
|
"grad_norm": 0.3836122453212738, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 1.8578274760383386, |
|
"grad_norm": 2.26542067527771, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 1.8594249201277955, |
|
"grad_norm": 0.8616245985031128, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0051, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 1.8610223642172525, |
|
"grad_norm": 0.1418013572692871, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.8626198083067091, |
|
"grad_norm": 0.20582883059978485, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 1.8642172523961662, |
|
"grad_norm": 0.05279695987701416, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 1.865814696485623, |
|
"grad_norm": 0.30152246356010437, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 1.8674121405750799, |
|
"grad_norm": 1.215949296951294, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0042, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 1.8690095846645367, |
|
"grad_norm": 0.1583871841430664, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.8706070287539935, |
|
"grad_norm": 0.18552114069461823, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 1.8722044728434506, |
|
"grad_norm": 0.25958728790283203, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 1.8738019169329072, |
|
"grad_norm": 0.3198787271976471, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 1.8753993610223643, |
|
"grad_norm": 0.4293941557407379, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 1.876996805111821, |
|
"grad_norm": 0.12936322391033173, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.878594249201278, |
|
"grad_norm": 0.26641571521759033, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 1.880191693290735, |
|
"grad_norm": 1.098074197769165, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0036, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 1.8817891373801916, |
|
"grad_norm": 0.11431043595075607, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 1.8833865814696487, |
|
"grad_norm": 0.37071993947029114, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 1.8849840255591053, |
|
"grad_norm": 0.08628468960523605, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.8865814696485623, |
|
"grad_norm": 0.14398378133773804, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 1.8881789137380192, |
|
"grad_norm": 0.35993340611457825, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 1.889776357827476, |
|
"grad_norm": 0.1559559851884842, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 1.891373801916933, |
|
"grad_norm": 0.2564754784107208, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 1.8929712460063897, |
|
"grad_norm": 0.11279148608446121, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0005, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.8945686900958467, |
|
"grad_norm": 0.08808083087205887, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 1.8961661341853036, |
|
"grad_norm": 0.10382703691720963, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0004, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 1.8977635782747604, |
|
"grad_norm": 0.15366709232330322, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 1.8993610223642172, |
|
"grad_norm": 0.10865357518196106, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 1.900958466453674, |
|
"grad_norm": 0.24182426929473877, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9025559105431311, |
|
"grad_norm": 0.4804140627384186, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0048, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 1.9041533546325877, |
|
"grad_norm": 0.23404286801815033, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 1.9057507987220448, |
|
"grad_norm": 0.36086928844451904, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 1.9073482428115016, |
|
"grad_norm": 0.5354902744293213, |
|
"learning_rate": 5e-07, |
|
"loss": 0.004, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 1.9089456869009584, |
|
"grad_norm": 0.11621209979057312, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.9105431309904153, |
|
"grad_norm": 0.1943303495645523, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 1.9121405750798721, |
|
"grad_norm": 0.2628275454044342, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 1.9137380191693292, |
|
"grad_norm": 0.20775623619556427, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 1.9153354632587858, |
|
"grad_norm": 0.12529809772968292, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 0.09771095961332321, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9185303514376997, |
|
"grad_norm": 0.10013385117053986, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 1.9201277955271565, |
|
"grad_norm": 0.29045844078063965, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 1.9217252396166136, |
|
"grad_norm": 0.15383721888065338, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 1.9233226837060702, |
|
"grad_norm": 0.6897266507148743, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 1.9249201277955272, |
|
"grad_norm": 0.15652583539485931, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.926517571884984, |
|
"grad_norm": 0.0853152722120285, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 1.928115015974441, |
|
"grad_norm": 0.11881982535123825, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 1.9297124600638977, |
|
"grad_norm": 0.10427961498498917, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 1.9313099041533546, |
|
"grad_norm": 0.1068776324391365, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 1.9329073482428116, |
|
"grad_norm": 0.13248351216316223, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9345047923322682, |
|
"grad_norm": 0.25468289852142334, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 1.9361022364217253, |
|
"grad_norm": 0.16791756451129913, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 1.9376996805111821, |
|
"grad_norm": 0.13099579513072968, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 1.939297124600639, |
|
"grad_norm": 0.26930350065231323, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 1.9408945686900958, |
|
"grad_norm": 2.077728271484375, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.9424920127795526, |
|
"grad_norm": 0.31897902488708496, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 1.9440894568690097, |
|
"grad_norm": 0.07247374951839447, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 1.9456869009584663, |
|
"grad_norm": 0.11995717883110046, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 1.9472843450479234, |
|
"grad_norm": 0.2621629536151886, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 1.9488817891373802, |
|
"grad_norm": 0.1726490557193756, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.950479233226837, |
|
"grad_norm": 0.16272880136966705, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 1.952076677316294, |
|
"grad_norm": 0.10036841779947281, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 1.9536741214057507, |
|
"grad_norm": 0.7988063097000122, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0046, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 1.9552715654952078, |
|
"grad_norm": 0.2782542407512665, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 1.9568690095846646, |
|
"grad_norm": 0.2055627703666687, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.9584664536741214, |
|
"grad_norm": 0.19908927381038666, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 1.9600638977635783, |
|
"grad_norm": 0.21738676726818085, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 1.961661341853035, |
|
"grad_norm": 0.16997866332530975, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 1.9632587859424921, |
|
"grad_norm": 0.10853756219148636, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 1.9648562300319488, |
|
"grad_norm": 0.2967356741428375, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.9664536741214058, |
|
"grad_norm": 0.24150174856185913, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 1.9680511182108626, |
|
"grad_norm": 0.09346190094947815, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 1.9696485623003195, |
|
"grad_norm": 0.16779378056526184, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 1.9712460063897763, |
|
"grad_norm": 0.1114976555109024, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 1.9728434504792332, |
|
"grad_norm": 0.45826414227485657, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 1.9744408945686902, |
|
"grad_norm": 0.4743911921977997, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0034, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 1.9760383386581468, |
|
"grad_norm": 0.144659623503685, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 1.9776357827476039, |
|
"grad_norm": 0.9392029643058777, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 1.9792332268370607, |
|
"grad_norm": 0.5866590738296509, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 1.9808306709265175, |
|
"grad_norm": 0.2669076919555664, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.9824281150159746, |
|
"grad_norm": 0.34604692459106445, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 1.9840255591054312, |
|
"grad_norm": 0.27668237686157227, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 1.9856230031948883, |
|
"grad_norm": 0.1560208648443222, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 1.9872204472843449, |
|
"grad_norm": 0.35375985503196716, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 1.988817891373802, |
|
"grad_norm": 0.134053036570549, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 1.9904153354632588, |
|
"grad_norm": 0.10250476002693176, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 1.9920127795527156, |
|
"grad_norm": 0.13041843473911285, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 1.9936102236421727, |
|
"grad_norm": 0.062465403228998184, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 1.9952076677316293, |
|
"grad_norm": 0.10093759000301361, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 1.9968051118210863, |
|
"grad_norm": 0.08476297557353973, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.9984025559105432, |
|
"grad_norm": 0.22871960699558258, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.19058428704738617, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.001597444089457, |
|
"grad_norm": 0.2544306218624115, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.0031948881789137, |
|
"grad_norm": 0.14264468848705292, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.0047923322683707, |
|
"grad_norm": 0.18681064248085022, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.0063897763578273, |
|
"grad_norm": 0.17501874268054962, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.0079872204472844, |
|
"grad_norm": 0.1236346885561943, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.009584664536741, |
|
"grad_norm": 0.08623294532299042, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.011182108626198, |
|
"grad_norm": 0.12931625545024872, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.012779552715655, |
|
"grad_norm": 0.10413195192813873, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.0143769968051117, |
|
"grad_norm": 0.23485882580280304, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.015974440894569, |
|
"grad_norm": 0.07863974571228027, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0005, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.0175718849840254, |
|
"grad_norm": 0.33484408259391785, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.0191693290734825, |
|
"grad_norm": 0.31939253211021423, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.0207667731629395, |
|
"grad_norm": 0.10941127687692642, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.022364217252396, |
|
"grad_norm": 0.10784590244293213, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.023961661341853, |
|
"grad_norm": 0.16436795890331268, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.02555910543131, |
|
"grad_norm": 0.09606944024562836, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.027156549520767, |
|
"grad_norm": 0.17230142652988434, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.0287539936102235, |
|
"grad_norm": 0.5120985507965088, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.0303514376996805, |
|
"grad_norm": 0.06792975217103958, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.0319488817891376, |
|
"grad_norm": 0.12373680621385574, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.033546325878594, |
|
"grad_norm": 0.10998155921697617, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.0351437699680512, |
|
"grad_norm": 0.15938100218772888, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.036741214057508, |
|
"grad_norm": 0.07384390383958817, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.038338658146965, |
|
"grad_norm": 0.23505450785160065, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.0399361022364215, |
|
"grad_norm": 1.6043593883514404, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0044, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.0415335463258786, |
|
"grad_norm": 0.08369333297014236, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.0431309904153356, |
|
"grad_norm": 0.15266691148281097, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.0447284345047922, |
|
"grad_norm": 0.1948157548904419, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.0463258785942493, |
|
"grad_norm": 0.33584949374198914, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.047923322683706, |
|
"grad_norm": 0.6691318154335022, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.049520766773163, |
|
"grad_norm": 0.1382388472557068, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.0511182108626196, |
|
"grad_norm": 0.12497832626104355, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.0527156549520766, |
|
"grad_norm": 0.27157703042030334, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.0543130990415337, |
|
"grad_norm": 0.2626846134662628, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.0559105431309903, |
|
"grad_norm": 0.16861018538475037, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.0575079872204474, |
|
"grad_norm": 0.049949146807193756, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0004, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.059105431309904, |
|
"grad_norm": 0.0886000543832779, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.060702875399361, |
|
"grad_norm": 0.20434832572937012, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.062300319488818, |
|
"grad_norm": 0.11731091886758804, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.0638977635782747, |
|
"grad_norm": 0.20026597380638123, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.0654952076677318, |
|
"grad_norm": 0.27669885754585266, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.0670926517571884, |
|
"grad_norm": 0.2785263657569885, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.0686900958466454, |
|
"grad_norm": 0.15540768206119537, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.070287539936102, |
|
"grad_norm": 0.09532786905765533, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.071884984025559, |
|
"grad_norm": 0.1427212655544281, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.073482428115016, |
|
"grad_norm": 0.3582955300807953, |
|
"learning_rate": 5e-07, |
|
"loss": 0.003, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.0750798722044728, |
|
"grad_norm": 0.3676643669605255, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.07667731629393, |
|
"grad_norm": 0.15875279903411865, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.0782747603833864, |
|
"grad_norm": 0.17880000174045563, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.0798722044728435, |
|
"grad_norm": 0.09481951594352722, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.0814696485623, |
|
"grad_norm": 0.0720115378499031, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.083067092651757, |
|
"grad_norm": 0.1885913908481598, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.084664536741214, |
|
"grad_norm": 0.07471724599599838, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.086261980830671, |
|
"grad_norm": 0.10295652598142624, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.087859424920128, |
|
"grad_norm": 0.48676663637161255, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.0894568690095845, |
|
"grad_norm": 0.06689424067735672, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.0910543130990416, |
|
"grad_norm": 0.3771530091762543, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.0926517571884986, |
|
"grad_norm": 0.37652865052223206, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.094249201277955, |
|
"grad_norm": 0.0743250697851181, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.0958466453674123, |
|
"grad_norm": 0.1055615022778511, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.097444089456869, |
|
"grad_norm": 0.25487279891967773, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.099041533546326, |
|
"grad_norm": 0.13495591282844543, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.1006389776357826, |
|
"grad_norm": 0.17297884821891785, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.1022364217252396, |
|
"grad_norm": 0.14259670674800873, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.1038338658146967, |
|
"grad_norm": 0.06441140174865723, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.1054313099041533, |
|
"grad_norm": 0.0832604393362999, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.1070287539936103, |
|
"grad_norm": 0.6112750172615051, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 2.108626198083067, |
|
"grad_norm": 0.14800317585468292, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.110223642172524, |
|
"grad_norm": 0.389436811208725, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1321 |
|
}, |
|
{ |
|
"epoch": 2.1118210862619806, |
|
"grad_norm": 0.1899157464504242, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1322 |
|
}, |
|
{ |
|
"epoch": 2.1134185303514377, |
|
"grad_norm": 0.08389966934919357, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 2.1150159744408947, |
|
"grad_norm": 0.10899113118648529, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1324 |
|
}, |
|
{ |
|
"epoch": 2.1166134185303513, |
|
"grad_norm": 0.10662740468978882, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 2.1182108626198084, |
|
"grad_norm": 0.11692536622285843, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1326 |
|
}, |
|
{ |
|
"epoch": 2.119808306709265, |
|
"grad_norm": 0.09900128096342087, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1327 |
|
}, |
|
{ |
|
"epoch": 2.121405750798722, |
|
"grad_norm": 0.08334943652153015, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1328 |
|
}, |
|
{ |
|
"epoch": 2.123003194888179, |
|
"grad_norm": 0.10552877187728882, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1329 |
|
}, |
|
{ |
|
"epoch": 2.1246006389776357, |
|
"grad_norm": 0.8137688040733337, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0014, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.126198083067093, |
|
"grad_norm": 0.06990483403205872, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 2.1277955271565494, |
|
"grad_norm": 0.1675483137369156, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1332 |
|
}, |
|
{ |
|
"epoch": 2.1293929712460065, |
|
"grad_norm": 0.2427297979593277, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0024, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 2.130990415335463, |
|
"grad_norm": 0.1828804910182953, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 2.13258785942492, |
|
"grad_norm": 0.35362717509269714, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0029, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 2.134185303514377, |
|
"grad_norm": 0.21310366690158844, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1336 |
|
}, |
|
{ |
|
"epoch": 2.135782747603834, |
|
"grad_norm": 0.14100836217403412, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 2.137380191693291, |
|
"grad_norm": 0.10196174681186676, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 2.1389776357827475, |
|
"grad_norm": 0.2148902863264084, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1339 |
|
}, |
|
{ |
|
"epoch": 2.1405750798722045, |
|
"grad_norm": 0.05017423257231712, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.142172523961661, |
|
"grad_norm": 2.62032151222229, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0035, |
|
"step": 1341 |
|
}, |
|
{ |
|
"epoch": 2.143769968051118, |
|
"grad_norm": 0.23405258357524872, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 2.1453674121405752, |
|
"grad_norm": 0.05571659281849861, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1343 |
|
}, |
|
{ |
|
"epoch": 2.146964856230032, |
|
"grad_norm": 0.17637008428573608, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.148562300319489, |
|
"grad_norm": 0.10391382873058319, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 2.1501597444089455, |
|
"grad_norm": 0.5443282723426819, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0027, |
|
"step": 1346 |
|
}, |
|
{ |
|
"epoch": 2.1517571884984026, |
|
"grad_norm": 0.09875518828630447, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1347 |
|
}, |
|
{ |
|
"epoch": 2.1533546325878596, |
|
"grad_norm": 0.3345401883125305, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1348 |
|
}, |
|
{ |
|
"epoch": 2.1549520766773163, |
|
"grad_norm": 0.14103494584560394, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1349 |
|
}, |
|
{ |
|
"epoch": 2.1565495207667733, |
|
"grad_norm": 0.1581650972366333, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.15814696485623, |
|
"grad_norm": 0.1288004070520401, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 2.159744408945687, |
|
"grad_norm": 0.19039283692836761, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0026, |
|
"step": 1352 |
|
}, |
|
{ |
|
"epoch": 2.1613418530351436, |
|
"grad_norm": 0.15048496425151825, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0009, |
|
"step": 1353 |
|
}, |
|
{ |
|
"epoch": 2.1629392971246006, |
|
"grad_norm": 0.14754633605480194, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1354 |
|
}, |
|
{ |
|
"epoch": 2.1645367412140577, |
|
"grad_norm": 0.20571894943714142, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0033, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 2.1661341853035143, |
|
"grad_norm": 0.12298233062028885, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1356 |
|
}, |
|
{ |
|
"epoch": 2.1677316293929714, |
|
"grad_norm": 0.060658592730760574, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0007, |
|
"step": 1357 |
|
}, |
|
{ |
|
"epoch": 2.169329073482428, |
|
"grad_norm": 0.2829779088497162, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 2.170926517571885, |
|
"grad_norm": 0.1882810741662979, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1359 |
|
}, |
|
{ |
|
"epoch": 2.1725239616613417, |
|
"grad_norm": 0.5703380703926086, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0039, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.1741214057507987, |
|
"grad_norm": 0.7452375292778015, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0052, |
|
"step": 1361 |
|
}, |
|
{ |
|
"epoch": 2.1757188498402558, |
|
"grad_norm": 0.593784749507904, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 1362 |
|
}, |
|
{ |
|
"epoch": 2.1773162939297124, |
|
"grad_norm": 0.472791850566864, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0041, |
|
"step": 1363 |
|
}, |
|
{ |
|
"epoch": 2.1789137380191694, |
|
"grad_norm": 0.2769353687763214, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 2.180511182108626, |
|
"grad_norm": 0.5703862309455872, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0031, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 2.182108626198083, |
|
"grad_norm": 0.19744770228862762, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1366 |
|
}, |
|
{ |
|
"epoch": 2.18370607028754, |
|
"grad_norm": 0.7886673212051392, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0043, |
|
"step": 1367 |
|
}, |
|
{ |
|
"epoch": 2.1853035143769968, |
|
"grad_norm": 0.13506360352039337, |
|
"learning_rate": 5e-07, |
|
"loss": 0.001, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 2.186900958466454, |
|
"grad_norm": 0.2344270497560501, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1369 |
|
}, |
|
{ |
|
"epoch": 2.1884984025559104, |
|
"grad_norm": 0.19468432664871216, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.1900958466453675, |
|
"grad_norm": 0.1934957057237625, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1371 |
|
}, |
|
{ |
|
"epoch": 2.191693290734824, |
|
"grad_norm": 0.2196214497089386, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.193290734824281, |
|
"grad_norm": 0.13796208798885345, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1373 |
|
}, |
|
{ |
|
"epoch": 2.194888178913738, |
|
"grad_norm": 0.27561473846435547, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 1374 |
|
}, |
|
{ |
|
"epoch": 2.196485623003195, |
|
"grad_norm": 0.12160798162221909, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0013, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 2.198083067092652, |
|
"grad_norm": 0.10912802815437317, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 2.1996805111821085, |
|
"grad_norm": 0.09718818962574005, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 2.2012779552715656, |
|
"grad_norm": 0.11051057279109955, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0006, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 2.202875399361022, |
|
"grad_norm": 0.12758323550224304, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 2.2044728434504792, |
|
"grad_norm": 0.08249010145664215, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0012, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.2060702875399363, |
|
"grad_norm": 0.1392274647951126, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1381 |
|
}, |
|
{ |
|
"epoch": 2.207667731629393, |
|
"grad_norm": 0.10520713776350021, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0023, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 2.20926517571885, |
|
"grad_norm": 0.10856632888317108, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0038, |
|
"step": 1383 |
|
}, |
|
{ |
|
"epoch": 2.2108626198083066, |
|
"grad_norm": 0.08818315714597702, |
|
"learning_rate": 5e-07, |
|
"loss": 0.002, |
|
"step": 1384 |
|
}, |
|
{ |
|
"epoch": 2.2124600638977636, |
|
"grad_norm": 0.13421356678009033, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0016, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 2.2140575079872207, |
|
"grad_norm": 0.32427483797073364, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 2.2156549520766773, |
|
"grad_norm": 0.2926160991191864, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 2.2172523961661343, |
|
"grad_norm": 0.2298290878534317, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0032, |
|
"step": 1388 |
|
}, |
|
{ |
|
"epoch": 2.218849840255591, |
|
"grad_norm": 0.09151104092597961, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0015, |
|
"step": 1389 |
|
}, |
|
{ |
|
"epoch": 2.220447284345048, |
|
"grad_norm": 0.10674551874399185, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0021, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.2220447284345046, |
|
"grad_norm": 0.2997196614742279, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0025, |
|
"step": 1391 |
|
}, |
|
{ |
|
"epoch": 2.2236421725239617, |
|
"grad_norm": 0.11480361223220825, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0028, |
|
"step": 1392 |
|
}, |
|
{ |
|
"epoch": 2.2252396166134187, |
|
"grad_norm": 0.07671017944812775, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 2.2268370607028753, |
|
"grad_norm": 0.1539941430091858, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1394 |
|
}, |
|
{ |
|
"epoch": 2.2284345047923324, |
|
"grad_norm": 0.12852609157562256, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0008, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 2.230031948881789, |
|
"grad_norm": 0.13677293062210083, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1396 |
|
}, |
|
{ |
|
"epoch": 2.231629392971246, |
|
"grad_norm": 0.05409352481365204, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0011, |
|
"step": 1397 |
|
}, |
|
{ |
|
"epoch": 2.2332268370607027, |
|
"grad_norm": 0.07923945039510727, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0018, |
|
"step": 1398 |
|
}, |
|
{ |
|
"epoch": 2.2348242811501597, |
|
"grad_norm": 0.10353945195674896, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0017, |
|
"step": 1399 |
|
}, |
|
{ |
|
"epoch": 2.236421725239617, |
|
"grad_norm": 0.24928894639015198, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0019, |
|
"step": 1400 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 3130, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 387658364567552.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|