{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992505620784412,
  "eval_steps": 500,
  "global_step": 1250,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007994004496627529,
      "grad_norm": 29.08027928947176,
      "learning_rate": 0.0,
      "loss": 1.7209,
      "step": 1
    },
    {
      "epoch": 0.0015988008993255058,
      "grad_norm": 9.836200747540412,
      "learning_rate": 2.7023815442731975e-06,
      "loss": 1.2157,
      "step": 2
    },
    {
      "epoch": 0.002398201348988259,
      "grad_norm": 8.732062138142359,
      "learning_rate": 4.2831734103139475e-06,
      "loss": 1.2213,
      "step": 3
    },
    {
      "epoch": 0.0031976017986510116,
      "grad_norm": 8.98196608627301,
      "learning_rate": 5.404763088546395e-06,
      "loss": 1.3207,
      "step": 4
    },
    {
      "epoch": 0.003997002248313765,
      "grad_norm": 3.104558237084713,
      "learning_rate": 6.274735630753034e-06,
      "loss": 1.2009,
      "step": 5
    },
    {
      "epoch": 0.004796402697976518,
      "grad_norm": 2.9678718492236587,
      "learning_rate": 6.985554954587145e-06,
      "loss": 1.1976,
      "step": 6
    },
    {
      "epoch": 0.0055958031476392705,
      "grad_norm": 2.324032539210556,
      "learning_rate": 7.586544129592991e-06,
      "loss": 1.1668,
      "step": 7
    },
    {
      "epoch": 0.006395203597302023,
      "grad_norm": 2.422145845478249,
      "learning_rate": 8.107144632819592e-06,
      "loss": 1.1056,
      "step": 8
    },
    {
      "epoch": 0.007194604046964776,
      "grad_norm": 2.7795213648793236,
      "learning_rate": 8.566346820627895e-06,
      "loss": 1.1439,
      "step": 9
    },
    {
      "epoch": 0.00799400449662753,
      "grad_norm": 2.304173813168448,
      "learning_rate": 8.977117175026234e-06,
      "loss": 1.0859,
      "step": 10
    },
    {
      "epoch": 0.008793404946290282,
      "grad_norm": 2.531444418518243,
      "learning_rate": 9.348704159880588e-06,
      "loss": 1.1012,
      "step": 11
    },
    {
      "epoch": 0.009592805395953035,
      "grad_norm": 2.623744403178605,
      "learning_rate": 9.687936498860343e-06,
      "loss": 1.1248,
      "step": 12
    },
    {
      "epoch": 0.010392205845615787,
      "grad_norm": 2.174204408077499,
      "learning_rate": 1e-05,
      "loss": 1.0862,
      "step": 13
    },
    {
      "epoch": 0.011191606295278541,
      "grad_norm": 2.1375382895043553,
      "learning_rate": 1e-05,
      "loss": 1.0843,
      "step": 14
    },
    {
      "epoch": 0.011991006744941295,
      "grad_norm": 2.3409573740941245,
      "learning_rate": 1e-05,
      "loss": 1.1007,
      "step": 15
    },
    {
      "epoch": 0.012790407194604047,
      "grad_norm": 2.2321265748114443,
      "learning_rate": 1e-05,
      "loss": 1.0199,
      "step": 16
    },
    {
      "epoch": 0.0135898076442668,
      "grad_norm": 2.2607491323391997,
      "learning_rate": 1e-05,
      "loss": 1.1098,
      "step": 17
    },
    {
      "epoch": 0.014389208093929552,
      "grad_norm": 2.1345387966971328,
      "learning_rate": 1e-05,
      "loss": 1.0852,
      "step": 18
    },
    {
      "epoch": 0.015188608543592306,
      "grad_norm": 2.0836111411515224,
      "learning_rate": 1e-05,
      "loss": 1.0227,
      "step": 19
    },
    {
      "epoch": 0.01598800899325506,
      "grad_norm": 2.1200221376043826,
      "learning_rate": 1e-05,
      "loss": 1.0764,
      "step": 20
    },
    {
      "epoch": 0.016787409442917813,
      "grad_norm": 2.3277973958562947,
      "learning_rate": 1e-05,
      "loss": 1.0425,
      "step": 21
    },
    {
      "epoch": 0.017586809892580563,
      "grad_norm": 2.4310258538885523,
      "learning_rate": 1e-05,
      "loss": 1.0437,
      "step": 22
    },
    {
      "epoch": 0.018386210342243317,
      "grad_norm": 2.317560454038046,
      "learning_rate": 1e-05,
      "loss": 1.0027,
      "step": 23
    },
    {
      "epoch": 0.01918561079190607,
      "grad_norm": 2.1153613214468923,
      "learning_rate": 1e-05,
      "loss": 1.0878,
      "step": 24
    },
    {
      "epoch": 0.019985011241568824,
      "grad_norm": 2.1138684148369884,
      "learning_rate": 1e-05,
      "loss": 1.0797,
      "step": 25
    },
    {
      "epoch": 0.020784411691231575,
      "grad_norm": 2.3869844261967765,
      "learning_rate": 1e-05,
      "loss": 1.1126,
      "step": 26
    },
    {
      "epoch": 0.02158381214089433,
      "grad_norm": 1.9441687206265474,
      "learning_rate": 1e-05,
      "loss": 1.0356,
      "step": 27
    },
    {
      "epoch": 0.022383212590557082,
      "grad_norm": 1.8858684427680283,
      "learning_rate": 1e-05,
      "loss": 1.0112,
      "step": 28
    },
    {
      "epoch": 0.023182613040219836,
      "grad_norm": 2.0111908392780924,
      "learning_rate": 1e-05,
      "loss": 1.025,
      "step": 29
    },
    {
      "epoch": 0.02398201348988259,
      "grad_norm": 2.3223850597645885,
      "learning_rate": 1e-05,
      "loss": 1.0608,
      "step": 30
    },
    {
      "epoch": 0.02478141393954534,
      "grad_norm": 2.282704095464692,
      "learning_rate": 1e-05,
      "loss": 0.9884,
      "step": 31
    },
    {
      "epoch": 0.025580814389208093,
      "grad_norm": 2.2485551406767392,
      "learning_rate": 1e-05,
      "loss": 1.1609,
      "step": 32
    },
    {
      "epoch": 0.026380214838870847,
      "grad_norm": 1.9632420284716974,
      "learning_rate": 1e-05,
      "loss": 1.0541,
      "step": 33
    },
    {
      "epoch": 0.0271796152885336,
      "grad_norm": 2.7873694225738963,
      "learning_rate": 1e-05,
      "loss": 0.9917,
      "step": 34
    },
    {
      "epoch": 0.027979015738196354,
      "grad_norm": 2.048096411620949,
      "learning_rate": 1e-05,
      "loss": 1.012,
      "step": 35
    },
    {
      "epoch": 0.028778416187859104,
      "grad_norm": 2.0309944076384494,
      "learning_rate": 1e-05,
      "loss": 1.0212,
      "step": 36
    },
    {
      "epoch": 0.029577816637521858,
      "grad_norm": 2.0949849865314643,
      "learning_rate": 1e-05,
      "loss": 1.0659,
      "step": 37
    },
    {
      "epoch": 0.03037721708718461,
      "grad_norm": 2.059202087957289,
      "learning_rate": 1e-05,
      "loss": 1.0168,
      "step": 38
    },
    {
      "epoch": 0.031176617536847365,
      "grad_norm": 2.0975700429920923,
      "learning_rate": 1e-05,
      "loss": 1.0216,
      "step": 39
    },
    {
      "epoch": 0.03197601798651012,
      "grad_norm": 2.0062452254349714,
      "learning_rate": 1e-05,
      "loss": 1.0274,
      "step": 40
    },
    {
      "epoch": 0.03277541843617287,
      "grad_norm": 2.222854538118324,
      "learning_rate": 1e-05,
      "loss": 1.0656,
      "step": 41
    },
    {
      "epoch": 0.033574818885835626,
      "grad_norm": 1.943599598185592,
      "learning_rate": 1e-05,
      "loss": 1.0782,
      "step": 42
    },
    {
      "epoch": 0.03437421933549838,
      "grad_norm": 1.9956218218997503,
      "learning_rate": 1e-05,
      "loss": 1.0625,
      "step": 43
    },
    {
      "epoch": 0.03517361978516113,
      "grad_norm": 2.1611198939392096,
      "learning_rate": 1e-05,
      "loss": 1.041,
      "step": 44
    },
    {
      "epoch": 0.035973020234823884,
      "grad_norm": 1.9975085093102276,
      "learning_rate": 1e-05,
      "loss": 1.0046,
      "step": 45
    },
    {
      "epoch": 0.036772420684486634,
      "grad_norm": 1.8691307201375191,
      "learning_rate": 1e-05,
      "loss": 1.0243,
      "step": 46
    },
    {
      "epoch": 0.03757182113414939,
      "grad_norm": 2.1275630339366667,
      "learning_rate": 1e-05,
      "loss": 1.0565,
      "step": 47
    },
    {
      "epoch": 0.03837122158381214,
      "grad_norm": 1.998529171481795,
      "learning_rate": 1e-05,
      "loss": 0.972,
      "step": 48
    },
    {
      "epoch": 0.03917062203347489,
      "grad_norm": 2.039027660741352,
      "learning_rate": 1e-05,
      "loss": 0.9604,
      "step": 49
    },
    {
      "epoch": 0.03997002248313765,
      "grad_norm": 1.8761207165317535,
      "learning_rate": 1e-05,
      "loss": 0.9985,
      "step": 50
    },
    {
      "epoch": 0.0407694229328004,
      "grad_norm": 2.089454409239614,
      "learning_rate": 1e-05,
      "loss": 0.9963,
      "step": 51
    },
    {
      "epoch": 0.04156882338246315,
      "grad_norm": 2.0445251187040134,
      "learning_rate": 1e-05,
      "loss": 1.0192,
      "step": 52
    },
    {
      "epoch": 0.042368223832125906,
      "grad_norm": 2.205588684592072,
      "learning_rate": 1e-05,
      "loss": 0.9684,
      "step": 53
    },
    {
      "epoch": 0.04316762428178866,
      "grad_norm": 2.0208537418585957,
      "learning_rate": 1e-05,
      "loss": 1.0063,
      "step": 54
    },
    {
      "epoch": 0.043967024731451414,
      "grad_norm": 1.7869034029258606,
      "learning_rate": 1e-05,
      "loss": 1.0368,
      "step": 55
    },
    {
      "epoch": 0.044766425181114164,
      "grad_norm": 1.8924926601293262,
      "learning_rate": 1e-05,
      "loss": 1.011,
      "step": 56
    },
    {
      "epoch": 0.045565825630776914,
      "grad_norm": 2.151723728750191,
      "learning_rate": 1e-05,
      "loss": 1.0275,
      "step": 57
    },
    {
      "epoch": 0.04636522608043967,
      "grad_norm": 2.388300807396013,
      "learning_rate": 1e-05,
      "loss": 0.996,
      "step": 58
    },
    {
      "epoch": 0.04716462653010242,
      "grad_norm": 1.9793946104980729,
      "learning_rate": 1e-05,
      "loss": 1.028,
      "step": 59
    },
    {
      "epoch": 0.04796402697976518,
      "grad_norm": 2.050014939910642,
      "learning_rate": 1e-05,
      "loss": 1.0109,
      "step": 60
    },
    {
      "epoch": 0.04876342742942793,
      "grad_norm": 1.8842986029616882,
      "learning_rate": 1e-05,
      "loss": 0.9752,
      "step": 61
    },
    {
      "epoch": 0.04956282787909068,
      "grad_norm": 1.7444876770795246,
      "learning_rate": 1e-05,
      "loss": 1.0228,
      "step": 62
    },
    {
      "epoch": 0.050362228328753436,
      "grad_norm": 1.8304676501403103,
      "learning_rate": 1e-05,
      "loss": 0.9747,
      "step": 63
    },
    {
      "epoch": 0.051161628778416186,
      "grad_norm": 2.1540039062270164,
      "learning_rate": 1e-05,
      "loss": 0.9955,
      "step": 64
    },
    {
      "epoch": 0.051961029228078943,
      "grad_norm": 1.6953401550549316,
      "learning_rate": 1e-05,
      "loss": 0.9811,
      "step": 65
    },
    {
      "epoch": 0.052760429677741694,
      "grad_norm": 2.1460856566454987,
      "learning_rate": 1e-05,
      "loss": 1.0365,
      "step": 66
    },
    {
      "epoch": 0.053559830127404444,
      "grad_norm": 1.7390283863943892,
      "learning_rate": 1e-05,
      "loss": 1.0277,
      "step": 67
    },
    {
      "epoch": 0.0543592305770672,
      "grad_norm": 2.0836221978397442,
      "learning_rate": 1e-05,
      "loss": 0.9953,
      "step": 68
    },
    {
      "epoch": 0.05515863102672995,
      "grad_norm": 1.7905448109320714,
      "learning_rate": 1e-05,
      "loss": 0.9944,
      "step": 69
    },
    {
      "epoch": 0.05595803147639271,
      "grad_norm": 1.9504348528444273,
      "learning_rate": 1e-05,
      "loss": 0.9808,
      "step": 70
    },
    {
      "epoch": 0.05675743192605546,
      "grad_norm": 1.834972840275589,
      "learning_rate": 1e-05,
      "loss": 0.9992,
      "step": 71
    },
    {
      "epoch": 0.05755683237571821,
      "grad_norm": 1.845072042104488,
      "learning_rate": 1e-05,
      "loss": 0.9811,
      "step": 72
    },
    {
      "epoch": 0.058356232825380966,
      "grad_norm": 1.85534014854077,
      "learning_rate": 1e-05,
      "loss": 0.9864,
      "step": 73
    },
    {
      "epoch": 0.059155633275043716,
      "grad_norm": 1.8650405189842276,
      "learning_rate": 1e-05,
      "loss": 0.9925,
      "step": 74
    },
    {
      "epoch": 0.05995503372470647,
      "grad_norm": 1.862410414010068,
      "learning_rate": 1e-05,
      "loss": 1.0991,
      "step": 75
    },
    {
      "epoch": 0.06075443417436922,
      "grad_norm": 2.1389193269284625,
      "learning_rate": 1e-05,
      "loss": 1.0228,
      "step": 76
    },
    {
      "epoch": 0.061553834624031974,
      "grad_norm": 1.7408061970131428,
      "learning_rate": 1e-05,
      "loss": 1.0034,
      "step": 77
    },
    {
      "epoch": 0.06235323507369473,
      "grad_norm": 2.0783333855212653,
      "learning_rate": 1e-05,
      "loss": 1.0015,
      "step": 78
    },
    {
      "epoch": 0.06315263552335748,
      "grad_norm": 2.1794919181439507,
      "learning_rate": 1e-05,
      "loss": 1.0184,
      "step": 79
    },
    {
      "epoch": 0.06395203597302024,
      "grad_norm": 1.8799556566280435,
      "learning_rate": 1e-05,
      "loss": 0.9807,
      "step": 80
    },
    {
      "epoch": 0.06475143642268298,
      "grad_norm": 1.6068132265611528,
      "learning_rate": 1e-05,
      "loss": 1.0318,
      "step": 81
    },
    {
      "epoch": 0.06555083687234574,
      "grad_norm": 1.8404529509039422,
      "learning_rate": 1e-05,
      "loss": 0.9634,
      "step": 82
    },
    {
      "epoch": 0.0663502373220085,
      "grad_norm": 1.8490571137069702,
      "learning_rate": 1e-05,
      "loss": 0.9362,
      "step": 83
    },
    {
      "epoch": 0.06714963777167125,
      "grad_norm": 2.1048586741337485,
      "learning_rate": 1e-05,
      "loss": 1.051,
      "step": 84
    },
    {
      "epoch": 0.067949038221334,
      "grad_norm": 1.9361395487099815,
      "learning_rate": 1e-05,
      "loss": 0.9884,
      "step": 85
    },
    {
      "epoch": 0.06874843867099675,
      "grad_norm": 1.882438664110377,
      "learning_rate": 1e-05,
      "loss": 1.0338,
      "step": 86
    },
    {
      "epoch": 0.06954783912065951,
      "grad_norm": 1.9328301399003285,
      "learning_rate": 1e-05,
      "loss": 1.0123,
      "step": 87
    },
    {
      "epoch": 0.07034723957032225,
      "grad_norm": 1.9592492051372121,
      "learning_rate": 1e-05,
      "loss": 1.015,
      "step": 88
    },
    {
      "epoch": 0.07114664001998501,
      "grad_norm": 2.0637394818205035,
      "learning_rate": 1e-05,
      "loss": 1.0074,
      "step": 89
    },
    {
      "epoch": 0.07194604046964777,
      "grad_norm": 1.875788422779308,
      "learning_rate": 1e-05,
      "loss": 0.966,
      "step": 90
    },
    {
      "epoch": 0.07274544091931051,
      "grad_norm": 1.8409070357840667,
      "learning_rate": 1e-05,
      "loss": 1.0463,
      "step": 91
    },
    {
      "epoch": 0.07354484136897327,
      "grad_norm": 1.9103779504623786,
      "learning_rate": 1e-05,
      "loss": 0.9633,
      "step": 92
    },
    {
      "epoch": 0.07434424181863603,
      "grad_norm": 2.0590523934839307,
      "learning_rate": 1e-05,
      "loss": 1.0215,
      "step": 93
    },
    {
      "epoch": 0.07514364226829878,
      "grad_norm": 2.104785750263468,
      "learning_rate": 1e-05,
      "loss": 1.0025,
      "step": 94
    },
    {
      "epoch": 0.07594304271796153,
      "grad_norm": 2.1695447340449663,
      "learning_rate": 1e-05,
      "loss": 0.941,
      "step": 95
    },
    {
      "epoch": 0.07674244316762428,
      "grad_norm": 2.0465650220094203,
      "learning_rate": 1e-05,
      "loss": 1.0093,
      "step": 96
    },
    {
      "epoch": 0.07754184361728704,
      "grad_norm": 1.8941011997406154,
      "learning_rate": 1e-05,
      "loss": 1.064,
      "step": 97
    },
    {
      "epoch": 0.07834124406694978,
      "grad_norm": 1.9987845140787637,
      "learning_rate": 1e-05,
      "loss": 0.9793,
      "step": 98
    },
    {
      "epoch": 0.07914064451661254,
      "grad_norm": 1.8233385113626337,
      "learning_rate": 1e-05,
      "loss": 1.0176,
      "step": 99
    },
    {
      "epoch": 0.0799400449662753,
      "grad_norm": 1.8162210777833079,
      "learning_rate": 1e-05,
      "loss": 0.9699,
      "step": 100
    },
    {
      "epoch": 0.08073944541593804,
      "grad_norm": 1.8711808189743682,
      "learning_rate": 1e-05,
      "loss": 0.9865,
      "step": 101
    },
    {
      "epoch": 0.0815388458656008,
      "grad_norm": 1.974561488916405,
      "learning_rate": 1e-05,
      "loss": 0.9806,
      "step": 102
    },
    {
      "epoch": 0.08233824631526356,
      "grad_norm": 1.7095584582820083,
      "learning_rate": 1e-05,
      "loss": 0.9955,
      "step": 103
    },
    {
      "epoch": 0.0831376467649263,
      "grad_norm": 1.8952139824297942,
      "learning_rate": 1e-05,
      "loss": 0.9338,
      "step": 104
    },
    {
      "epoch": 0.08393704721458906,
      "grad_norm": 1.8058804845050307,
      "learning_rate": 1e-05,
      "loss": 1.0062,
      "step": 105
    },
    {
      "epoch": 0.08473644766425181,
      "grad_norm": 1.8103680215448428,
      "learning_rate": 1e-05,
      "loss": 0.9872,
      "step": 106
    },
    {
      "epoch": 0.08553584811391457,
      "grad_norm": 1.694736368233996,
      "learning_rate": 1e-05,
      "loss": 0.9359,
      "step": 107
    },
    {
      "epoch": 0.08633524856357731,
      "grad_norm": 1.9235533583641018,
      "learning_rate": 1e-05,
      "loss": 1.0611,
      "step": 108
    },
    {
      "epoch": 0.08713464901324007,
      "grad_norm": 1.619066977691127,
      "learning_rate": 1e-05,
      "loss": 0.9654,
      "step": 109
    },
    {
      "epoch": 0.08793404946290283,
      "grad_norm": 1.8050888311534128,
      "learning_rate": 1e-05,
      "loss": 1.004,
      "step": 110
    },
    {
      "epoch": 0.08873344991256557,
      "grad_norm": 1.9960924269335547,
      "learning_rate": 1e-05,
      "loss": 1.0118,
      "step": 111
    },
    {
      "epoch": 0.08953285036222833,
      "grad_norm": 1.9286201089638149,
      "learning_rate": 1e-05,
      "loss": 1.0025,
      "step": 112
    },
    {
      "epoch": 0.09033225081189108,
      "grad_norm": 2.1725480586787396,
      "learning_rate": 1e-05,
      "loss": 0.9558,
      "step": 113
    },
    {
      "epoch": 0.09113165126155383,
      "grad_norm": 1.857962422635593,
      "learning_rate": 1e-05,
      "loss": 0.9772,
      "step": 114
    },
    {
      "epoch": 0.09193105171121659,
      "grad_norm": 1.9166723424153935,
      "learning_rate": 1e-05,
      "loss": 0.9749,
      "step": 115
    },
    {
      "epoch": 0.09273045216087934,
      "grad_norm": 2.0124769392114854,
      "learning_rate": 1e-05,
      "loss": 0.9548,
      "step": 116
    },
    {
      "epoch": 0.0935298526105421,
      "grad_norm": 1.847426445728428,
      "learning_rate": 1e-05,
      "loss": 0.941,
      "step": 117
    },
    {
      "epoch": 0.09432925306020484,
      "grad_norm": 2.163992947673654,
      "learning_rate": 1e-05,
      "loss": 0.9617,
      "step": 118
    },
    {
      "epoch": 0.0951286535098676,
      "grad_norm": 1.8889979598709639,
      "learning_rate": 1e-05,
      "loss": 1.0272,
      "step": 119
    },
    {
      "epoch": 0.09592805395953036,
      "grad_norm": 1.844634955046446,
      "learning_rate": 1e-05,
      "loss": 0.9669,
      "step": 120
    },
    {
      "epoch": 0.0967274544091931,
      "grad_norm": 1.9301903181704618,
      "learning_rate": 1e-05,
      "loss": 0.9717,
      "step": 121
    },
    {
      "epoch": 0.09752685485885586,
      "grad_norm": 1.9564195723979845,
      "learning_rate": 1e-05,
      "loss": 0.9527,
      "step": 122
    },
    {
      "epoch": 0.09832625530851861,
      "grad_norm": 1.834090339470851,
      "learning_rate": 1e-05,
      "loss": 0.9794,
      "step": 123
    },
    {
      "epoch": 0.09912565575818136,
      "grad_norm": 1.7936104151665677,
      "learning_rate": 1e-05,
      "loss": 0.9042,
      "step": 124
    },
    {
      "epoch": 0.09992505620784412,
      "grad_norm": 1.7969263674080669,
      "learning_rate": 1e-05,
      "loss": 1.0397,
      "step": 125
    },
    {
      "epoch": 0.10072445665750687,
      "grad_norm": 1.7901986458192694,
      "learning_rate": 1e-05,
      "loss": 1.0043,
      "step": 126
    },
    {
      "epoch": 0.10152385710716963,
      "grad_norm": 1.8947234640723079,
      "learning_rate": 1e-05,
      "loss": 0.9761,
      "step": 127
    },
    {
      "epoch": 0.10232325755683237,
      "grad_norm": 1.8487696622255145,
      "learning_rate": 1e-05,
      "loss": 0.9899,
      "step": 128
    },
    {
      "epoch": 0.10312265800649513,
      "grad_norm": 1.8207862729527453,
      "learning_rate": 1e-05,
      "loss": 1.0272,
      "step": 129
    },
    {
      "epoch": 0.10392205845615789,
      "grad_norm": 1.9816716753688939,
      "learning_rate": 1e-05,
      "loss": 0.9202,
      "step": 130
    },
    {
      "epoch": 0.10472145890582063,
      "grad_norm": 1.8916365109275264,
      "learning_rate": 1e-05,
      "loss": 0.9629,
      "step": 131
    },
    {
      "epoch": 0.10552085935548339,
      "grad_norm": 1.9863329832931071,
      "learning_rate": 1e-05,
      "loss": 0.9976,
      "step": 132
    },
    {
      "epoch": 0.10632025980514614,
      "grad_norm": 1.9194816317308832,
      "learning_rate": 1e-05,
      "loss": 1.0043,
      "step": 133
    },
    {
      "epoch": 0.10711966025480889,
      "grad_norm": 1.9537595846189237,
      "learning_rate": 1e-05,
      "loss": 0.9453,
      "step": 134
    },
    {
      "epoch": 0.10791906070447164,
      "grad_norm": 2.0669579990783253,
      "learning_rate": 1e-05,
      "loss": 0.9865,
      "step": 135
    },
    {
      "epoch": 0.1087184611541344,
      "grad_norm": 1.9760934706997628,
      "learning_rate": 1e-05,
      "loss": 1.017,
      "step": 136
    },
    {
      "epoch": 0.10951786160379715,
      "grad_norm": 1.7260389446366302,
      "learning_rate": 1e-05,
      "loss": 0.963,
      "step": 137
    },
    {
      "epoch": 0.1103172620534599,
      "grad_norm": 1.9203242105800193,
      "learning_rate": 1e-05,
      "loss": 1.0157,
      "step": 138
    },
    {
      "epoch": 0.11111666250312266,
      "grad_norm": 1.9850822013474325,
      "learning_rate": 1e-05,
      "loss": 0.9438,
      "step": 139
    },
    {
      "epoch": 0.11191606295278542,
      "grad_norm": 1.9572946605976695,
      "learning_rate": 1e-05,
      "loss": 1.0029,
      "step": 140
    },
    {
      "epoch": 0.11271546340244816,
      "grad_norm": 1.5451741731912971,
      "learning_rate": 1e-05,
      "loss": 0.9225,
      "step": 141
    },
    {
      "epoch": 0.11351486385211092,
      "grad_norm": 2.0070450938810707,
      "learning_rate": 1e-05,
      "loss": 0.922,
      "step": 142
    },
    {
      "epoch": 0.11431426430177367,
      "grad_norm": 1.89832125508894,
      "learning_rate": 1e-05,
      "loss": 1.0401,
      "step": 143
    },
    {
      "epoch": 0.11511366475143642,
      "grad_norm": 1.950327724703524,
      "learning_rate": 1e-05,
      "loss": 0.9279,
      "step": 144
    },
    {
      "epoch": 0.11591306520109917,
      "grad_norm": 1.9700609199158468,
      "learning_rate": 1e-05,
      "loss": 0.9864,
      "step": 145
    },
    {
      "epoch": 0.11671246565076193,
      "grad_norm": 1.6727783834574599,
      "learning_rate": 1e-05,
      "loss": 0.9659,
      "step": 146
    },
    {
      "epoch": 0.11751186610042468,
      "grad_norm": 1.8484918243414765,
      "learning_rate": 1e-05,
      "loss": 0.9761,
      "step": 147
    },
    {
      "epoch": 0.11831126655008743,
      "grad_norm": 2.045306713844051,
      "learning_rate": 1e-05,
      "loss": 0.9788,
      "step": 148
    },
    {
      "epoch": 0.11911066699975019,
      "grad_norm": 1.8558407244018518,
      "learning_rate": 1e-05,
      "loss": 0.963,
      "step": 149
    },
    {
      "epoch": 0.11991006744941295,
      "grad_norm": 1.777504348074839,
      "learning_rate": 1e-05,
      "loss": 0.9898,
      "step": 150
    },
    {
      "epoch": 0.12070946789907569,
      "grad_norm": 1.7945306209083864,
      "learning_rate": 1e-05,
      "loss": 0.9475,
      "step": 151
    },
    {
      "epoch": 0.12150886834873845,
      "grad_norm": 1.612635014991482,
      "learning_rate": 1e-05,
      "loss": 0.981,
      "step": 152
    },
    {
      "epoch": 0.1223082687984012,
      "grad_norm": 1.5365653630331435,
      "learning_rate": 1e-05,
      "loss": 0.9336,
      "step": 153
    },
    {
      "epoch": 0.12310766924806395,
      "grad_norm": 1.7728163669560009,
      "learning_rate": 1e-05,
      "loss": 0.9786,
      "step": 154
    },
    {
      "epoch": 0.1239070696977267,
      "grad_norm": 1.6363907272750682,
      "learning_rate": 1e-05,
      "loss": 0.9499,
      "step": 155
    },
    {
      "epoch": 0.12470647014738946,
      "grad_norm": 1.8927548789352038,
      "learning_rate": 1e-05,
      "loss": 0.9537,
      "step": 156
    },
    {
      "epoch": 0.1255058705970522,
      "grad_norm": 1.576438438411652,
      "learning_rate": 1e-05,
      "loss": 0.9273,
      "step": 157
    },
    {
      "epoch": 0.12630527104671496,
      "grad_norm": 1.8750460465870347,
      "learning_rate": 1e-05,
      "loss": 0.9687,
      "step": 158
    },
    {
      "epoch": 0.12710467149637772,
      "grad_norm": 1.712737472716492,
      "learning_rate": 1e-05,
      "loss": 0.9981,
      "step": 159
    },
    {
      "epoch": 0.12790407194604048,
      "grad_norm": 1.8944147808763965,
      "learning_rate": 1e-05,
      "loss": 1.0316,
      "step": 160
    },
    {
      "epoch": 0.12870347239570323,
      "grad_norm": 1.6975154876149214,
      "learning_rate": 1e-05,
      "loss": 0.9921,
      "step": 161
    },
    {
      "epoch": 0.12950287284536596,
      "grad_norm": 1.7330196261933866,
      "learning_rate": 1e-05,
      "loss": 0.9567,
      "step": 162
    },
    {
      "epoch": 0.13030227329502872,
      "grad_norm": 2.004904627709956,
      "learning_rate": 1e-05,
      "loss": 0.9788,
      "step": 163
    },
    {
      "epoch": 0.13110167374469148,
      "grad_norm": 1.7565329263507932,
      "learning_rate": 1e-05,
      "loss": 0.9461,
      "step": 164
    },
    {
      "epoch": 0.13190107419435423,
      "grad_norm": 1.6976314021380359,
      "learning_rate": 1e-05,
      "loss": 0.9926,
      "step": 165
    },
    {
      "epoch": 0.132700474644017,
      "grad_norm": 1.573182719519626,
      "learning_rate": 1e-05,
      "loss": 0.982,
      "step": 166
    },
    {
      "epoch": 0.13349987509367975,
      "grad_norm": 1.5753994405016738,
      "learning_rate": 1e-05,
      "loss": 0.9745,
      "step": 167
    },
    {
      "epoch": 0.1342992755433425,
      "grad_norm": 1.9199549441489088,
      "learning_rate": 1e-05,
      "loss": 0.9916,
      "step": 168
    },
    {
      "epoch": 0.13509867599300523,
      "grad_norm": 1.7662832212098252,
      "learning_rate": 1e-05,
      "loss": 0.9717,
      "step": 169
    },
    {
      "epoch": 0.135898076442668,
      "grad_norm": 2.1972236756007506,
      "learning_rate": 1e-05,
      "loss": 0.9923,
      "step": 170
    },
    {
      "epoch": 0.13669747689233075,
      "grad_norm": 1.5845907178152914,
      "learning_rate": 1e-05,
      "loss": 1.041,
      "step": 171
    },
    {
      "epoch": 0.1374968773419935,
      "grad_norm": 1.9027156433363486,
      "learning_rate": 1e-05,
      "loss": 0.9986,
      "step": 172
    },
    {
      "epoch": 0.13829627779165626,
      "grad_norm": 1.938028025396952,
      "learning_rate": 1e-05,
      "loss": 0.9856,
      "step": 173
    },
    {
      "epoch": 0.13909567824131902,
      "grad_norm": 1.7615271251517497,
      "learning_rate": 1e-05,
      "loss": 0.9879,
      "step": 174
    },
    {
      "epoch": 0.13989507869098175,
      "grad_norm": 1.5753792433296703,
      "learning_rate": 1e-05,
      "loss": 0.9952,
      "step": 175
    },
    {
      "epoch": 0.1406944791406445,
      "grad_norm": 1.8071610796834736,
      "learning_rate": 1e-05,
      "loss": 0.9403,
      "step": 176
    },
    {
      "epoch": 0.14149387959030726,
      "grad_norm": 1.8188146399425127,
      "learning_rate": 1e-05,
      "loss": 0.9166,
      "step": 177
    },
    {
      "epoch": 0.14229328003997002,
      "grad_norm": 1.8998134327288991,
      "learning_rate": 1e-05,
      "loss": 0.9307,
      "step": 178
    },
    {
      "epoch": 0.14309268048963278,
      "grad_norm": 1.8148916923977343,
      "learning_rate": 1e-05,
      "loss": 0.964,
      "step": 179
    },
    {
      "epoch": 0.14389208093929554,
      "grad_norm": 1.8025702262604992,
      "learning_rate": 1e-05,
      "loss": 0.9636,
      "step": 180
    },
    {
      "epoch": 0.1446914813889583,
      "grad_norm": 1.8970561152549208,
      "learning_rate": 1e-05,
      "loss": 0.9446,
      "step": 181
    },
    {
      "epoch": 0.14549088183862102,
      "grad_norm": 1.774281514717804,
      "learning_rate": 1e-05,
      "loss": 0.9011,
      "step": 182
    },
    {
      "epoch": 0.14629028228828378,
      "grad_norm": 1.6697484592667877,
      "learning_rate": 1e-05,
      "loss": 0.9732,
      "step": 183
    },
    {
      "epoch": 0.14708968273794654,
      "grad_norm": 1.748314198924899,
      "learning_rate": 1e-05,
      "loss": 0.9294,
      "step": 184
    },
    {
      "epoch": 0.1478890831876093,
      "grad_norm": 1.5552333328333348,
      "learning_rate": 1e-05,
      "loss": 0.9207,
      "step": 185
    },
    {
      "epoch": 0.14868848363727205,
      "grad_norm": 1.819375156478493,
      "learning_rate": 1e-05,
      "loss": 0.9667,
      "step": 186
    },
    {
      "epoch": 0.1494878840869348,
      "grad_norm": 1.5853289567427034,
      "learning_rate": 1e-05,
      "loss": 0.9863,
      "step": 187
    },
    {
      "epoch": 0.15028728453659757,
      "grad_norm": 1.7338233390104778,
      "learning_rate": 1e-05,
      "loss": 0.9088,
      "step": 188
    },
    {
      "epoch": 0.1510866849862603,
      "grad_norm": 1.8735214816693204,
      "learning_rate": 1e-05,
      "loss": 0.9931,
      "step": 189
    },
    {
      "epoch": 0.15188608543592305,
      "grad_norm": 1.70836070926444,
      "learning_rate": 1e-05,
      "loss": 0.9774,
      "step": 190
    },
    {
      "epoch": 0.1526854858855858,
      "grad_norm": 1.68457840558557,
      "learning_rate": 1e-05,
      "loss": 0.9971,
      "step": 191
    },
    {
      "epoch": 0.15348488633524857,
      "grad_norm": 1.9974046657795066,
      "learning_rate": 1e-05,
      "loss": 1.0525,
      "step": 192
    },
    {
      "epoch": 0.15428428678491132,
      "grad_norm": 1.8637088407144724,
      "learning_rate": 1e-05,
      "loss": 0.9458,
      "step": 193
    },
    {
      "epoch": 0.15508368723457408,
      "grad_norm": 1.5472617342282928,
      "learning_rate": 1e-05,
      "loss": 0.9321,
      "step": 194
    },
    {
      "epoch": 0.1558830876842368,
      "grad_norm": 2.0278392859284224,
      "learning_rate": 1e-05,
      "loss": 0.9376,
      "step": 195
    },
    {
      "epoch": 0.15668248813389957,
      "grad_norm": 1.8610095483452973,
      "learning_rate": 1e-05,
      "loss": 0.9921,
      "step": 196
    },
    {
      "epoch": 0.15748188858356232,
      "grad_norm": 2.0375178580916016,
      "learning_rate": 1e-05,
      "loss": 0.9985,
      "step": 197
    },
    {
      "epoch": 0.15828128903322508,
      "grad_norm": 1.8219362402276909,
      "learning_rate": 1e-05,
      "loss": 0.924,
      "step": 198
    },
    {
      "epoch": 0.15908068948288784,
      "grad_norm": 1.4629250708658383,
      "learning_rate": 1e-05,
      "loss": 1.0201,
      "step": 199
    },
    {
      "epoch": 0.1598800899325506,
      "grad_norm": 1.5628287370754461,
      "learning_rate": 1e-05,
      "loss": 1.0002,
      "step": 200
    },
    {
      "epoch": 0.16067949038221335,
      "grad_norm": 1.8442311252983388,
      "learning_rate": 1e-05,
      "loss": 0.937,
      "step": 201
    },
    {
      "epoch": 0.16147889083187608,
      "grad_norm": 7.441197607810174,
      "learning_rate": 1e-05,
      "loss": 0.8768,
      "step": 202
    },
    {
      "epoch": 0.16227829128153884,
      "grad_norm": 1.7947899683379576,
      "learning_rate": 1e-05,
      "loss": 0.9524,
      "step": 203
    },
    {
      "epoch": 0.1630776917312016,
      "grad_norm": 1.656507654529954,
      "learning_rate": 1e-05,
      "loss": 0.8953,
      "step": 204
    },
    {
      "epoch": 0.16387709218086435,
      "grad_norm": 1.7462816982128921,
      "learning_rate": 1e-05,
      "loss": 0.9435,
      "step": 205
    },
    {
      "epoch": 0.1646764926305271,
      "grad_norm": 1.7013940298273953,
      "learning_rate": 1e-05,
      "loss": 0.9124,
      "step": 206
    },
    {
      "epoch": 0.16547589308018987,
      "grad_norm": 1.6379746843984113,
      "learning_rate": 1e-05,
      "loss": 0.9508,
      "step": 207
    },
    {
      "epoch": 0.1662752935298526,
      "grad_norm": 1.9314822402660798,
      "learning_rate": 1e-05,
      "loss": 1.0272,
      "step": 208
    },
    {
      "epoch": 0.16707469397951535,
      "grad_norm": 1.9961308842740637,
      "learning_rate": 1e-05,
      "loss": 0.9841,
      "step": 209
    },
    {
      "epoch": 0.1678740944291781,
      "grad_norm": 2.0382234178726537,
      "learning_rate": 1e-05,
      "loss": 0.9785,
      "step": 210
    },
    {
      "epoch": 0.16867349487884087,
      "grad_norm": 1.6901064034464468,
      "learning_rate": 1e-05,
      "loss": 0.9127,
      "step": 211
    },
    {
      "epoch": 0.16947289532850363,
      "grad_norm": 1.7273747898471865,
      "learning_rate": 1e-05,
      "loss": 0.9583,
      "step": 212
    },
    {
      "epoch": 0.17027229577816638,
      "grad_norm": 1.7457470216603739,
      "learning_rate": 1e-05,
      "loss": 0.9799,
      "step": 213
    },
    {
      "epoch": 0.17107169622782914,
      "grad_norm": 1.7313522722535573,
      "learning_rate": 1e-05,
      "loss": 0.9489,
      "step": 214
    },
    {
      "epoch": 0.17187109667749187,
      "grad_norm": 1.7762615948567715,
      "learning_rate": 1e-05,
      "loss": 0.9328,
      "step": 215
    },
    {
      "epoch": 0.17267049712715463,
      "grad_norm": 1.6331422537410691,
      "learning_rate": 1e-05,
      "loss": 0.9446,
      "step": 216
    },
    {
      "epoch": 0.17346989757681738,
      "grad_norm": 1.6778510604121997,
      "learning_rate": 1e-05,
      "loss": 0.9547,
      "step": 217
    },
    {
      "epoch": 0.17426929802648014,
      "grad_norm": 1.9041470899144908,
      "learning_rate": 1e-05,
      "loss": 0.9014,
      "step": 218
    },
    {
      "epoch": 0.1750686984761429,
      "grad_norm": 1.8662662755793453,
      "learning_rate": 1e-05,
      "loss": 0.9709,
      "step": 219
    },
    {
      "epoch": 0.17586809892580565,
      "grad_norm": 1.7045357754568997,
      "learning_rate": 1e-05,
      "loss": 0.9433,
      "step": 220
    },
    {
      "epoch": 0.1766674993754684,
      "grad_norm": 1.74409106945116,
      "learning_rate": 1e-05,
      "loss": 0.9153,
      "step": 221
    },
    {
      "epoch": 0.17746689982513114,
      "grad_norm": 1.8132234884702887,
      "learning_rate": 1e-05,
      "loss": 0.8909,
      "step": 222
    },
    {
      "epoch": 0.1782663002747939,
      "grad_norm": 1.6971296927642,
      "learning_rate": 1e-05,
      "loss": 0.9622,
      "step": 223
    },
    {
      "epoch": 0.17906570072445666,
      "grad_norm": 1.781912471031092,
      "learning_rate": 1e-05,
      "loss": 0.954,
      "step": 224
    },
    {
      "epoch": 0.1798651011741194,
      "grad_norm": 1.6629867774088771,
      "learning_rate": 1e-05,
      "loss": 0.96,
      "step": 225
    },
    {
      "epoch": 0.18066450162378217,
      "grad_norm": 2.0699033115205614,
      "learning_rate": 1e-05,
      "loss": 0.9284,
      "step": 226
    },
    {
      "epoch": 0.18146390207344493,
      "grad_norm": 1.7235146329911442,
      "learning_rate": 1e-05,
      "loss": 0.9456,
      "step": 227
    },
    {
      "epoch": 0.18226330252310766,
      "grad_norm": 1.7961113577108625,
      "learning_rate": 1e-05,
      "loss": 0.9454,
      "step": 228
    },
    {
      "epoch": 0.1830627029727704,
      "grad_norm": 1.6808904917909453,
      "learning_rate": 1e-05,
      "loss": 0.9524,
      "step": 229
    },
    {
      "epoch": 0.18386210342243317,
      "grad_norm": 1.5865303307652885,
      "learning_rate": 1e-05,
      "loss": 0.9863,
      "step": 230
    },
    {
      "epoch": 0.18466150387209593,
      "grad_norm": 1.6521878212504149,
      "learning_rate": 1e-05,
      "loss": 0.946,
      "step": 231
    },
    {
      "epoch": 0.18546090432175869,
      "grad_norm": 1.5619375597824243,
      "learning_rate": 1e-05,
      "loss": 1.0141,
      "step": 232
    },
    {
      "epoch": 0.18626030477142144,
      "grad_norm": 1.9668596679027701,
      "learning_rate": 1e-05,
      "loss": 0.9783,
      "step": 233
    },
    {
      "epoch": 0.1870597052210842,
      "grad_norm": 1.7004515677555856,
      "learning_rate": 1e-05,
      "loss": 0.939,
      "step": 234
    },
    {
      "epoch": 0.18785910567074693,
      "grad_norm": 1.8505586367786393,
      "learning_rate": 1e-05,
      "loss": 1.0186,
      "step": 235
    },
    {
      "epoch": 0.18865850612040969,
      "grad_norm": 1.8794093279833084,
      "learning_rate": 1e-05,
      "loss": 0.9748,
      "step": 236
    },
    {
      "epoch": 0.18945790657007244,
      "grad_norm": 1.970577363084186,
      "learning_rate": 1e-05,
      "loss": 0.9734,
      "step": 237
    },
    {
      "epoch": 0.1902573070197352,
      "grad_norm": 1.9827162568725265,
      "learning_rate": 1e-05,
      "loss": 0.9526,
      "step": 238
    },
    {
      "epoch": 0.19105670746939796,
      "grad_norm": 1.6777105787009272,
      "learning_rate": 1e-05,
      "loss": 1.0038,
      "step": 239
    },
    {
      "epoch": 0.19185610791906071,
      "grad_norm": 1.8547665670552458,
      "learning_rate": 1e-05,
      "loss": 0.9425,
      "step": 240
    },
    {
      "epoch": 0.19265550836872344,
      "grad_norm": 1.5739853104069792,
      "learning_rate": 1e-05,
      "loss": 0.9898,
      "step": 241
    },
    {
      "epoch": 0.1934549088183862,
      "grad_norm": 1.7991544252885405,
      "learning_rate": 1e-05,
      "loss": 0.9068,
      "step": 242
    },
    {
      "epoch": 0.19425430926804896,
      "grad_norm": 1.7278046505750493,
      "learning_rate": 1e-05,
      "loss": 0.9961,
      "step": 243
    },
    {
      "epoch": 0.19505370971771172,
      "grad_norm": 1.6738018924260079,
      "learning_rate": 1e-05,
      "loss": 0.9269,
      "step": 244
    },
    {
      "epoch": 0.19585311016737447,
      "grad_norm": 1.704113739011135,
      "learning_rate": 1e-05,
      "loss": 0.9384,
      "step": 245
    },
    {
      "epoch": 0.19665251061703723,
      "grad_norm": 1.953642878567139,
      "learning_rate": 1e-05,
      "loss": 0.9003,
      "step": 246
    },
    {
      "epoch": 0.1974519110667,
      "grad_norm": 1.8994714525376621,
      "learning_rate": 1e-05,
      "loss": 0.9384,
      "step": 247
    },
    {
      "epoch": 0.19825131151636272,
      "grad_norm": 1.7335277476681896,
      "learning_rate": 1e-05,
      "loss": 0.9164,
      "step": 248
    },
    {
      "epoch": 0.19905071196602547,
      "grad_norm": 1.8114996960442162,
      "learning_rate": 1e-05,
      "loss": 0.909,
      "step": 249
    },
    {
      "epoch": 0.19985011241568823,
      "grad_norm": 1.8399064962789757,
      "learning_rate": 1e-05,
      "loss": 0.9672,
      "step": 250
    },
    {
      "epoch": 0.200649512865351,
      "grad_norm": 1.8027482426913095,
      "learning_rate": 1e-05,
      "loss": 0.9294,
      "step": 251
    },
    {
      "epoch": 0.20144891331501374,
      "grad_norm": 1.7914653808525045,
      "learning_rate": 1e-05,
      "loss": 0.9709,
      "step": 252
    },
    {
      "epoch": 0.2022483137646765,
      "grad_norm": 1.8562700822437381,
      "learning_rate": 1e-05,
      "loss": 0.918,
      "step": 253
    },
    {
      "epoch": 0.20304771421433926,
      "grad_norm": 1.592298158180451,
      "learning_rate": 1e-05,
      "loss": 0.9874,
      "step": 254
    },
    {
      "epoch": 0.203847114664002,
      "grad_norm": 1.7885472103550304,
      "learning_rate": 1e-05,
      "loss": 0.9579,
      "step": 255
    },
    {
      "epoch": 0.20464651511366475,
      "grad_norm": 1.8835318053165766,
      "learning_rate": 1e-05,
      "loss": 0.97,
      "step": 256
    },
    {
      "epoch": 0.2054459155633275,
      "grad_norm": 2.2973670794805865,
      "learning_rate": 1e-05,
      "loss": 1.0196,
      "step": 257
    },
    {
      "epoch": 0.20624531601299026,
      "grad_norm": 2.059759101560068,
      "learning_rate": 1e-05,
      "loss": 0.9051,
      "step": 258
    },
    {
      "epoch": 0.20704471646265302,
      "grad_norm": 1.6379487643230517,
      "learning_rate": 1e-05,
      "loss": 0.9853,
      "step": 259
    },
    {
      "epoch": 0.20784411691231577,
      "grad_norm": 1.7739932086505867,
      "learning_rate": 1e-05,
      "loss": 0.9365,
      "step": 260
    },
    {
      "epoch": 0.2086435173619785,
      "grad_norm": 1.9378628413327441,
      "learning_rate": 1e-05,
      "loss": 0.9248,
      "step": 261
    },
    {
      "epoch": 0.20944291781164126,
      "grad_norm": 1.8631208677480777,
      "learning_rate": 1e-05,
      "loss": 0.9417,
      "step": 262
    },
    {
      "epoch": 0.21024231826130402,
      "grad_norm": 1.73049947808822,
      "learning_rate": 1e-05,
      "loss": 0.9039,
      "step": 263
    },
    {
      "epoch": 0.21104171871096677,
      "grad_norm": 1.6873959381280914,
      "learning_rate": 1e-05,
      "loss": 0.945,
      "step": 264
    },
    {
      "epoch": 0.21184111916062953,
      "grad_norm": 1.5105067176725349,
      "learning_rate": 1e-05,
      "loss": 0.9446,
      "step": 265
    },
    {
      "epoch": 0.2126405196102923,
      "grad_norm": 1.8337058320691813,
      "learning_rate": 1e-05,
      "loss": 0.9582,
      "step": 266
    },
    {
      "epoch": 0.21343992005995505,
      "grad_norm": 1.644955596385126,
      "learning_rate": 1e-05,
      "loss": 0.9055,
      "step": 267
    },
    {
      "epoch": 0.21423932050961778,
      "grad_norm": 2.0248942495461435,
      "learning_rate": 1e-05,
      "loss": 1.0207,
      "step": 268
    },
    {
      "epoch": 0.21503872095928053,
      "grad_norm": 1.746437687084402,
      "learning_rate": 1e-05,
      "loss": 1.0093,
      "step": 269
    },
    {
      "epoch": 0.2158381214089433,
      "grad_norm": 1.719648906171914,
      "learning_rate": 1e-05,
      "loss": 0.9533,
      "step": 270
    },
    {
      "epoch": 0.21663752185860605,
      "grad_norm": 1.8380592688711606,
      "learning_rate": 1e-05,
      "loss": 0.9275,
      "step": 271
    },
    {
      "epoch": 0.2174369223082688,
      "grad_norm": 1.8205169561312367,
      "learning_rate": 1e-05,
      "loss": 0.9745,
      "step": 272
    },
    {
      "epoch": 0.21823632275793156,
      "grad_norm": 2.326139141853857,
      "learning_rate": 1e-05,
      "loss": 0.9953,
      "step": 273
    },
    {
      "epoch": 0.2190357232075943,
      "grad_norm": 1.6381092977636662,
      "learning_rate": 1e-05,
      "loss": 0.9203,
      "step": 274
    },
    {
      "epoch": 0.21983512365725705,
      "grad_norm": 1.606867524589781,
      "learning_rate": 1e-05,
      "loss": 0.9007,
      "step": 275
    },
    {
      "epoch": 0.2206345241069198,
      "grad_norm": 1.7195338383934604,
      "learning_rate": 1e-05,
      "loss": 0.9611,
      "step": 276
    },
    {
      "epoch": 0.22143392455658256,
      "grad_norm": 1.3840546682546424,
      "learning_rate": 1e-05,
      "loss": 0.9614,
      "step": 277
    },
    {
      "epoch": 0.22223332500624532,
      "grad_norm": 1.6306949714534276,
      "learning_rate": 1e-05,
      "loss": 0.9271,
      "step": 278
    },
    {
      "epoch": 0.22303272545590808,
      "grad_norm": 1.5110189180438256,
      "learning_rate": 1e-05,
      "loss": 0.9528,
      "step": 279
    },
    {
      "epoch": 0.22383212590557083,
      "grad_norm": 1.8612974867734187,
      "learning_rate": 1e-05,
      "loss": 0.9587,
      "step": 280
    },
    {
      "epoch": 0.22463152635523356,
      "grad_norm": 1.664680974165204,
      "learning_rate": 1e-05,
      "loss": 0.9129,
      "step": 281
    },
    {
      "epoch": 0.22543092680489632,
      "grad_norm": 1.7746255109018692,
      "learning_rate": 1e-05,
      "loss": 0.939,
      "step": 282
    },
    {
      "epoch": 0.22623032725455908,
      "grad_norm": 1.575200440251585,
      "learning_rate": 1e-05,
      "loss": 0.9204,
      "step": 283
    },
    {
      "epoch": 0.22702972770422183,
      "grad_norm": 1.7516406660858301,
      "learning_rate": 1e-05,
      "loss": 0.9537,
      "step": 284
    },
    {
      "epoch": 0.2278291281538846,
      "grad_norm": 1.91803098110819,
      "learning_rate": 1e-05,
      "loss": 0.9363,
      "step": 285
    },
    {
      "epoch": 0.22862852860354735,
      "grad_norm": 1.6613035583173086,
      "learning_rate": 1e-05,
      "loss": 0.9634,
      "step": 286
    },
    {
      "epoch": 0.2294279290532101,
      "grad_norm": 1.5842290188976889,
      "learning_rate": 1e-05,
      "loss": 0.9551,
      "step": 287
    },
    {
      "epoch": 0.23022732950287284,
      "grad_norm": 1.9140569815192874,
      "learning_rate": 1e-05,
      "loss": 0.9512,
      "step": 288
    },
    {
      "epoch": 0.2310267299525356,
      "grad_norm": 1.5261307902201178,
      "learning_rate": 1e-05,
      "loss": 0.96,
      "step": 289
    },
    {
      "epoch": 0.23182613040219835,
      "grad_norm": 1.682573363812062,
      "learning_rate": 1e-05,
      "loss": 0.8925,
      "step": 290
    },
    {
      "epoch": 0.2326255308518611,
      "grad_norm": 1.6358092225364382,
      "learning_rate": 1e-05,
      "loss": 0.8815,
      "step": 291
    },
    {
      "epoch": 0.23342493130152386,
      "grad_norm": 1.5670506043722536,
      "learning_rate": 1e-05,
      "loss": 0.9876,
      "step": 292
    },
    {
      "epoch": 0.23422433175118662,
      "grad_norm": 1.6299839564753011,
      "learning_rate": 1e-05,
      "loss": 0.8892,
      "step": 293
    },
    {
      "epoch": 0.23502373220084935,
      "grad_norm": 1.6554910310702649,
      "learning_rate": 1e-05,
      "loss": 0.9216,
      "step": 294
    },
    {
      "epoch": 0.2358231326505121,
      "grad_norm": 1.8037159660461701,
      "learning_rate": 1e-05,
      "loss": 0.9575,
      "step": 295
    },
    {
      "epoch": 0.23662253310017486,
      "grad_norm": 1.629165333497563,
      "learning_rate": 1e-05,
      "loss": 0.947,
      "step": 296
    },
    {
      "epoch": 0.23742193354983762,
      "grad_norm": 1.8459614666127684,
      "learning_rate": 1e-05,
      "loss": 0.9263,
      "step": 297
    },
    {
      "epoch": 0.23822133399950038,
      "grad_norm": 1.5508274722576894,
      "learning_rate": 1e-05,
      "loss": 0.9002,
      "step": 298
    },
    {
      "epoch": 0.23902073444916314,
      "grad_norm": 1.6777079971899138,
      "learning_rate": 1e-05,
      "loss": 0.9508,
      "step": 299
    },
    {
      "epoch": 0.2398201348988259,
      "grad_norm": 1.7100079727592197,
      "learning_rate": 1e-05,
      "loss": 0.935,
      "step": 300
    },
    {
      "epoch": 0.24061953534848862,
      "grad_norm": 2.1307932039198425,
      "learning_rate": 1e-05,
      "loss": 0.9233,
      "step": 301
    },
    {
      "epoch": 0.24141893579815138,
      "grad_norm": 1.883290916019245,
      "learning_rate": 1e-05,
      "loss": 0.943,
      "step": 302
    },
    {
      "epoch": 0.24221833624781414,
      "grad_norm": 1.5909650854809918,
      "learning_rate": 1e-05,
      "loss": 0.9467,
      "step": 303
    },
    {
      "epoch": 0.2430177366974769,
      "grad_norm": 1.7792900727864842,
      "learning_rate": 1e-05,
      "loss": 0.9342,
      "step": 304
    },
    {
      "epoch": 0.24381713714713965,
      "grad_norm": 1.7111474699259361,
      "learning_rate": 1e-05,
      "loss": 0.9345,
      "step": 305
    },
    {
      "epoch": 0.2446165375968024,
      "grad_norm": 1.7771845797925385,
      "learning_rate": 1e-05,
      "loss": 0.9341,
      "step": 306
    },
    {
      "epoch": 0.24541593804646514,
      "grad_norm": 1.6148130323193988,
      "learning_rate": 1e-05,
      "loss": 0.8944,
      "step": 307
    },
    {
      "epoch": 0.2462153384961279,
      "grad_norm": 1.9162065213210437,
      "learning_rate": 1e-05,
      "loss": 0.9519,
      "step": 308
    },
    {
      "epoch": 0.24701473894579065,
      "grad_norm": 1.6110529009706316,
      "learning_rate": 1e-05,
      "loss": 0.8987,
      "step": 309
    },
    {
      "epoch": 0.2478141393954534,
      "grad_norm": 1.7475182646170053,
      "learning_rate": 1e-05,
      "loss": 0.885,
      "step": 310
    },
    {
      "epoch": 0.24861353984511617,
      "grad_norm": 1.8647125722982512,
      "learning_rate": 1e-05,
      "loss": 0.9214,
      "step": 311
    },
    {
      "epoch": 0.24941294029477892,
      "grad_norm": 1.6670715424606828,
      "learning_rate": 1e-05,
      "loss": 0.9462,
      "step": 312
    },
    {
      "epoch": 0.25021234074444165,
      "grad_norm": 1.5198974766775857,
      "learning_rate": 1e-05,
      "loss": 0.9632,
      "step": 313
    },
    {
      "epoch": 0.2510117411941044,
      "grad_norm": 1.5581495649662924,
      "learning_rate": 1e-05,
      "loss": 0.9602,
      "step": 314
    },
    {
      "epoch": 0.25181114164376717,
      "grad_norm": 1.5776975494668843,
      "learning_rate": 1e-05,
      "loss": 0.9794,
      "step": 315
    },
    {
      "epoch": 0.2526105420934299,
      "grad_norm": 1.6005787401081062,
      "learning_rate": 1e-05,
      "loss": 0.8655,
      "step": 316
    },
    {
      "epoch": 0.2534099425430927,
      "grad_norm": 1.7530297645251576,
      "learning_rate": 1e-05,
      "loss": 0.915,
      "step": 317
    },
    {
      "epoch": 0.25420934299275544,
      "grad_norm": 1.8516146569735892,
      "learning_rate": 1e-05,
      "loss": 0.8734,
      "step": 318
    },
    {
      "epoch": 0.2550087434424182,
      "grad_norm": 1.5925556861862051,
      "learning_rate": 1e-05,
      "loss": 0.9356,
      "step": 319
    },
    {
      "epoch": 0.25580814389208095,
      "grad_norm": 1.7942857409055468,
      "learning_rate": 1e-05,
      "loss": 0.925,
      "step": 320
    },
    {
      "epoch": 0.2566075443417437,
      "grad_norm": 1.7301914879145586,
      "learning_rate": 1e-05,
      "loss": 0.896,
      "step": 321
    },
    {
      "epoch": 0.25740694479140647,
      "grad_norm": 1.5868880054016326,
      "learning_rate": 1e-05,
      "loss": 0.9021,
      "step": 322
    },
    {
      "epoch": 0.2582063452410692,
      "grad_norm": 1.7680256022363232,
      "learning_rate": 1e-05,
      "loss": 0.9309,
      "step": 323
    },
    {
      "epoch": 0.2590057456907319,
      "grad_norm": 1.586312615898128,
      "learning_rate": 1e-05,
      "loss": 1.0129,
      "step": 324
    },
    {
      "epoch": 0.2598051461403947,
      "grad_norm": 1.8702172203637788,
      "learning_rate": 1e-05,
      "loss": 0.9423,
      "step": 325
    },
    {
      "epoch": 0.26060454659005744,
      "grad_norm": 1.6231753647103917,
      "learning_rate": 1e-05,
      "loss": 0.9192,
      "step": 326
    },
    {
      "epoch": 0.2614039470397202,
      "grad_norm": 1.6717011992423259,
      "learning_rate": 1e-05,
      "loss": 0.9214,
      "step": 327
    },
    {
      "epoch": 0.26220334748938295,
      "grad_norm": 1.6440233759725276,
      "learning_rate": 1e-05,
      "loss": 0.9525,
      "step": 328
    },
    {
      "epoch": 0.2630027479390457,
      "grad_norm": 1.6336229619568068,
      "learning_rate": 1e-05,
      "loss": 0.9072,
      "step": 329
    },
    {
      "epoch": 0.26380214838870847,
      "grad_norm": 1.794138937818925,
      "learning_rate": 1e-05,
      "loss": 0.9081,
      "step": 330
    },
    {
      "epoch": 0.2646015488383712,
      "grad_norm": 1.7000293714077805,
      "learning_rate": 1e-05,
      "loss": 0.9311,
      "step": 331
    },
    {
      "epoch": 0.265400949288034,
      "grad_norm": 1.7629207816569556,
      "learning_rate": 1e-05,
      "loss": 0.8942,
      "step": 332
    },
    {
      "epoch": 0.26620034973769674,
      "grad_norm": 1.7243708406916276,
      "learning_rate": 1e-05,
      "loss": 0.9009,
      "step": 333
    },
    {
      "epoch": 0.2669997501873595,
      "grad_norm": 1.5153725886830214,
      "learning_rate": 1e-05,
      "loss": 0.946,
      "step": 334
    },
    {
      "epoch": 0.26779915063702225,
      "grad_norm": 1.5897189873039888,
      "learning_rate": 1e-05,
      "loss": 0.8988,
      "step": 335
    },
    {
      "epoch": 0.268598551086685,
      "grad_norm": 1.7792011474569303,
      "learning_rate": 1e-05,
      "loss": 0.9075,
      "step": 336
    },
    {
      "epoch": 0.2693979515363477,
      "grad_norm": 1.715871716234354,
      "learning_rate": 1e-05,
      "loss": 0.9488,
      "step": 337
    },
    {
      "epoch": 0.27019735198601047,
      "grad_norm": 1.7421673985618036,
      "learning_rate": 1e-05,
      "loss": 0.9265,
      "step": 338
    },
    {
      "epoch": 0.2709967524356732,
      "grad_norm": 1.701591645181251,
      "learning_rate": 1e-05,
      "loss": 0.9134,
      "step": 339
    },
    {
      "epoch": 0.271796152885336,
      "grad_norm": 1.5763851776425317,
      "learning_rate": 1e-05,
      "loss": 0.9059,
      "step": 340
    },
    {
      "epoch": 0.27259555333499874,
      "grad_norm": 1.8860488547053122,
      "learning_rate": 1e-05,
      "loss": 0.9379,
      "step": 341
    },
    {
      "epoch": 0.2733949537846615,
      "grad_norm": 1.6278214908005035,
      "learning_rate": 1e-05,
      "loss": 0.9041,
      "step": 342
    },
    {
      "epoch": 0.27419435423432426,
      "grad_norm": 1.8591339922582193,
      "learning_rate": 1e-05,
      "loss": 0.9159,
      "step": 343
    },
    {
      "epoch": 0.274993754683987,
      "grad_norm": 1.6416932855404107,
      "learning_rate": 1e-05,
      "loss": 0.9334,
      "step": 344
    },
    {
      "epoch": 0.27579315513364977,
      "grad_norm": 1.5841499089670428,
      "learning_rate": 1e-05,
      "loss": 0.8758,
      "step": 345
    },
    {
      "epoch": 0.2765925555833125,
      "grad_norm": 1.4885385714768005,
      "learning_rate": 1e-05,
      "loss": 0.9482,
      "step": 346
    },
    {
      "epoch": 0.2773919560329753,
      "grad_norm": 1.652595269550327,
      "learning_rate": 1e-05,
      "loss": 0.9341,
      "step": 347
    },
    {
      "epoch": 0.27819135648263804,
      "grad_norm": 1.569292511449757,
      "learning_rate": 1e-05,
      "loss": 0.9395,
      "step": 348
    },
    {
      "epoch": 0.2789907569323008,
      "grad_norm": 1.8816669651120839,
      "learning_rate": 1e-05,
      "loss": 0.879,
      "step": 349
    },
    {
      "epoch": 0.2797901573819635,
      "grad_norm": 1.8044366358437511,
      "learning_rate": 1e-05,
      "loss": 0.9476,
      "step": 350
    },
    {
      "epoch": 0.28058955783162626,
      "grad_norm": 1.581864578938978,
      "learning_rate": 1e-05,
      "loss": 0.9443,
      "step": 351
    },
    {
      "epoch": 0.281388958281289,
      "grad_norm": 1.719778574188113,
      "learning_rate": 1e-05,
      "loss": 0.9682,
      "step": 352
    },
    {
      "epoch": 0.28218835873095177,
      "grad_norm": 1.7544745777196906,
      "learning_rate": 1e-05,
      "loss": 0.935,
      "step": 353
    },
    {
      "epoch": 0.28298775918061453,
      "grad_norm": 1.529692690903228,
      "learning_rate": 1e-05,
      "loss": 0.899,
      "step": 354
    },
    {
      "epoch": 0.2837871596302773,
      "grad_norm": 1.7002824332518707,
      "learning_rate": 1e-05,
      "loss": 0.9089,
      "step": 355
    },
    {
      "epoch": 0.28458656007994004,
      "grad_norm": 1.6960676218935922,
      "learning_rate": 1e-05,
      "loss": 0.9131,
      "step": 356
    },
    {
      "epoch": 0.2853859605296028,
      "grad_norm": 1.5467919520374653,
      "learning_rate": 1e-05,
      "loss": 0.9234,
      "step": 357
    },
    {
      "epoch": 0.28618536097926556,
      "grad_norm": 1.5401712398267708,
      "learning_rate": 1e-05,
      "loss": 0.8821,
      "step": 358
    },
    {
      "epoch": 0.2869847614289283,
      "grad_norm": 1.713197431966504,
      "learning_rate": 1e-05,
      "loss": 0.9755,
      "step": 359
    },
    {
      "epoch": 0.28778416187859107,
      "grad_norm": 1.5846038726149987,
      "learning_rate": 1e-05,
      "loss": 0.9637,
      "step": 360
    },
    {
      "epoch": 0.28858356232825383,
      "grad_norm": 1.9337936027301381,
      "learning_rate": 1e-05,
      "loss": 0.9208,
      "step": 361
    },
    {
      "epoch": 0.2893829627779166,
      "grad_norm": 1.6240977396645668,
      "learning_rate": 1e-05,
      "loss": 0.9235,
      "step": 362
    },
    {
      "epoch": 0.29018236322757934,
      "grad_norm": 1.7452206300395003,
      "learning_rate": 1e-05,
      "loss": 0.9553,
      "step": 363
    },
    {
      "epoch": 0.29098176367724204,
      "grad_norm": 1.7207282088148232,
      "learning_rate": 1e-05,
      "loss": 0.9996,
      "step": 364
    },
    {
      "epoch": 0.2917811641269048,
      "grad_norm": 1.6238156006165856,
      "learning_rate": 1e-05,
      "loss": 0.9492,
      "step": 365
    },
    {
      "epoch": 0.29258056457656756,
      "grad_norm": 1.7217996073600954,
      "learning_rate": 1e-05,
      "loss": 0.9659,
      "step": 366
    },
    {
      "epoch": 0.2933799650262303,
      "grad_norm": 1.7599545299893906,
      "learning_rate": 1e-05,
      "loss": 0.8954,
      "step": 367
    },
    {
      "epoch": 0.2941793654758931,
      "grad_norm": 1.8392526222961474,
      "learning_rate": 1e-05,
      "loss": 0.9028,
      "step": 368
    },
    {
      "epoch": 0.29497876592555583,
      "grad_norm": 1.4791987859922466,
      "learning_rate": 1e-05,
      "loss": 0.9207,
      "step": 369
    },
    {
      "epoch": 0.2957781663752186,
      "grad_norm": 1.4806074723615978,
      "learning_rate": 1e-05,
      "loss": 0.9419,
      "step": 370
    },
    {
      "epoch": 0.29657756682488134,
      "grad_norm": 1.7004917267851303,
      "learning_rate": 1e-05,
      "loss": 0.9354,
      "step": 371
    },
    {
      "epoch": 0.2973769672745441,
      "grad_norm": 1.6234361909723023,
      "learning_rate": 1e-05,
      "loss": 0.8969,
      "step": 372
    },
    {
      "epoch": 0.29817636772420686,
      "grad_norm": 1.5271331279708817,
      "learning_rate": 1e-05,
      "loss": 0.9455,
      "step": 373
    },
    {
      "epoch": 0.2989757681738696,
      "grad_norm": 1.622230251696962,
      "learning_rate": 1e-05,
      "loss": 0.9504,
      "step": 374
    },
    {
      "epoch": 0.2997751686235324,
      "grad_norm": 1.807073970989606,
      "learning_rate": 1e-05,
      "loss": 0.9,
      "step": 375
    },
    {
      "epoch": 0.30057456907319513,
      "grad_norm": 1.4951410146162138,
      "learning_rate": 1e-05,
      "loss": 0.9664,
      "step": 376
    },
    {
      "epoch": 0.30137396952285783,
      "grad_norm": 1.813020482613949,
      "learning_rate": 1e-05,
      "loss": 0.9441,
      "step": 377
    },
    {
      "epoch": 0.3021733699725206,
      "grad_norm": 2.0285660578298046,
      "learning_rate": 1e-05,
      "loss": 0.8861,
      "step": 378
    },
    {
      "epoch": 0.30297277042218335,
      "grad_norm": 1.6967916115297645,
      "learning_rate": 1e-05,
      "loss": 0.9321,
      "step": 379
    },
    {
      "epoch": 0.3037721708718461,
      "grad_norm": 1.7022095887528572,
      "learning_rate": 1e-05,
      "loss": 0.9613,
      "step": 380
    },
    {
      "epoch": 0.30457157132150886,
      "grad_norm": 1.719645739549248,
      "learning_rate": 1e-05,
      "loss": 0.9219,
      "step": 381
    },
    {
      "epoch": 0.3053709717711716,
      "grad_norm": 1.6526243786903378,
      "learning_rate": 1e-05,
      "loss": 0.9069,
      "step": 382
    },
    {
      "epoch": 0.3061703722208344,
      "grad_norm": 1.554593608182918,
      "learning_rate": 1e-05,
      "loss": 0.8863,
      "step": 383
    },
    {
      "epoch": 0.30696977267049713,
      "grad_norm": 1.7296741561953324,
      "learning_rate": 1e-05,
      "loss": 0.9965,
      "step": 384
    },
    {
      "epoch": 0.3077691731201599,
      "grad_norm": 1.7765959484743603,
      "learning_rate": 1e-05,
      "loss": 0.9024,
      "step": 385
    },
    {
      "epoch": 0.30856857356982265,
      "grad_norm": 1.7444591927862072,
      "learning_rate": 1e-05,
      "loss": 0.9491,
      "step": 386
    },
    {
      "epoch": 0.3093679740194854,
      "grad_norm": 1.6979650733135505,
      "learning_rate": 1e-05,
      "loss": 0.9633,
      "step": 387
    },
    {
      "epoch": 0.31016737446914816,
      "grad_norm": 1.6518215838203623,
      "learning_rate": 1e-05,
      "loss": 0.8594,
      "step": 388
    },
    {
      "epoch": 0.3109667749188109,
      "grad_norm": 1.597669753265097,
      "learning_rate": 1e-05,
      "loss": 0.8807,
      "step": 389
    },
    {
      "epoch": 0.3117661753684736,
      "grad_norm": 1.7005856529533696,
      "learning_rate": 1e-05,
      "loss": 0.9136,
      "step": 390
    },
    {
      "epoch": 0.3125655758181364,
      "grad_norm": 1.714793495031338,
      "learning_rate": 1e-05,
      "loss": 0.8969,
      "step": 391
    },
    {
      "epoch": 0.31336497626779913,
      "grad_norm": 1.5558141368768388,
      "learning_rate": 1e-05,
      "loss": 0.9257,
      "step": 392
    },
    {
      "epoch": 0.3141643767174619,
      "grad_norm": 1.5404155153049455,
      "learning_rate": 1e-05,
      "loss": 0.8779,
      "step": 393
    },
    {
      "epoch": 0.31496377716712465,
      "grad_norm": 1.5383972642859716,
      "learning_rate": 1e-05,
      "loss": 0.9707,
      "step": 394
    },
    {
      "epoch": 0.3157631776167874,
      "grad_norm": 1.7191998432330473,
      "learning_rate": 1e-05,
      "loss": 0.9126,
      "step": 395
    },
    {
      "epoch": 0.31656257806645016,
      "grad_norm": 1.6051194326495044,
      "learning_rate": 1e-05,
      "loss": 0.8822,
      "step": 396
    },
    {
      "epoch": 0.3173619785161129,
      "grad_norm": 1.6869656351879205,
      "learning_rate": 1e-05,
      "loss": 0.9343,
      "step": 397
    },
    {
      "epoch": 0.3181613789657757,
      "grad_norm": 1.6256734963382786,
      "learning_rate": 1e-05,
      "loss": 0.9156,
      "step": 398
    },
    {
      "epoch": 0.31896077941543843,
      "grad_norm": 1.5756449476038674,
      "learning_rate": 1e-05,
      "loss": 0.9807,
      "step": 399
    },
    {
      "epoch": 0.3197601798651012,
      "grad_norm": 1.6188490159724278,
      "learning_rate": 1e-05,
      "loss": 0.9644,
      "step": 400
    },
{ |
|
"epoch": 0.32055958031476395, |
|
"grad_norm": 1.94007311994945, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9614, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3213589807644267, |
|
"grad_norm": 1.659086295612128, |
|
"learning_rate": 1e-05, |
|
"loss": 0.944, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3221583812140894, |
|
"grad_norm": 1.9235409755089947, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9259, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.32295778166375216, |
|
"grad_norm": 1.5880918105995026, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9255, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3237571821134149, |
|
"grad_norm": 1.4948152435643522, |
|
"learning_rate": 1e-05, |
|
"loss": 0.926, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3245565825630777, |
|
"grad_norm": 1.5350941186461544, |
|
"learning_rate": 1e-05, |
|
"loss": 0.898, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.32535598301274044, |
|
"grad_norm": 1.3466986686471294, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8904, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3261553834624032, |
|
"grad_norm": 1.459891336046445, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8817, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.32695478391206595, |
|
"grad_norm": 1.4836727854431802, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9216, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3277541843617287, |
|
"grad_norm": 1.6446226209440065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9249, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.32855358481139146, |
|
"grad_norm": 1.5635297277867413, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9122, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.3293529852610542, |
|
"grad_norm": 1.6358281167528332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8843, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.330152385710717, |
|
"grad_norm": 1.5404191221381782, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9106, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.33095178616037974, |
|
"grad_norm": 1.5879004668639547, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9211, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.3317511866100425, |
|
"grad_norm": 1.790797443056402, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9211, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3325505870597052, |
|
"grad_norm": 1.8179861905661685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9391, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.33334998750936795, |
|
"grad_norm": 1.4379165089707215, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9021, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3341493879590307, |
|
"grad_norm": 1.7134688617321956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9833, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.33494878840869347, |
|
"grad_norm": 1.4039431214440103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.888, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3357481888583562, |
|
"grad_norm": 1.6586329038004721, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9088, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.336547589308019, |
|
"grad_norm": 1.900128933012227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.33734698975768174, |
|
"grad_norm": 1.5890662573554606, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9029, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3381463902073445, |
|
"grad_norm": 1.495628306935103, |
|
"learning_rate": 1e-05, |
|
"loss": 0.919, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.33894579065700725, |
|
"grad_norm": 1.4495521814015604, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9967, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.33974519110667, |
|
"grad_norm": 1.7055256640065686, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9769, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.34054459155633277, |
|
"grad_norm": 1.4909741619159311, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9152, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3413439920059955, |
|
"grad_norm": 1.628227110908977, |
|
"learning_rate": 1e-05, |
|
"loss": 0.955, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3421433924556583, |
|
"grad_norm": 1.8220036868892047, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8972, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.34294279290532104, |
|
"grad_norm": 1.693415237669836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9063, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.34374219335498374, |
|
"grad_norm": 1.5346322329118909, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8737, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3445415938046465, |
|
"grad_norm": 1.523134303904886, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9522, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.34534099425430925, |
|
"grad_norm": 1.8163891768400675, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9254, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.346140394703972, |
|
"grad_norm": 1.6001042968512986, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9393, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.34693979515363477, |
|
"grad_norm": 1.4962110538157338, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9016, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3477391956032975, |
|
"grad_norm": 1.7041821659704226, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8586, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3485385960529603, |
|
"grad_norm": 1.6883017856053422, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9729, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.34933799650262304, |
|
"grad_norm": 1.6846925338485461, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9379, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3501373969522858, |
|
"grad_norm": 1.8235246867955863, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9248, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.35093679740194855, |
|
"grad_norm": 1.935505500625835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9371, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3517361978516113, |
|
"grad_norm": 1.67613124761384, |
|
"learning_rate": 1e-05, |
|
"loss": 0.979, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35253559830127407, |
|
"grad_norm": 1.4449954490901646, |
|
"learning_rate": 1e-05, |
|
"loss": 0.968, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3533349987509368, |
|
"grad_norm": 1.5913830352404914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.964, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.3541343992005995, |
|
"grad_norm": 1.7168730495466147, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9138, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3549337996502623, |
|
"grad_norm": 1.6307072180820321, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9472, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.35573320009992504, |
|
"grad_norm": 1.6118353409303823, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9645, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3565326005495878, |
|
"grad_norm": 1.6940859087140694, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9815, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.35733200099925055, |
|
"grad_norm": 1.8606216696352482, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9805, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3581314014489133, |
|
"grad_norm": 1.532089096889218, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9393, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.35893080189857607, |
|
"grad_norm": 1.6384290071957173, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9459, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3597302023482388, |
|
"grad_norm": 1.5244481340256106, |
|
"learning_rate": 1e-05, |
|
"loss": 0.977, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3605296027979016, |
|
"grad_norm": 1.6337567843902518, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8991, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.36132900324756434, |
|
"grad_norm": 1.7963360988533934, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9087, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3621284036972271, |
|
"grad_norm": 1.696315268595366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9242, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.36292780414688985, |
|
"grad_norm": 1.710036005807286, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8429, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3637272045965526, |
|
"grad_norm": 1.749452843653296, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9133, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.3645266050462153, |
|
"grad_norm": 1.4397928987828232, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8427, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.36532600549587807, |
|
"grad_norm": 1.6825466790780408, |
|
"learning_rate": 1e-05, |
|
"loss": 0.89, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.3661254059455408, |
|
"grad_norm": 1.6056927709310882, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9421, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3669248063952036, |
|
"grad_norm": 1.5861038676425987, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9496, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.36772420684486634, |
|
"grad_norm": 1.6684621776248278, |
|
"learning_rate": 1e-05, |
|
"loss": 0.884, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3685236072945291, |
|
"grad_norm": 1.758026110496432, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9441, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.36932300774419186, |
|
"grad_norm": 1.4763506224586516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9393, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3701224081938546, |
|
"grad_norm": 1.5509318071640712, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8643, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.37092180864351737, |
|
"grad_norm": 1.5607192206519345, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9165, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3717212090931801, |
|
"grad_norm": 1.6511236719507991, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9421, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3725206095428429, |
|
"grad_norm": 1.6501362966399429, |
|
"learning_rate": 1e-05, |
|
"loss": 0.915, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.37332000999250564, |
|
"grad_norm": 1.5207720771291409, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9454, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3741194104421684, |
|
"grad_norm": 1.5392735956515966, |
|
"learning_rate": 1e-05, |
|
"loss": 0.956, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.3749188108918311, |
|
"grad_norm": 1.5940306759004237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8719, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.37571821134149386, |
|
"grad_norm": 1.6908424326030602, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9255, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3765176117911566, |
|
"grad_norm": 1.4928846149782238, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9378, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.37731701224081937, |
|
"grad_norm": 1.7041500499453686, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8734, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.37811641269048213, |
|
"grad_norm": 1.619189516937598, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9063, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.3789158131401449, |
|
"grad_norm": 1.6588364324248581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.37971521358980764, |
|
"grad_norm": 1.5762727848791807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9497, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3805146140394704, |
|
"grad_norm": 1.5363970090025982, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9918, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.38131401448913316, |
|
"grad_norm": 1.6404231232106667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9056, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3821134149387959, |
|
"grad_norm": 1.6314596845516385, |
|
"learning_rate": 1e-05, |
|
"loss": 0.928, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.38291281538845867, |
|
"grad_norm": 1.6126677835331522, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9978, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.38371221583812143, |
|
"grad_norm": 1.3173664389567725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9158, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3845116162877842, |
|
"grad_norm": 1.496540187325337, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9378, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.3853110167374469, |
|
"grad_norm": 1.5062068173629883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9159, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.38611041718710964, |
|
"grad_norm": 1.529187603034289, |
|
"learning_rate": 1e-05, |
|
"loss": 0.951, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.3869098176367724, |
|
"grad_norm": 1.5635118437005366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9291, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.38770921808643516, |
|
"grad_norm": 1.6646247338291131, |
|
"learning_rate": 1e-05, |
|
"loss": 0.874, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3885086185360979, |
|
"grad_norm": 1.6470189371191908, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9118, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3893080189857607, |
|
"grad_norm": 1.4041767343860398, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9193, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.39010741943542343, |
|
"grad_norm": 1.637354519439742, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9622, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.3909068198850862, |
|
"grad_norm": 1.6793914337693705, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8591, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.39170622033474894, |
|
"grad_norm": 1.631823843080509, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9061, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3925056207844117, |
|
"grad_norm": 1.4551068376984746, |
|
"learning_rate": 1e-05, |
|
"loss": 0.886, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.39330502123407446, |
|
"grad_norm": 1.843148583217912, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8748, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.3941044216837372, |
|
"grad_norm": 1.503885142875128, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9208, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.3949038221334, |
|
"grad_norm": 1.7406094685573732, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8786, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.39570322258306273, |
|
"grad_norm": 1.730586930891903, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9157, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.39650262303272543, |
|
"grad_norm": 1.5528810488930866, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9142, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.3973020234823882, |
|
"grad_norm": 1.5307301129466364, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9415, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.39810142393205095, |
|
"grad_norm": 1.5706393811203467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8912, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.3989008243817137, |
|
"grad_norm": 1.6199448054984131, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9175, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.39970022483137646, |
|
"grad_norm": 1.4945708663613873, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8961, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4004996252810392, |
|
"grad_norm": 1.5533154327294227, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9356, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.401299025730702, |
|
"grad_norm": 1.5325963522620767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.965, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.40209842618036473, |
|
"grad_norm": 1.671999510186726, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8271, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.4028978266300275, |
|
"grad_norm": 1.5355767548245969, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9497, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.40369722707969025, |
|
"grad_norm": 1.6030539969868434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9291, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.404496627529353, |
|
"grad_norm": 1.6407538986876247, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9878, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.40529602797901576, |
|
"grad_norm": 1.7688356573735502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9665, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.4060954284286785, |
|
"grad_norm": 1.5607970481443443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9488, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4068948288783412, |
|
"grad_norm": 1.6161754040719796, |
|
"learning_rate": 1e-05, |
|
"loss": 0.858, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.407694229328004, |
|
"grad_norm": 1.5793085315204543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8956, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40849362977766673, |
|
"grad_norm": 1.5936599885814402, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9348, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4092930302273295, |
|
"grad_norm": 1.5658605524297327, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9389, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.41009243067699225, |
|
"grad_norm": 1.5921115812648192, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8438, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.410891831126655, |
|
"grad_norm": 1.8163231036582868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9064, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.41169123157631776, |
|
"grad_norm": 1.5950813731389535, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9172, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4124906320259805, |
|
"grad_norm": 1.689588168520015, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9265, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4132900324756433, |
|
"grad_norm": 1.540041600561803, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9189, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.41408943292530603, |
|
"grad_norm": 1.6662920193878612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9155, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.4148888333749688, |
|
"grad_norm": 1.6860065883672692, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9608, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.41568823382463155, |
|
"grad_norm": 1.7503429857603447, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8936, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4164876342742943, |
|
"grad_norm": 1.4349809774745903, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9076, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.417287034723957, |
|
"grad_norm": 1.6525870899508948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.951, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.41808643517361976, |
|
"grad_norm": 1.305941403451334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9039, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4188858356232825, |
|
"grad_norm": 1.5535189677415364, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9101, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4196852360729453, |
|
"grad_norm": 1.390869042188358, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8821, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.42048463652260804, |
|
"grad_norm": 1.6086764868308612, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9494, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4212840369722708, |
|
"grad_norm": 1.5277453444137763, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9042, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.42208343742193355, |
|
"grad_norm": 1.5037652064794895, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8973, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4228828378715963, |
|
"grad_norm": 1.630788946234423, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8868, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.42368223832125906, |
|
"grad_norm": 1.476128500837339, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9264, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4244816387709218, |
|
"grad_norm": 1.4082525457129158, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9194, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4252810392205846, |
|
"grad_norm": 1.60560804137754, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8596, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.42608043967024734, |
|
"grad_norm": 1.5292853895222724, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8933, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4268798401199101, |
|
"grad_norm": 1.6276199503503024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8905, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4276792405695728, |
|
"grad_norm": 1.6143026040200776, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9017, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.42847864101923555, |
|
"grad_norm": 1.492638575870208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8445, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4292780414688983, |
|
"grad_norm": 1.5992856689061312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8747, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.43007744191856107, |
|
"grad_norm": 1.8376302395541704, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8744, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4308768423682238, |
|
"grad_norm": 1.5083175238496622, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8831, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4316762428178866, |
|
"grad_norm": 1.6391308804501599, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9093, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.43247564326754934, |
|
"grad_norm": 1.587896265231209, |
|
"learning_rate": 1e-05, |
|
"loss": 0.931, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4332750437172121, |
|
"grad_norm": 1.5174662595552115, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9176, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.43407444416687485, |
|
"grad_norm": 1.6000443436491891, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8983, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4348738446165376, |
|
"grad_norm": 1.6311375389076388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9358, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.43567324506620037, |
|
"grad_norm": 1.5311673613481407, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9248, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.4364726455158631, |
|
"grad_norm": 1.527296520797819, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8941, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4372720459655259, |
|
"grad_norm": 1.3849530231908453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9206, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4380714464151886, |
|
"grad_norm": 1.6041978636707703, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8933, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.43887084686485134, |
|
"grad_norm": 1.5449273405092985, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9157, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4396702473145141, |
|
"grad_norm": 1.5864452967308555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8443, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.44046964776417685, |
|
"grad_norm": 1.4728488192211566, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9108, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4412690482138396, |
|
"grad_norm": 1.4823924024202317, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9336, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.44206844866350237, |
|
"grad_norm": 1.4382359303688308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9271, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.4428678491131651, |
|
"grad_norm": 1.5676768234957863, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9705, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4436672495628279, |
|
"grad_norm": 1.5423184321680976, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8464, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44446665001249064, |
|
"grad_norm": 1.6045659880625645, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9303, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4452660504621534, |
|
"grad_norm": 1.9872755202696784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8687, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.44606545091181615, |
|
"grad_norm": 1.4834070914943105, |
|
"learning_rate": 1e-05, |
|
"loss": 0.951, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4468648513614789, |
|
"grad_norm": 1.5310211273825027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9233, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.44766425181114167, |
|
"grad_norm": 1.5815996536549406, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9767, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4484636522608044, |
|
"grad_norm": 1.7688239075887118, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8879, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4492630527104671, |
|
"grad_norm": 1.6482560554808632, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9124, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.4500624531601299, |
|
"grad_norm": 1.5404021166963555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9027, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.45086185360979264, |
|
"grad_norm": 1.5195520813189534, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9112, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.4516612540594554, |
|
"grad_norm": 1.5192783031055126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8971, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.45246065450911815, |
|
"grad_norm": 1.5618653033074856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9054, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.4532600549587809, |
|
"grad_norm": 1.6064016663059253, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9391, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.45405945540844367, |
|
"grad_norm": 1.7240615287273162, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8906, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.4548588558581064, |
|
"grad_norm": 1.7149945179624295, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8621, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4556582563077692, |
|
"grad_norm": 1.4856328376378898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8694, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.45645765675743194, |
|
"grad_norm": 1.4702642174922036, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9025, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4572570572070947, |
|
"grad_norm": 1.6088556169851551, |
|
"learning_rate": 1e-05, |
|
"loss": 0.868, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.45805645765675745, |
|
"grad_norm": 1.5509844332733922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9513, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4588558581064202, |
|
"grad_norm": 1.5292949122902217, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8845, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4596552585560829, |
|
"grad_norm": 1.6381076297979584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9386, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.46045465900574567, |
|
"grad_norm": 1.6267004497668505, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8987, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4612540594554084, |
|
"grad_norm": 1.5456142322307922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9121, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4620534599050712, |
|
"grad_norm": 1.5522043742149023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8914, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.46285286035473394, |
|
"grad_norm": 1.633867715589152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8741, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4636522608043967, |
|
"grad_norm": 1.614894631262607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9171, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.46445166125405946, |
|
"grad_norm": 1.386145144430922, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8693, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4652510617037222, |
|
"grad_norm": 1.484841140261494, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8966, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.46605046215338497, |
|
"grad_norm": 1.6068617064880517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8818, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4668498626030477, |
|
"grad_norm": 1.6096786496184112, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9123, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.4676492630527105, |
|
"grad_norm": 1.4602535645871833, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9143, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46844866350237324, |
|
"grad_norm": 1.7447912274361523, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8966, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.469248063952036, |
|
"grad_norm": 1.5775439912332734, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8994, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.4700474644016987, |
|
"grad_norm": 1.4031832068470533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9055, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.47084686485136146, |
|
"grad_norm": 1.5789430313417314, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9393, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.4716462653010242, |
|
"grad_norm": 1.4655734741114497, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8889, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47244566575068697, |
|
"grad_norm": 1.752804541715281, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9166, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.47324506620034973, |
|
"grad_norm": 1.6906678527664594, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8673, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.4740444666500125, |
|
"grad_norm": 1.5985802845452706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9435, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.47484386709967524, |
|
"grad_norm": 1.6997316043068198, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9112, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.475643267549338, |
|
"grad_norm": 1.3896008701013607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8884, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.47644266799900076, |
|
"grad_norm": 1.4232134469996818, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8537, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.4772420684486635, |
|
"grad_norm": 1.4962294604199373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8599, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.47804146889832627, |
|
"grad_norm": 1.3445821960864492, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8719, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.47884086934798903, |
|
"grad_norm": 1.5426225615913305, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9097, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.4796402697976518, |
|
"grad_norm": 1.4650349809263883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8933, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4804396702473145, |
|
"grad_norm": 1.5753170073693514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9461, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.48123907069697724, |
|
"grad_norm": 1.6207854665284498, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8332, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.48203847114664, |
|
"grad_norm": 1.6847020603077485, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8902, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.48283787159630276, |
|
"grad_norm": 1.746631687170473, |
|
"learning_rate": 1e-05, |
|
"loss": 0.852, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.4836372720459655, |
|
"grad_norm": 1.5812097478750036, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8875, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.4844366724956283, |
|
"grad_norm": 1.664501332749721, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9623, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.48523607294529103, |
|
"grad_norm": 1.3887624769518734, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8815, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.4860354733949538, |
|
"grad_norm": 1.4487321005360188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8985, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.48683487384461654, |
|
"grad_norm": 1.4789561071530237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9061, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4876342742942793, |
|
"grad_norm": 1.5069409156312008, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9286, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.48843367474394206, |
|
"grad_norm": 1.4663884880855809, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9382, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.4892330751936048, |
|
"grad_norm": 1.4795946008795262, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9013, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4900324756432676, |
|
"grad_norm": 1.6550390075160482, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8981, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4908318760929303, |
|
"grad_norm": 1.5252370570410794, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9399, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.49163127654259303, |
|
"grad_norm": 1.55342646595899, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9369, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4924306769922558, |
|
"grad_norm": 1.3945867465343513, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9739, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.49323007744191855, |
|
"grad_norm": 1.8084042523739312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8568, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4940294778915813, |
|
"grad_norm": 1.3957730664102426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9212, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.49482887834124406, |
|
"grad_norm": 1.576073681260172, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9323, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4956282787909068, |
|
"grad_norm": 1.478562229589502, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8784, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4964276792405696, |
|
"grad_norm": 1.5742856570618204, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8949, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.49722707969023233, |
|
"grad_norm": 1.7717496405831807, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8882, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4980264801398951, |
|
"grad_norm": 1.512802542889935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9555, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.49882588058955785, |
|
"grad_norm": 1.44305014112251, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9058, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4996252810392206, |
|
"grad_norm": 1.6065628841661808, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8697, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5004246814888833, |
|
"grad_norm": 1.5176075034291314, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8774, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5012240819385461, |
|
"grad_norm": 1.4385224047152578, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9092, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5020234823882088, |
|
"grad_norm": 1.5846911793271963, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9333, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5028228828378716, |
|
"grad_norm": 1.6455364602527989, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9412, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5036222832875343, |
|
"grad_norm": 1.6062811152199334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9091, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5044216837371971, |
|
"grad_norm": 1.4354611082735989, |
|
"learning_rate": 1e-05, |
|
"loss": 0.907, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5052210841868598, |
|
"grad_norm": 1.52829754540632, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9195, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5060204846365226, |
|
"grad_norm": 1.6184765917993094, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9452, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5068198850861854, |
|
"grad_norm": 1.5257888577090237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8772, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5076192855358481, |
|
"grad_norm": 1.4539514346389641, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8898, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5084186859855109, |
|
"grad_norm": 1.6554813398137607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8987, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5092180864351736, |
|
"grad_norm": 1.4575833122082418, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9455, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5100174868848364, |
|
"grad_norm": 1.651253682354515, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8731, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5108168873344991, |
|
"grad_norm": 1.608007000762813, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9068, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5116162877841619, |
|
"grad_norm": 1.4451823786722864, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9363, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5124156882338247, |
|
"grad_norm": 1.585132398185237, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8394, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5132150886834874, |
|
"grad_norm": 1.5460763577114784, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8782, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5140144891331502, |
|
"grad_norm": 1.5336894539869739, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8913, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5148138895828129, |
|
"grad_norm": 1.5563638706418883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.891, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5156132900324757, |
|
"grad_norm": 1.6781793712825763, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9243, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5164126904821384, |
|
"grad_norm": 1.555015563156278, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8733, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5172120909318011, |
|
"grad_norm": 1.4725706386221917, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8681, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5180114913814639, |
|
"grad_norm": 1.3268747875477092, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8807, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5188108918311266, |
|
"grad_norm": 1.5451710380595707, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9226, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5196102922807894, |
|
"grad_norm": 1.5573419054386046, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9044, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5204096927304521, |
|
"grad_norm": 1.2989474126701601, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8532, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5212090931801149, |
|
"grad_norm": 1.696305481260023, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8595, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5220084936297776, |
|
"grad_norm": 1.4451028681658686, |
|
"learning_rate": 1e-05, |
|
"loss": 0.933, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5228078940794404, |
|
"grad_norm": 1.5925002414772222, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9286, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5236072945291032, |
|
"grad_norm": 1.4716559853454252, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8943, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5244066949787659, |
|
"grad_norm": 1.7475122640309384, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9523, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5252060954284287, |
|
"grad_norm": 1.645603067269987, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9075, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5260054958780914, |
|
"grad_norm": 1.6726736025945501, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9039, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5268048963277542, |
|
"grad_norm": 1.518938137250405, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8761, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5276042967774169, |
|
"grad_norm": 1.7083121838298914, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8697, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5284036972270797, |
|
"grad_norm": 1.5499463775414077, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8976, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5292030976767425, |
|
"grad_norm": 1.4098782407183605, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8753, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5300024981264052, |
|
"grad_norm": 1.7641918962063994, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9194, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.530801898576068, |
|
"grad_norm": 1.5469637110527181, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9059, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5316012990257307, |
|
"grad_norm": 1.6487062365426841, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8897, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5324006994753935, |
|
"grad_norm": 1.5205006643304535, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9216, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5332000999250562, |
|
"grad_norm": 1.5325976583230465, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8957, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.533999500374719, |
|
"grad_norm": 1.720042040656152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8832, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5347989008243818, |
|
"grad_norm": 1.4435271985771057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8587, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5355983012740445, |
|
"grad_norm": 1.7309862738667545, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8801, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5363977017237073, |
|
"grad_norm": 1.51553026472629, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8948, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.53719710217337, |
|
"grad_norm": 1.5034966185821361, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8825, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5379965026230328, |
|
"grad_norm": 1.5085135625486585, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8758, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5387959030726954, |
|
"grad_norm": 1.5419185376449267, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9952, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5395953035223582, |
|
"grad_norm": 1.516360272741118, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8744, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5403947039720209, |
|
"grad_norm": 1.6057277324687185, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9379, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5411941044216837, |
|
"grad_norm": 1.5074825505125475, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8687, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5419935048713465, |
|
"grad_norm": 1.564461484690962, |
|
"learning_rate": 1e-05, |
|
"loss": 0.928, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5427929053210092, |
|
"grad_norm": 1.5052766213063988, |
|
"learning_rate": 1e-05, |
|
"loss": 0.909, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.543592305770672, |
|
"grad_norm": 1.3946507047858405, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8984, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5443917062203347, |
|
"grad_norm": 1.524550146914044, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9103, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5451911066699975, |
|
"grad_norm": 1.743015450167898, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8817, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5459905071196602, |
|
"grad_norm": 1.2727179347293005, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9565, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.546789907569323, |
|
"grad_norm": 1.4218645212985512, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9361, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5475893080189858, |
|
"grad_norm": 1.5827671331667068, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9195, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5483887084686485, |
|
"grad_norm": 1.5111024964279403, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8975, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5491881089183113, |
|
"grad_norm": 1.698526384803921, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8693, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.549987509367974, |
|
"grad_norm": 1.4499111433077698, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9304, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5507869098176368, |
|
"grad_norm": 1.6198855701994876, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9071, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5515863102672995, |
|
"grad_norm": 1.447799249815993, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8577, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5523857107169623, |
|
"grad_norm": 1.4643912062350883, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8841, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.553185111166625, |
|
"grad_norm": 1.2539155232355081, |
|
"learning_rate": 1e-05, |
|
"loss": 0.902, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5539845116162878, |
|
"grad_norm": 1.547551258731981, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9678, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5547839120659506, |
|
"grad_norm": 1.4727625062306167, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8882, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.5555833125156133, |
|
"grad_norm": 1.4776645587359942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8921, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5563827129652761, |
|
"grad_norm": 1.4858467571616956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9032, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5571821134149388, |
|
"grad_norm": 1.6272094570109954, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8754, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5579815138646016, |
|
"grad_norm": 1.5209165879169078, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8892, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5587809143142644, |
|
"grad_norm": 1.5534555948764655, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8518, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.559580314763927, |
|
"grad_norm": 1.7199439771989053, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9232, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5603797152135898, |
|
"grad_norm": 1.3598124896967667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9486, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5611791156632525, |
|
"grad_norm": 1.612574738886904, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8794, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5619785161129153, |
|
"grad_norm": 1.3832262396852995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9321, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.562777916562578, |
|
"grad_norm": 1.5923071651772416, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8793, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.5635773170122408, |
|
"grad_norm": 1.5489614624229269, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5643767174619035, |
|
"grad_norm": 1.4485859965980266, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8911, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5651761179115663, |
|
"grad_norm": 1.3786799015631879, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9122, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5659755183612291, |
|
"grad_norm": 1.5342699233246582, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8847, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5667749188108918, |
|
"grad_norm": 1.5871051701796994, |
|
"learning_rate": 1e-05, |
|
"loss": 0.884, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5675743192605546, |
|
"grad_norm": 1.5660069308536273, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8551, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5683737197102173, |
|
"grad_norm": 1.429596069400543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8957, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5691731201598801, |
|
"grad_norm": 1.4491964477267238, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8531, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5699725206095428, |
|
"grad_norm": 1.4365928694753973, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8761, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5707719210592056, |
|
"grad_norm": 1.5578122539795014, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8804, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5715713215088684, |
|
"grad_norm": 1.5327160301768794, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8559, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5723707219585311, |
|
"grad_norm": 1.530442187113109, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8689, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5731701224081939, |
|
"grad_norm": 1.5680317221543405, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8969, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5739695228578566, |
|
"grad_norm": 1.5241288570093494, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9161, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5747689233075194, |
|
"grad_norm": 1.3731985273369733, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8568, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5755683237571821, |
|
"grad_norm": 1.4645302822523454, |
|
"learning_rate": 1e-05, |
|
"loss": 0.899, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5763677242068449, |
|
"grad_norm": 1.429554718936312, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9161, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5771671246565077, |
|
"grad_norm": 1.3621850244930958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9169, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5779665251061704, |
|
"grad_norm": 1.485846183303666, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9811, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5787659255558332, |
|
"grad_norm": 1.4036480667947844, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8841, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5795653260054959, |
|
"grad_norm": 1.3680437907081195, |
|
"learning_rate": 1e-05, |
|
"loss": 0.861, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5803647264551587, |
|
"grad_norm": 1.4902900528640177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9022, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5811641269048213, |
|
"grad_norm": 1.367169701352056, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9091, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5819635273544841, |
|
"grad_norm": 1.6487586565871948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9328, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5827629278041468, |
|
"grad_norm": 1.6567920316755664, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8662, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5835623282538096, |
|
"grad_norm": 1.3391698664356693, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8993, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5843617287034724, |
|
"grad_norm": 1.3695456445124472, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8371, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5851611291531351, |
|
"grad_norm": 1.418306336363921, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9092, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5859605296027979, |
|
"grad_norm": 1.7580509988769806, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9117, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5867599300524606, |
|
"grad_norm": 1.4969730064494027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8777, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.5875593305021234, |
|
"grad_norm": 1.5311661672699555, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8633, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5883587309517861, |
|
"grad_norm": 1.4485040495772017, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9358, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5891581314014489, |
|
"grad_norm": 1.4826514641684152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.852, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5899575318511117, |
|
"grad_norm": 1.48791832285035, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8782, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5907569323007744, |
|
"grad_norm": 1.6057041771896603, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8316, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.5915563327504372, |
|
"grad_norm": 2.1038688962784593, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8521, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5923557332000999, |
|
"grad_norm": 1.621521996919619, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8843, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5931551336497627, |
|
"grad_norm": 1.471365198038119, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8784, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5939545340994254, |
|
"grad_norm": 1.4957177407162774, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9049, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5947539345490882, |
|
"grad_norm": 1.4767129920631528, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8556, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.595553334998751, |
|
"grad_norm": 1.4751091109435195, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8525, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5963527354484137, |
|
"grad_norm": 1.5180861867428592, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8986, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5971521358980765, |
|
"grad_norm": 1.716833225193397, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9396, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5979515363477392, |
|
"grad_norm": 1.662846684061582, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8806, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.598750936797402, |
|
"grad_norm": 1.5397292686479351, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9085, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5995503372470647, |
|
"grad_norm": 1.3571032049534457, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9406, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6003497376967275, |
|
"grad_norm": 1.384922018598161, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8956, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6011491381463903, |
|
"grad_norm": 1.496498809863047, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8918, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6019485385960529, |
|
"grad_norm": 1.4830953787172334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8431, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6027479390457157, |
|
"grad_norm": 1.6829833333195696, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8561, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6035473394953784, |
|
"grad_norm": 1.3654050365320536, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9101, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6043467399450412, |
|
"grad_norm": 1.5240419337473992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8338, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6051461403947039, |
|
"grad_norm": 1.5491861960420192, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8921, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6059455408443667, |
|
"grad_norm": 1.4148317529647148, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8677, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6067449412940294, |
|
"grad_norm": 1.469343003903587, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8734, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.6075443417436922, |
|
"grad_norm": 1.319737814833517, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8688, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.608343742193355, |
|
"grad_norm": 1.4581636035714403, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8753, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6091431426430177, |
|
"grad_norm": 1.8427778000120836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9185, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6099425430926805, |
|
"grad_norm": 1.4013027241862714, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9376, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6107419435423432, |
|
"grad_norm": 1.5267045554235308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8835, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.611541343992006, |
|
"grad_norm": 1.4715893506156257, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8676, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6123407444416687, |
|
"grad_norm": 1.4577005776877618, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8796, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6131401448913315, |
|
"grad_norm": 1.4934814897272444, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8458, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6139395453409943, |
|
"grad_norm": 1.5364809951583207, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8316, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.614738945790657, |
|
"grad_norm": 1.4992439555873935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9177, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.6155383462403198, |
|
"grad_norm": 1.4324130065382474, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9105, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6163377466899825, |
|
"grad_norm": 1.372488633970353, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9365, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6171371471396453, |
|
"grad_norm": 1.3430055625087858, |
|
"learning_rate": 1e-05, |
|
"loss": 0.887, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.617936547589308, |
|
"grad_norm": 1.4070687341497352, |
|
"learning_rate": 1e-05, |
|
"loss": 0.918, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6187359480389708, |
|
"grad_norm": 1.4126858378429896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8249, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.6195353484886336, |
|
"grad_norm": 1.5659156867498283, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8313, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6203347489382963, |
|
"grad_norm": 1.4546097055174756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6211341493879591, |
|
"grad_norm": 1.4487557061202467, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9272, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6219335498376218, |
|
"grad_norm": 1.6276489271011279, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9506, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6227329502872845, |
|
"grad_norm": 1.6078488944139557, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9327, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.6235323507369472, |
|
"grad_norm": 1.4508829251993478, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9369, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.62433175118661, |
|
"grad_norm": 1.612776765629144, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9124, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6251311516362728, |
|
"grad_norm": 1.5608370989668476, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8758, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6259305520859355, |
|
"grad_norm": 1.5129857913859477, |
|
"learning_rate": 1e-05, |
|
"loss": 0.891, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6267299525355983, |
|
"grad_norm": 1.4321443280452155, |
|
"learning_rate": 1e-05, |
|
"loss": 0.865, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.627529352985261, |
|
"grad_norm": 1.5058564295604038, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8721, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6283287534349238, |
|
"grad_norm": 1.3807849349968864, |
|
"learning_rate": 1e-05, |
|
"loss": 0.886, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6291281538845865, |
|
"grad_norm": 1.544883025432354, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9102, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6299275543342493, |
|
"grad_norm": 1.4150356335689325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9361, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.630726954783912, |
|
"grad_norm": 1.5188112447723208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8874, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6315263552335748, |
|
"grad_norm": 1.5441059644669919, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9105, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6323257556832376, |
|
"grad_norm": 1.7469333936594207, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8572, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6331251561329003, |
|
"grad_norm": 1.6602103078622925, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9294, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6339245565825631, |
|
"grad_norm": 1.5925807734316682, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9744, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6347239570322258, |
|
"grad_norm": 1.5394065631369533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9164, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.6355233574818886, |
|
"grad_norm": 1.5935047510060332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8769, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6363227579315514, |
|
"grad_norm": 1.344142047079821, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9317, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6371221583812141, |
|
"grad_norm": 1.6200454224138392, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8334, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6379215588308769, |
|
"grad_norm": 1.5204016202631034, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9006, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6387209592805396, |
|
"grad_norm": 1.4920314496701772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8501, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.6395203597302024, |
|
"grad_norm": 1.3209265560951622, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9025, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6403197601798651, |
|
"grad_norm": 1.5701927388007535, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8747, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6411191606295279, |
|
"grad_norm": 1.3344795038412969, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9104, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6419185610791907, |
|
"grad_norm": 1.3938320762656133, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8409, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6427179615288534, |
|
"grad_norm": 1.4249626741383923, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8727, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.6435173619785162, |
|
"grad_norm": 1.6691646244578324, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8903, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6443167624281788, |
|
"grad_norm": 1.665931296408499, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8787, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6451161628778416, |
|
"grad_norm": 1.693200235102736, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8462, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6459155633275043, |
|
"grad_norm": 1.4005335152598601, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8637, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6467149637771671, |
|
"grad_norm": 1.5270196926285917, |
|
"learning_rate": 1e-05, |
|
"loss": 0.86, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.6475143642268298, |
|
"grad_norm": 1.4150346179433293, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8734, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6483137646764926, |
|
"grad_norm": 1.53091696763508, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8754, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6491131651261554, |
|
"grad_norm": 1.474027558315905, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9586, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6499125655758181, |
|
"grad_norm": 1.485859581480546, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9106, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6507119660254809, |
|
"grad_norm": 1.568460720361032, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8803, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.6515113664751436, |
|
"grad_norm": 1.5563031313131295, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9097, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6523107669248064, |
|
"grad_norm": 1.5440917854626373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9062, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6531101673744691, |
|
"grad_norm": 1.5083755089979098, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8674, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6539095678241319, |
|
"grad_norm": 1.508645000565019, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8815, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6547089682737947, |
|
"grad_norm": 1.6098529049906811, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8344, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6555083687234574, |
|
"grad_norm": 1.711843405154856, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9035, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6563077691731202, |
|
"grad_norm": 1.4578793644862615, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8953, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6571071696227829, |
|
"grad_norm": 1.5916969602134543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8868, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6579065700724457, |
|
"grad_norm": 1.7747741238079355, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8762, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6587059705221084, |
|
"grad_norm": 1.610938375922778, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9062, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6595053709717712, |
|
"grad_norm": 1.6873519485834756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8631, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.660304771421434, |
|
"grad_norm": 1.430821156429654, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9604, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6611041718710967, |
|
"grad_norm": 1.457720171628577, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8823, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6619035723207595, |
|
"grad_norm": 1.3817461766649617, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9294, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6627029727704222, |
|
"grad_norm": 1.4095998527286095, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8562, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.663502373220085, |
|
"grad_norm": 1.4396424977428872, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8256, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6643017736697477, |
|
"grad_norm": 1.38822130860778, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8717, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6651011741194104, |
|
"grad_norm": 1.4057148558281964, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9041, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6659005745690731, |
|
"grad_norm": 1.4772530181187606, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9316, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6666999750187359, |
|
"grad_norm": 1.5248374759511425, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8771, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.6674993754683987, |
|
"grad_norm": 1.5352948925732954, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9223, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6682987759180614, |
|
"grad_norm": 1.7695375410960146, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8801, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6690981763677242, |
|
"grad_norm": 1.3579372966834742, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8714, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6698975768173869, |
|
"grad_norm": 1.5174930728786662, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8513, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6706969772670497, |
|
"grad_norm": 1.5225177134174273, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8947, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.6714963777167124, |
|
"grad_norm": 1.773009293174373, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8279, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6722957781663752, |
|
"grad_norm": 1.5784630095216696, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8764, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.673095178616038, |
|
"grad_norm": 1.4708285523723468, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8816, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6738945790657007, |
|
"grad_norm": 1.43983591742943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9482, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6746939795153635, |
|
"grad_norm": 1.4485915743374498, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9028, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.6754933799650262, |
|
"grad_norm": 1.5016530521995441, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8731, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.676292780414689, |
|
"grad_norm": 1.3809441111375442, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9214, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6770921808643517, |
|
"grad_norm": 1.7240170055604878, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8947, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6778915813140145, |
|
"grad_norm": 1.3301304611766438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9231, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6786909817636773, |
|
"grad_norm": 1.4218727212100182, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8962, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.67949038221334, |
|
"grad_norm": 1.63010423786957, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8939, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6802897826630028, |
|
"grad_norm": 1.4495140324549352, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8875, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6810891831126655, |
|
"grad_norm": 1.5626000543974294, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8814, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6818885835623283, |
|
"grad_norm": 1.5909709047210767, |
|
"learning_rate": 1e-05, |
|
"loss": 0.883, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.682687984011991, |
|
"grad_norm": 1.388722303171786, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8748, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6834873844616538, |
|
"grad_norm": 1.385369830792288, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8989, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6842867849113166, |
|
"grad_norm": 1.4882389241813443, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8844, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6850861853609793, |
|
"grad_norm": 1.5186240399620652, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8171, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6858855858106421, |
|
"grad_norm": 1.6078033804533332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8521, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6866849862603047, |
|
"grad_norm": 1.5272879309131646, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8721, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6874843867099675, |
|
"grad_norm": 1.3931816328350173, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8932, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6882837871596302, |
|
"grad_norm": 1.907029791689304, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8905, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.689083187609293, |
|
"grad_norm": 1.556416461497499, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8986, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6898825880589557, |
|
"grad_norm": 1.4869019645563188, |
|
"learning_rate": 1e-05, |
|
"loss": 0.877, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6906819885086185, |
|
"grad_norm": 1.3740940835208075, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9277, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6914813889582813, |
|
"grad_norm": 1.4834340760108946, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9176, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.692280789407944, |
|
"grad_norm": 1.4499951936894326, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8522, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6930801898576068, |
|
"grad_norm": 1.3889909352429337, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8675, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6938795903072695, |
|
"grad_norm": 1.3995933987812776, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8729, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6946789907569323, |
|
"grad_norm": 1.4764512256041193, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8209, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.695478391206595, |
|
"grad_norm": 1.465237648051072, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8008, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6962777916562578, |
|
"grad_norm": 1.3466097106594175, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8931, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6970771921059206, |
|
"grad_norm": 1.5104958792040775, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8828, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6978765925555833, |
|
"grad_norm": 1.5720653267427949, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9531, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6986759930052461, |
|
"grad_norm": 1.5492614550562422, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9313, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6994753934549088, |
|
"grad_norm": 1.303038024217404, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9034, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7002747939045716, |
|
"grad_norm": 1.4497112842693025, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8917, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7010741943542343, |
|
"grad_norm": 1.530596911055762, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8814, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7018735948038971, |
|
"grad_norm": 1.5261791959543383, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8853, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7026729952535599, |
|
"grad_norm": 1.527060521262994, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8882, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7034723957032226, |
|
"grad_norm": 1.4906207672568565, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8723, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7042717961528854, |
|
"grad_norm": 1.480851718176504, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8692, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7050711966025481, |
|
"grad_norm": 1.47139179353177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.888, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.7058705970522109, |
|
"grad_norm": 1.4278237515234393, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9221, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7066699975018736, |
|
"grad_norm": 1.573532967010904, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9087, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7074693979515363, |
|
"grad_norm": 1.590669913446065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8771, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.708268798401199, |
|
"grad_norm": 1.5285176052901992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8884, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7090681988508618, |
|
"grad_norm": 1.5768609209939375, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8715, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.7098675993005246, |
|
"grad_norm": 1.7625584577995699, |
|
"learning_rate": 1e-05, |
|
"loss": 0.936, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7106669997501873, |
|
"grad_norm": 1.6615792785808772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8865, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7114664001998501, |
|
"grad_norm": 1.3836071347408263, |
|
"learning_rate": 1e-05, |
|
"loss": 0.861, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7122658006495128, |
|
"grad_norm": 1.5374171878390779, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8533, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7130652010991756, |
|
"grad_norm": 1.4960191138124015, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8971, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.7138646015488384, |
|
"grad_norm": 1.3462286304870854, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9002, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7146640019985011, |
|
"grad_norm": 1.516533149153394, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8495, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7154634024481639, |
|
"grad_norm": 1.4741671333939332, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8702, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7162628028978266, |
|
"grad_norm": 1.412230967356979, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8839, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7170622033474894, |
|
"grad_norm": 1.508657424433702, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9207, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7178616037971521, |
|
"grad_norm": 1.5335780024625871, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9414, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7186610042468149, |
|
"grad_norm": 1.522192545285303, |
|
"learning_rate": 1e-05, |
|
"loss": 0.85, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.7194604046964777, |
|
"grad_norm": 1.433190511112366, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8603, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7202598051461404, |
|
"grad_norm": 1.505735858560805, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9305, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7210592055958032, |
|
"grad_norm": 1.3709122596783658, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9035, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7218586060454659, |
|
"grad_norm": 1.4784407355636868, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8133, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7226580064951287, |
|
"grad_norm": 1.4139431509162406, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8757, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7234574069447914, |
|
"grad_norm": 1.483920166289949, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8908, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7242568073944542, |
|
"grad_norm": 1.355169839026166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8778, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.725056207844117, |
|
"grad_norm": 1.5849754730542471, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8126, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7258556082937797, |
|
"grad_norm": 1.4415392226295947, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9533, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7266550087434425, |
|
"grad_norm": 1.423271400925077, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8991, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.7274544091931052, |
|
"grad_norm": 1.2581118411370464, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8691, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7282538096427679, |
|
"grad_norm": 1.6042455117982117, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9323, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7290532100924306, |
|
"grad_norm": 1.7219536250131735, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9108, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.7298526105420934, |
|
"grad_norm": 1.39448532764431, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8465, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7306520109917561, |
|
"grad_norm": 1.3967526960492356, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8673, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.7314514114414189, |
|
"grad_norm": 1.6077994734490668, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8955, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7322508118910817, |
|
"grad_norm": 1.3203640300504973, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7997, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7330502123407444, |
|
"grad_norm": 1.4566518226470033, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8296, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.7338496127904072, |
|
"grad_norm": 1.7293187013351636, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9021, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7346490132400699, |
|
"grad_norm": 1.5383747305896551, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8973, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.7354484136897327, |
|
"grad_norm": 1.4275975245981607, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8612, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7362478141393954, |
|
"grad_norm": 1.472214485322947, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9005, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7370472145890582, |
|
"grad_norm": 1.4170406969180516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8952, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.737846615038721, |
|
"grad_norm": 1.4134994732170305, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8427, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7386460154883837, |
|
"grad_norm": 1.5810245176397593, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8873, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.7394454159380465, |
|
"grad_norm": 1.572493026866151, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8999, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7402448163877092, |
|
"grad_norm": 1.4558846312035074, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9221, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.741044216837372, |
|
"grad_norm": 1.41669477168302, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8994, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.7418436172870347, |
|
"grad_norm": 1.433461160216514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8749, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7426430177366975, |
|
"grad_norm": 1.5673163590141157, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8586, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.7434424181863603, |
|
"grad_norm": 1.4736635147050137, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9211, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.744241818636023, |
|
"grad_norm": 1.4647228645746486, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8332, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7450412190856858, |
|
"grad_norm": 1.3876657153509906, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8481, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.7458406195353485, |
|
"grad_norm": 1.469313389155329, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9234, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7466400199850113, |
|
"grad_norm": 1.415959193503077, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8794, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.747439420434674, |
|
"grad_norm": 1.4597571617980725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8565, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7482388208843368, |
|
"grad_norm": 1.5271437643331571, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8826, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7490382213339996, |
|
"grad_norm": 1.4956114964893394, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9085, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.7498376217836622, |
|
"grad_norm": 1.4732612528806723, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8247, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.750637022233325, |
|
"grad_norm": 1.4787986640658028, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9317, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7514364226829877, |
|
"grad_norm": 1.7252017457319206, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8741, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7522358231326505, |
|
"grad_norm": 1.4487217007150137, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8629, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7530352235823132, |
|
"grad_norm": 1.5157039585564798, |
|
"learning_rate": 1e-05, |
|
"loss": 0.897, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.753834624031976, |
|
"grad_norm": 1.611412160953887, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9021, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7546340244816387, |
|
"grad_norm": 1.4394146060850934, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8281, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.7554334249313015, |
|
"grad_norm": 1.453348907195491, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8928, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7562328253809643, |
|
"grad_norm": 1.4907250315835585, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7856, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.757032225830627, |
|
"grad_norm": 1.49481328462233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9155, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.7578316262802898, |
|
"grad_norm": 1.3751698030196142, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9301, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7586310267299525, |
|
"grad_norm": 1.4444725328440537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8655, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7594304271796153, |
|
"grad_norm": 1.5456042887758088, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8901, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.760229827629278, |
|
"grad_norm": 1.4949432017846453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.916, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7610292280789408, |
|
"grad_norm": 1.6011034169877894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.891, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.7618286285286036, |
|
"grad_norm": 1.3042950526088992, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9446, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7626280289782663, |
|
"grad_norm": 1.5837586059992244, |
|
"learning_rate": 1e-05, |
|
"loss": 0.91, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.7634274294279291, |
|
"grad_norm": 1.4339719484551816, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9016, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7642268298775918, |
|
"grad_norm": 1.5025550156701537, |
|
"learning_rate": 1e-05, |
|
"loss": 0.879, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7650262303272546, |
|
"grad_norm": 1.5378372958159126, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9063, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.7658256307769173, |
|
"grad_norm": 1.5230827569900542, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8989, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7666250312265801, |
|
"grad_norm": 1.5291223084053325, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8616, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.7674244316762429, |
|
"grad_norm": 1.4773136700451888, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8424, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7682238321259056, |
|
"grad_norm": 1.2093245102672463, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8848, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.7690232325755684, |
|
"grad_norm": 1.8571716921307402, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8495, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.7698226330252311, |
|
"grad_norm": 1.4472646694433717, |
|
"learning_rate": 1e-05, |
|
"loss": 0.867, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7706220334748938, |
|
"grad_norm": 1.4580828263402077, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9002, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7714214339245565, |
|
"grad_norm": 1.499450946544706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9073, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7722208343742193, |
|
"grad_norm": 1.4461364940439836, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8778, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.773020234823882, |
|
"grad_norm": 1.506316728494387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.845, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7738196352735448, |
|
"grad_norm": 1.3561052135711964, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8722, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7746190357232076, |
|
"grad_norm": 1.4017997594585556, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8602, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7754184361728703, |
|
"grad_norm": 1.4673374430145514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8503, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7762178366225331, |
|
"grad_norm": 1.6150949805416606, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8194, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7770172370721958, |
|
"grad_norm": 1.4293495610183653, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8719, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7778166375218586, |
|
"grad_norm": 1.34711255646197, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9134, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7786160379715213, |
|
"grad_norm": 1.486875510521667, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8282, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7794154384211841, |
|
"grad_norm": 1.3975406817023381, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8696, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7802148388708469, |
|
"grad_norm": 1.590401419774706, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8639, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7810142393205096, |
|
"grad_norm": 1.6348383990486186, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8747, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7818136397701724, |
|
"grad_norm": 1.316266061656018, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8912, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7826130402198351, |
|
"grad_norm": 1.4071917948886756, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8258, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7834124406694979, |
|
"grad_norm": 1.4469880221919649, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8354, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7842118411191606, |
|
"grad_norm": 1.2870843290387057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.895, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7850112415688234, |
|
"grad_norm": 1.3149196281524491, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8512, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.7858106420184862, |
|
"grad_norm": 1.4704086234102491, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9012, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7866100424681489, |
|
"grad_norm": 1.3828361638550721, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8971, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7874094429178117, |
|
"grad_norm": 1.6052079287723495, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8577, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7882088433674744, |
|
"grad_norm": 1.6793204061607632, |
|
"learning_rate": 1e-05, |
|
"loss": 0.876, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7890082438171372, |
|
"grad_norm": 1.4036184553448683, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8983, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.7898076442668, |
|
"grad_norm": 1.3858819411819097, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8535, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7906070447164627, |
|
"grad_norm": 1.549784899074943, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8849, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7914064451661255, |
|
"grad_norm": 1.4777960666446712, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8666, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7922058456157881, |
|
"grad_norm": 1.9201257825258455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8409, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7930052460654509, |
|
"grad_norm": 1.5498417440527896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8805, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.7938046465151136, |
|
"grad_norm": 1.4317539743714072, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8635, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7946040469647764, |
|
"grad_norm": 1.5358225135776136, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8708, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7954034474144391, |
|
"grad_norm": 1.4138952398073754, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9169, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7962028478641019, |
|
"grad_norm": 1.306882525453356, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8839, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7970022483137646, |
|
"grad_norm": 1.4151969180638062, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9003, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7978016487634274, |
|
"grad_norm": 1.275880598076204, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8549, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7986010492130902, |
|
"grad_norm": 1.4001477420212065, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8505, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7994004496627529, |
|
"grad_norm": 1.4186294121350504, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9215, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8001998501124157, |
|
"grad_norm": 1.2933825079861516, |
|
"learning_rate": 1e-05, |
|
"loss": 0.851, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8009992505620784, |
|
"grad_norm": 1.3844838762102727, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8491, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8017986510117412, |
|
"grad_norm": 1.424315745643642, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8568, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.802598051461404, |
|
"grad_norm": 1.536779666402137, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8616, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8033974519110667, |
|
"grad_norm": 1.5821296355398455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8527, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8041968523607295, |
|
"grad_norm": 1.4455856617071001, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8713, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8049962528103922, |
|
"grad_norm": 1.2354619055674243, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8993, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.805795653260055, |
|
"grad_norm": 1.6070375123923897, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8743, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8065950537097177, |
|
"grad_norm": 1.4541848794736738, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8581, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8073944541593805, |
|
"grad_norm": 1.4226597509638712, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8627, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8081938546090432, |
|
"grad_norm": 1.4541863541400335, |
|
"learning_rate": 1e-05, |
|
"loss": 0.895, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.808993255058706, |
|
"grad_norm": 1.5062531375485146, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8703, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8097926555083688, |
|
"grad_norm": 1.4276851237794737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9105, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8105920559580315, |
|
"grad_norm": 1.4788273876522071, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8367, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.8113914564076943, |
|
"grad_norm": 1.407145240499365, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8145, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.812190856857357, |
|
"grad_norm": 1.8014438432061057, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8819, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8129902573070197, |
|
"grad_norm": 1.372209038359735, |
|
"learning_rate": 1e-05, |
|
"loss": 0.868, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8137896577566824, |
|
"grad_norm": 1.5495856867435909, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8626, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8145890582063452, |
|
"grad_norm": 1.617836176498916, |
|
"learning_rate": 1e-05, |
|
"loss": 0.802, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.815388458656008, |
|
"grad_norm": 1.4721686410259016, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8835, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8161878591056707, |
|
"grad_norm": 1.5756457858641464, |
|
"learning_rate": 1e-05, |
|
"loss": 0.903, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8169872595553335, |
|
"grad_norm": 1.5229101192600658, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9111, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8177866600049962, |
|
"grad_norm": 1.3993305196243857, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8218, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.818586060454659, |
|
"grad_norm": 1.760678360532871, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8942, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8193854609043217, |
|
"grad_norm": 1.5000785334067135, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8791, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8201848613539845, |
|
"grad_norm": 1.6674955223352013, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8626, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8209842618036473, |
|
"grad_norm": 1.6318940473847319, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9127, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.82178366225331, |
|
"grad_norm": 1.3538540553221186, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8845, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8225830627029728, |
|
"grad_norm": 1.6241141376741397, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9048, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.8233824631526355, |
|
"grad_norm": 1.6076201928103848, |
|
"learning_rate": 1e-05, |
|
"loss": 0.911, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.8241818636022983, |
|
"grad_norm": 1.6514943246242055, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9161, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.824981264051961, |
|
"grad_norm": 1.4757470594102153, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8522, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8257806645016238, |
|
"grad_norm": 1.6550497620549556, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8821, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8265800649512866, |
|
"grad_norm": 1.416433208923937, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8757, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.8273794654009493, |
|
"grad_norm": 1.2778570567318692, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9259, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.8281788658506121, |
|
"grad_norm": 1.5852330201994453, |
|
"learning_rate": 1e-05, |
|
"loss": 0.833, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8289782663002748, |
|
"grad_norm": 1.5240272977432132, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8842, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.8297776667499376, |
|
"grad_norm": 1.2798050681953308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8338, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.8305770671996003, |
|
"grad_norm": 1.5489648653630288, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8501, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.8313764676492631, |
|
"grad_norm": 1.435906811134758, |
|
"learning_rate": 1e-05, |
|
"loss": 0.867, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8321758680989259, |
|
"grad_norm": 1.4702097962874583, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8378, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.8329752685485886, |
|
"grad_norm": 1.594108179583473, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8765, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.8337746689982513, |
|
"grad_norm": 1.5200902751808993, |
|
"learning_rate": 1e-05, |
|
"loss": 0.877, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.834574069447914, |
|
"grad_norm": 1.438298229451603, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8663, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.8353734698975768, |
|
"grad_norm": 1.454018494694832, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8918, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8361728703472395, |
|
"grad_norm": 1.5005502708015002, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8373, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.8369722707969023, |
|
"grad_norm": 1.291500552799284, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8418, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.837771671246565, |
|
"grad_norm": 1.464971545003121, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8986, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.8385710716962278, |
|
"grad_norm": 1.3970450556504503, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8829, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.8393704721458906, |
|
"grad_norm": 1.4873155145975965, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9039, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8401698725955533, |
|
"grad_norm": 1.4116749220099283, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8392, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.8409692730452161, |
|
"grad_norm": 1.375469878009426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8463, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.8417686734948788, |
|
"grad_norm": 1.2984722488631455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8578, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8425680739445416, |
|
"grad_norm": 1.377137242087716, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8753, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.8433674743942043, |
|
"grad_norm": 1.513617649555769, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9161, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8441668748438671, |
|
"grad_norm": 1.3943343336302483, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8984, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8449662752935299, |
|
"grad_norm": 1.4829273034743662, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9456, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8457656757431926, |
|
"grad_norm": 1.300813156773205, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8848, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8465650761928554, |
|
"grad_norm": 1.6171681830366456, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8825, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.8473644766425181, |
|
"grad_norm": 1.4746785824924948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8782, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.8481638770921809, |
|
"grad_norm": 1.4773421322909972, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8511, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8489632775418436, |
|
"grad_norm": 1.2712734121324603, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8512, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8497626779915064, |
|
"grad_norm": 1.4673347511942942, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9082, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8505620784411692, |
|
"grad_norm": 1.3648492502704213, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8857, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8513614788908319, |
|
"grad_norm": 1.4139141501508512, |
|
"learning_rate": 1e-05, |
|
"loss": 0.862, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.8521608793404947, |
|
"grad_norm": 1.477130924813934, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9706, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8529602797901574, |
|
"grad_norm": 1.4482250453004122, |
|
"learning_rate": 1e-05, |
|
"loss": 0.906, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8537596802398202, |
|
"grad_norm": 1.522992168116749, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9545, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8545590806894829, |
|
"grad_norm": 1.5261726249592624, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9238, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8553584811391456, |
|
"grad_norm": 1.4267309300725217, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8925, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.8561578815888083, |
|
"grad_norm": 1.4527959506992734, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8994, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8569572820384711, |
|
"grad_norm": 1.5030190804190187, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9301, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8577566824881339, |
|
"grad_norm": 1.2235265071589685, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8509, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8585560829377966, |
|
"grad_norm": 1.4074654499219896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.889, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8593554833874594, |
|
"grad_norm": 1.2944008535061877, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8966, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.8601548838371221, |
|
"grad_norm": 1.3310895995556136, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8138, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8609542842867849, |
|
"grad_norm": 1.427408631037752, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8852, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8617536847364476, |
|
"grad_norm": 1.4692371711172514, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8797, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8625530851861104, |
|
"grad_norm": 1.541692282374257, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8589, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8633524856357732, |
|
"grad_norm": 1.5815910403079887, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8625, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.8641518860854359, |
|
"grad_norm": 1.4959179824929254, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9189, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8649512865350987, |
|
"grad_norm": 1.5319580336293697, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8267, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8657506869847614, |
|
"grad_norm": 1.486040602833083, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8625, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8665500874344242, |
|
"grad_norm": 1.424254094608181, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8943, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.867349487884087, |
|
"grad_norm": 1.643630623556634, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9188, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8681488883337497, |
|
"grad_norm": 1.452534027382345, |
|
"learning_rate": 1e-05, |
|
"loss": 0.88, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8689482887834125, |
|
"grad_norm": 1.5479604306192913, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7943, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8697476892330752, |
|
"grad_norm": 1.363070121645927, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8416, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.870547089682738, |
|
"grad_norm": 1.4551924760921788, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8748, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8713464901324007, |
|
"grad_norm": 1.8072081219985316, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8955, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.8721458905820635, |
|
"grad_norm": 1.480345916799882, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8984, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8729452910317262, |
|
"grad_norm": 1.513951237735827, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8573, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.873744691481389, |
|
"grad_norm": 1.4012284095364107, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8448, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8745440919310518, |
|
"grad_norm": 1.4316182193855909, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8778, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8753434923807145, |
|
"grad_norm": 1.4767564148326937, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8769, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.8761428928303772, |
|
"grad_norm": 1.4996522495810245, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8928, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8769422932800399, |
|
"grad_norm": 1.4836247078704627, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8657, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8777416937297027, |
|
"grad_norm": 1.4148846725052078, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8763, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8785410941793654, |
|
"grad_norm": 1.8564930284795111, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8362, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8793404946290282, |
|
"grad_norm": 1.3887316999375894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8692, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.880139895078691, |
|
"grad_norm": 4.721320996449426, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8951, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8809392955283537, |
|
"grad_norm": 1.801959602001512, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9127, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8817386959780165, |
|
"grad_norm": 1.4999059990761596, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8277, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8825380964276792, |
|
"grad_norm": 1.580749043430391, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8532, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.883337496877342, |
|
"grad_norm": 4.723585804015321, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8846, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.8841368973270047, |
|
"grad_norm": 10.533804682370834, |
|
"learning_rate": 1e-05, |
|
"loss": 0.915, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8849362977766675, |
|
"grad_norm": 2.7074849652786948, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8588, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8857356982263302, |
|
"grad_norm": 1.8291875456761892, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8274, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.886535098675993, |
|
"grad_norm": 4.390203546717027, |
|
"learning_rate": 1e-05, |
|
"loss": 0.908, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8873344991256558, |
|
"grad_norm": 3.8103014314112156, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9094, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.8881338995753185, |
|
"grad_norm": 455.28682152314866, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8881, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8889333000249813, |
|
"grad_norm": 4.4477324217626295, |
|
"learning_rate": 1e-05, |
|
"loss": 0.891, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.889732700474644, |
|
"grad_norm": 1.3884130302591122, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8485, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8905321009243068, |
|
"grad_norm": 1.4938176798235159, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8438, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8913315013739695, |
|
"grad_norm": 1.5434085929606869, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8977, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8921309018236323, |
|
"grad_norm": 1.3286197641197046, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8355, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8929303022732951, |
|
"grad_norm": 1.4646146883912168, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8735, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8937297027229578, |
|
"grad_norm": 8.122892577298567, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9016, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8945291031726206, |
|
"grad_norm": 1.4376773752975496, |
|
"learning_rate": 1e-05, |
|
"loss": 0.913, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8953285036222833, |
|
"grad_norm": 2.146749128485352, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8643, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8961279040719461, |
|
"grad_norm": 1.7549423766927372, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8559, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8969273045216088, |
|
"grad_norm": 1.6784215753386844, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8467, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8977267049712715, |
|
"grad_norm": 1.5205155953208587, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8918, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8985261054209343, |
|
"grad_norm": 1.439844948580554, |
|
"learning_rate": 1e-05, |
|
"loss": 0.846, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.899325505870597, |
|
"grad_norm": 1.3494413427515104, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8701, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9001249063202598, |
|
"grad_norm": 1.4785482334232822, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8762, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9009243067699225, |
|
"grad_norm": 1.6204723133056338, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8618, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9017237072195853, |
|
"grad_norm": 1.5410838002577578, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8352, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.902523107669248, |
|
"grad_norm": 1.408368948793772, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8143, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9033225081189108, |
|
"grad_norm": 1.3840905876298821, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8497, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9041219085685736, |
|
"grad_norm": 1.7756397607717793, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9065, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9049213090182363, |
|
"grad_norm": 1.3699433150113711, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8455, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9057207094678991, |
|
"grad_norm": 1.4303100795006611, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8574, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9065201099175618, |
|
"grad_norm": 1.3913259705586178, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8615, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9073195103672246, |
|
"grad_norm": 1.4143231716945688, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9084, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9081189108168873, |
|
"grad_norm": 1.3947073651825206, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8926, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9089183112665501, |
|
"grad_norm": 1.415175153929991, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8153, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9097177117162129, |
|
"grad_norm": 1.3554176947555092, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9018, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9105171121658756, |
|
"grad_norm": 1.3676437829569341, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8339, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9113165126155384, |
|
"grad_norm": 1.2248182189476722, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8865, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9121159130652011, |
|
"grad_norm": 1.5759658066895652, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8736, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9129153135148639, |
|
"grad_norm": 1.2978962092251058, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9114, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9137147139645266, |
|
"grad_norm": 1.3829867839638308, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8636, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9145141144141894, |
|
"grad_norm": 1.4786554185436886, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9103, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.9153135148638522, |
|
"grad_norm": 1.5247139561552725, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8403, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9161129153135149, |
|
"grad_norm": 1.3888872413761024, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8457, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9169123157631777, |
|
"grad_norm": 1.426097741347822, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8769, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9177117162128404, |
|
"grad_norm": 1.5560059286195493, |
|
"learning_rate": 1e-05, |
|
"loss": 0.864, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9185111166625031, |
|
"grad_norm": 1.4406955993681905, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8668, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9193105171121658, |
|
"grad_norm": 1.489597707567999, |
|
"learning_rate": 1e-05, |
|
"loss": 0.855, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9201099175618286, |
|
"grad_norm": 1.379157014673917, |
|
"learning_rate": 1e-05, |
|
"loss": 0.935, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.9209093180114913, |
|
"grad_norm": 1.4949181541382415, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8664, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9217087184611541, |
|
"grad_norm": 1.4980233869730157, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8224, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9225081189108169, |
|
"grad_norm": 1.4050924624234455, |
|
"learning_rate": 1e-05, |
|
"loss": 0.891, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.9233075193604796, |
|
"grad_norm": 1.6467240441672264, |
|
"learning_rate": 1e-05, |
|
"loss": 0.834, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9241069198101424, |
|
"grad_norm": 1.3421364569781595, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8436, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.9249063202598051, |
|
"grad_norm": 1.246062501997166, |
|
"learning_rate": 1e-05, |
|
"loss": 0.869, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9257057207094679, |
|
"grad_norm": 1.4858806518325938, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9168, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9265051211591306, |
|
"grad_norm": 1.4777896246461322, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8593, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.9273045216087934, |
|
"grad_norm": 1.4266934813336434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8943, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9281039220584562, |
|
"grad_norm": 1.4729608886697982, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8981, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9289033225081189, |
|
"grad_norm": 1.4856931952636183, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8623, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9297027229577817, |
|
"grad_norm": 1.4313714774475765, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8441, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9305021234074444, |
|
"grad_norm": 1.3975576369260547, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8337, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9313015238571072, |
|
"grad_norm": 1.4706185165998424, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8336, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9321009243067699, |
|
"grad_norm": 1.3837948320627937, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8741, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9329003247564327, |
|
"grad_norm": 1.3855675072168605, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9235, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9336997252060955, |
|
"grad_norm": 1.5034589343394933, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8267, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9344991256557582, |
|
"grad_norm": 1.5081619715031618, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8912, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.935298526105421, |
|
"grad_norm": 1.570365541340616, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8589, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.9360979265550837, |
|
"grad_norm": 1.368058151600139, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8669, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9368973270047465, |
|
"grad_norm": 1.4254090126900538, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8997, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.9376967274544092, |
|
"grad_norm": 1.4563379308659208, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8378, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.938496127904072, |
|
"grad_norm": 1.480841767300247, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8428, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.9392955283537348, |
|
"grad_norm": 1.4441769797776909, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8308, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9400949288033974, |
|
"grad_norm": 1.5331892724720704, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8733, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9408943292530602, |
|
"grad_norm": 1.3897737412131999, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8255, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9416937297027229, |
|
"grad_norm": 1.4328437965242162, |
|
"learning_rate": 1e-05, |
|
"loss": 0.842, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.9424931301523857, |
|
"grad_norm": 1.1601633219334695, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8528, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9432925306020484, |
|
"grad_norm": 1.3663250801686486, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8325, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9440919310517112, |
|
"grad_norm": 1.4998097326159285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9421, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9448913315013739, |
|
"grad_norm": 1.4608832729340682, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8508, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9456907319510367, |
|
"grad_norm": 1.562661791032361, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9003, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.9464901324006995, |
|
"grad_norm": 1.4455704359698196, |
|
"learning_rate": 1e-05, |
|
"loss": 0.964, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.9472895328503622, |
|
"grad_norm": 1.5776281729460202, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8575, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.948088933300025, |
|
"grad_norm": 1.549921877625713, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8504, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9488883337496877, |
|
"grad_norm": 1.3389592770549843, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8488, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9496877341993505, |
|
"grad_norm": 1.4429492254870946, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8246, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9504871346490132, |
|
"grad_norm": 1.4238306426926814, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8696, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.951286535098676, |
|
"grad_norm": 1.4646305779731619, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8502, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9520859355483388, |
|
"grad_norm": 1.494347031973423, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8642, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9528853359980015, |
|
"grad_norm": 1.963685019515452, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8507, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9536847364476643, |
|
"grad_norm": 1.3925026944755527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.833, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.954484136897327, |
|
"grad_norm": 1.4062902940189372, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9028, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9552835373469898, |
|
"grad_norm": 1.2343971080574194, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8522, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9560829377966525, |
|
"grad_norm": 1.4221098313944995, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8577, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9568823382463153, |
|
"grad_norm": 1.5290533732550755, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8093, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9576817386959781, |
|
"grad_norm": 1.3961174339920084, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8647, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9584811391456408, |
|
"grad_norm": 1.4151475464959773, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8868, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9592805395953036, |
|
"grad_norm": 1.513441275615894, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8647, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9600799400449663, |
|
"grad_norm": 1.3820417006090109, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8477, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.960879340494629, |
|
"grad_norm": 1.4387974434664792, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8536, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9616787409442917, |
|
"grad_norm": 1.5784176967006853, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8778, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9624781413939545, |
|
"grad_norm": 1.4269915386314171, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8572, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9632775418436172, |
|
"grad_norm": 1.3866388696845584, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8086, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.96407694229328, |
|
"grad_norm": 1.432076302146608, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8454, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9648763427429428, |
|
"grad_norm": 1.4992577974774581, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7908, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9656757431926055, |
|
"grad_norm": 1.497039314194387, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8544, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.9664751436422683, |
|
"grad_norm": 1.3007974080201803, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8477, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.967274544091931, |
|
"grad_norm": 1.5618516258742383, |
|
"learning_rate": 1e-05, |
|
"loss": 0.835, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9680739445415938, |
|
"grad_norm": 1.4210670398569833, |
|
"learning_rate": 1e-05, |
|
"loss": 0.832, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9688733449912565, |
|
"grad_norm": 1.5510313623384935, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8602, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9696727454409193, |
|
"grad_norm": 1.521288522133268, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8861, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.9704721458905821, |
|
"grad_norm": 1.5884079297863427, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8258, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9712715463402448, |
|
"grad_norm": 1.3385008591661527, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8272, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9720709467899076, |
|
"grad_norm": 1.3382297608246647, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8984, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9728703472395703, |
|
"grad_norm": 1.548407496139496, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8649, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9736697476892331, |
|
"grad_norm": 1.336053175129197, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8958, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9744691481388958, |
|
"grad_norm": 1.3748017255834115, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8486, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9752685485885586, |
|
"grad_norm": 1.5234383744628233, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8617, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9760679490382214, |
|
"grad_norm": 1.4764432977833921, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9367, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9768673494878841, |
|
"grad_norm": 1.3631292544649363, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8714, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9776667499375469, |
|
"grad_norm": 1.3171008529103865, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8285, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.9784661503872096, |
|
"grad_norm": 1.4354745441705121, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9037, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9792655508368724, |
|
"grad_norm": 1.3919378193960412, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9309, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9800649512865351, |
|
"grad_norm": 1.4461454394492737, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8928, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9808643517361979, |
|
"grad_norm": 1.3724038374747247, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9014, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9816637521858605, |
|
"grad_norm": 1.351928124821094, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8343, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9824631526355233, |
|
"grad_norm": 1.3143104444611924, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8804, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9832625530851861, |
|
"grad_norm": 1.5074208283788533, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8708, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9840619535348488, |
|
"grad_norm": 1.4675362219576862, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8545, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9848613539845116, |
|
"grad_norm": 1.4044134991072301, |
|
"learning_rate": 1e-05, |
|
"loss": 0.852, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9856607544341743, |
|
"grad_norm": 1.4731748400546958, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9222, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9864601548838371, |
|
"grad_norm": 1.4128661942086913, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8997, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9872595553334998, |
|
"grad_norm": 1.4368853581391632, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8672, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9880589557831626, |
|
"grad_norm": 1.453673257213547, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8779, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9888583562328254, |
|
"grad_norm": 1.7470099861196207, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9028, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9896577566824881, |
|
"grad_norm": 1.2697243063535835, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8677, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.9904571571321509, |
|
"grad_norm": 1.5282634647109214, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8824, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9912565575818136, |
|
"grad_norm": 1.5236456464951182, |
|
"learning_rate": 1e-05, |
|
"loss": 0.875, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9920559580314764, |
|
"grad_norm": 1.2831857679108445, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9118, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9928553584811391, |
|
"grad_norm": 1.4427270743757334, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8488, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9936547589308019, |
|
"grad_norm": 1.6145144060086711, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8594, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.9944541593804647, |
|
"grad_norm": 1.5536788191330388, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8736, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9952535598301274, |
|
"grad_norm": 1.488891430752203, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8824, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9960529602797902, |
|
"grad_norm": 1.7670913427025423, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8481, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9968523607294529, |
|
"grad_norm": 1.4017507511502658, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8422, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9976517611791157, |
|
"grad_norm": 1.3372936110607956, |
|
"learning_rate": 1e-05, |
|
"loss": 0.842, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.9984511616287784, |
|
"grad_norm": 1.3328353321262152, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8982, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9992505620784412, |
|
"grad_norm": 1.4055115515472896, |
|
"learning_rate": 1e-05, |
|
"loss": 0.8433, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9992505620784412, |
|
"step": 1250, |
|
"total_flos": 826404337876992.0, |
|
"train_loss": 0.9163284823417663, |
|
"train_runtime": 166824.9366, |
|
"train_samples_per_second": 0.48, |
|
"train_steps_per_second": 0.007 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 826404337876992.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|