{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.28035538005923,
  "eval_steps": 355,
  "global_step": 710,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003948667324777887,
      "grad_norm": 0.5502959489822388,
      "learning_rate": 2e-05,
      "loss": 1.0032,
      "step": 1
    },
    {
      "epoch": 0.0003948667324777887,
      "eval_loss": 1.3028720617294312,
      "eval_runtime": 63.423,
      "eval_samples_per_second": 16.824,
      "eval_steps_per_second": 8.42,
      "step": 1
    },
    {
      "epoch": 0.0007897334649555774,
      "grad_norm": 0.5348024368286133,
      "learning_rate": 4e-05,
      "loss": 1.2158,
      "step": 2
    },
    {
      "epoch": 0.0011846001974333662,
      "grad_norm": 0.5212297439575195,
      "learning_rate": 6e-05,
      "loss": 1.2107,
      "step": 3
    },
    {
      "epoch": 0.0015794669299111549,
      "grad_norm": 0.5010500550270081,
      "learning_rate": 8e-05,
      "loss": 1.7374,
      "step": 4
    },
    {
      "epoch": 0.0019743336623889436,
      "grad_norm": 0.566511869430542,
      "learning_rate": 0.0001,
      "loss": 1.263,
      "step": 5
    },
    {
      "epoch": 0.0023692003948667323,
      "grad_norm": 0.558596134185791,
      "learning_rate": 0.00012,
      "loss": 1.1253,
      "step": 6
    },
    {
      "epoch": 0.002764067127344521,
      "grad_norm": 0.525932788848877,
      "learning_rate": 0.00014,
      "loss": 1.155,
      "step": 7
    },
    {
      "epoch": 0.0031589338598223098,
      "grad_norm": 0.5322596430778503,
      "learning_rate": 0.00016,
      "loss": 1.173,
      "step": 8
    },
    {
      "epoch": 0.003553800592300099,
      "grad_norm": 0.5490784049034119,
      "learning_rate": 0.00018,
      "loss": 1.1944,
      "step": 9
    },
    {
      "epoch": 0.003948667324777887,
      "grad_norm": 0.5888460278511047,
      "learning_rate": 0.0002,
      "loss": 1.1346,
      "step": 10
    },
    {
      "epoch": 0.004343534057255676,
      "grad_norm": 0.5463979840278625,
      "learning_rate": 0.0001999997517831015,
      "loss": 1.2648,
      "step": 11
    },
    {
      "epoch": 0.004738400789733465,
      "grad_norm": 0.6944459676742554,
      "learning_rate": 0.00019999900713363826,
      "loss": 1.1832,
      "step": 12
    },
    {
      "epoch": 0.005133267522211254,
      "grad_norm": 0.675365149974823,
      "learning_rate": 0.0001999977660553069,
      "loss": 1.1111,
      "step": 13
    },
    {
      "epoch": 0.005528134254689042,
      "grad_norm": 0.6740691065788269,
      "learning_rate": 0.00019999602855426865,
      "loss": 1.0508,
      "step": 14
    },
    {
      "epoch": 0.005923000987166831,
      "grad_norm": 0.6092358827590942,
      "learning_rate": 0.00019999379463914898,
      "loss": 1.048,
      "step": 15
    },
    {
      "epoch": 0.0063178677196446195,
      "grad_norm": 0.5579732656478882,
      "learning_rate": 0.0001999910643210378,
      "loss": 0.9692,
      "step": 16
    },
    {
      "epoch": 0.006712734452122409,
      "grad_norm": 0.5955824851989746,
      "learning_rate": 0.0001999878376134894,
      "loss": 1.3142,
      "step": 17
    },
    {
      "epoch": 0.007107601184600198,
      "grad_norm": 0.49893510341644287,
      "learning_rate": 0.00019998411453252217,
      "loss": 1.211,
      "step": 18
    },
    {
      "epoch": 0.007502467917077986,
      "grad_norm": 0.6234785914421082,
      "learning_rate": 0.0001999798950966188,
      "loss": 1.0711,
      "step": 19
    },
    {
      "epoch": 0.007897334649555774,
      "grad_norm": 0.5721188187599182,
      "learning_rate": 0.0001999751793267259,
      "loss": 0.9827,
      "step": 20
    },
    {
      "epoch": 0.008292201382033564,
      "grad_norm": 0.566277801990509,
      "learning_rate": 0.00019996996724625426,
      "loss": 0.9768,
      "step": 21
    },
    {
      "epoch": 0.008687068114511353,
      "grad_norm": 0.5721827745437622,
      "learning_rate": 0.0001999642588810784,
      "loss": 1.0156,
      "step": 22
    },
    {
      "epoch": 0.009081934846989142,
      "grad_norm": 0.4582698941230774,
      "learning_rate": 0.00019995805425953648,
      "loss": 1.0947,
      "step": 23
    },
    {
      "epoch": 0.00947680157946693,
      "grad_norm": 0.5519172549247742,
      "learning_rate": 0.00019995135341243042,
      "loss": 0.9691,
      "step": 24
    },
    {
      "epoch": 0.009871668311944718,
      "grad_norm": 0.5251504182815552,
      "learning_rate": 0.00019994415637302547,
      "loss": 1.1051,
      "step": 25
    },
    {
      "epoch": 0.010266535044422508,
      "grad_norm": 0.49342384934425354,
      "learning_rate": 0.00019993646317705016,
      "loss": 1.1886,
      "step": 26
    },
    {
      "epoch": 0.010661401776900297,
      "grad_norm": 0.458482027053833,
      "learning_rate": 0.0001999282738626961,
      "loss": 0.8504,
      "step": 27
    },
    {
      "epoch": 0.011056268509378084,
      "grad_norm": 0.5016602873802185,
      "learning_rate": 0.00019991958847061784,
      "loss": 0.8907,
      "step": 28
    },
    {
      "epoch": 0.011451135241855873,
      "grad_norm": 0.5376136302947998,
      "learning_rate": 0.0001999104070439326,
      "loss": 1.1895,
      "step": 29
    },
    {
      "epoch": 0.011846001974333662,
      "grad_norm": 0.5124022364616394,
      "learning_rate": 0.00019990072962822007,
      "loss": 1.2014,
      "step": 30
    },
    {
      "epoch": 0.012240868706811452,
      "grad_norm": 0.4343184232711792,
      "learning_rate": 0.0001998905562715222,
      "loss": 1.1406,
      "step": 31
    },
    {
      "epoch": 0.012635735439289239,
      "grad_norm": 0.5123298764228821,
      "learning_rate": 0.00019987988702434303,
      "loss": 1.1627,
      "step": 32
    },
    {
      "epoch": 0.013030602171767028,
      "grad_norm": 0.48822933435440063,
      "learning_rate": 0.00019986872193964827,
      "loss": 1.0454,
      "step": 33
    },
    {
      "epoch": 0.013425468904244817,
      "grad_norm": 0.501602053642273,
      "learning_rate": 0.00019985706107286514,
      "loss": 1.144,
      "step": 34
    },
    {
      "epoch": 0.013820335636722606,
      "grad_norm": 0.5616022348403931,
      "learning_rate": 0.00019984490448188218,
      "loss": 1.1829,
      "step": 35
    },
    {
      "epoch": 0.014215202369200396,
      "grad_norm": 0.5326923727989197,
      "learning_rate": 0.00019983225222704878,
      "loss": 0.9466,
      "step": 36
    },
    {
      "epoch": 0.014610069101678183,
      "grad_norm": 0.3960028886795044,
      "learning_rate": 0.000199819104371175,
      "loss": 0.8513,
      "step": 37
    },
    {
      "epoch": 0.015004935834155972,
      "grad_norm": 0.5138500928878784,
      "learning_rate": 0.00019980546097953132,
      "loss": 1.02,
      "step": 38
    },
    {
      "epoch": 0.015399802566633761,
      "grad_norm": 0.47606444358825684,
      "learning_rate": 0.00019979132211984805,
      "loss": 1.0354,
      "step": 39
    },
    {
      "epoch": 0.01579466929911155,
      "grad_norm": 0.4528456926345825,
      "learning_rate": 0.00019977668786231534,
      "loss": 0.8414,
      "step": 40
    },
    {
      "epoch": 0.01618953603158934,
      "grad_norm": 0.5217841863632202,
      "learning_rate": 0.00019976155827958252,
      "loss": 1.2372,
      "step": 41
    },
    {
      "epoch": 0.016584402764067127,
      "grad_norm": 0.511803925037384,
      "learning_rate": 0.000199745933446758,
      "loss": 0.8319,
      "step": 42
    },
    {
      "epoch": 0.016979269496544915,
      "grad_norm": 0.5257598757743835,
      "learning_rate": 0.00019972981344140874,
      "loss": 0.9624,
      "step": 43
    },
    {
      "epoch": 0.017374136229022705,
      "grad_norm": 0.5112103819847107,
      "learning_rate": 0.00019971319834355983,
      "loss": 1.123,
      "step": 44
    },
    {
      "epoch": 0.017769002961500493,
      "grad_norm": 0.5637938976287842,
      "learning_rate": 0.00019969608823569433,
      "loss": 1.2229,
      "step": 45
    },
    {
      "epoch": 0.018163869693978284,
      "grad_norm": 0.46877309679985046,
      "learning_rate": 0.0001996784832027525,
      "loss": 0.9595,
      "step": 46
    },
    {
      "epoch": 0.01855873642645607,
      "grad_norm": 0.45058727264404297,
      "learning_rate": 0.00019966038333213177,
      "loss": 1.0821,
      "step": 47
    },
    {
      "epoch": 0.01895360315893386,
      "grad_norm": 0.4382037818431854,
      "learning_rate": 0.00019964178871368594,
      "loss": 0.9997,
      "step": 48
    },
    {
      "epoch": 0.01934846989141165,
      "grad_norm": 0.44654494524002075,
      "learning_rate": 0.000199622699439725,
      "loss": 1.0338,
      "step": 49
    },
    {
      "epoch": 0.019743336623889437,
      "grad_norm": 0.5067604184150696,
      "learning_rate": 0.00019960311560501454,
      "loss": 1.1121,
      "step": 50
    },
    {
      "epoch": 0.020138203356367228,
      "grad_norm": 0.4290701746940613,
      "learning_rate": 0.0001995830373067754,
      "loss": 1.0287,
      "step": 51
    },
    {
      "epoch": 0.020533070088845015,
      "grad_norm": 0.5244402885437012,
      "learning_rate": 0.00019956246464468294,
      "loss": 1.1912,
      "step": 52
    },
    {
      "epoch": 0.020927936821322803,
      "grad_norm": 0.4742845892906189,
      "learning_rate": 0.0001995413977208669,
      "loss": 1.0139,
      "step": 53
    },
    {
      "epoch": 0.021322803553800593,
      "grad_norm": 0.47367361187934875,
      "learning_rate": 0.00019951983663991056,
      "loss": 1.0935,
      "step": 54
    },
    {
      "epoch": 0.02171767028627838,
      "grad_norm": 0.4405689835548401,
      "learning_rate": 0.00019949778150885042,
      "loss": 1.2933,
      "step": 55
    },
    {
      "epoch": 0.02211253701875617,
      "grad_norm": 0.39893534779548645,
      "learning_rate": 0.0001994752324371756,
      "loss": 0.83,
      "step": 56
    },
    {
      "epoch": 0.02250740375123396,
      "grad_norm": 0.4917256832122803,
      "learning_rate": 0.00019945218953682734,
      "loss": 1.0927,
      "step": 57
    },
    {
      "epoch": 0.022902270483711747,
      "grad_norm": 0.453832745552063,
      "learning_rate": 0.00019942865292219838,
      "loss": 0.964,
      "step": 58
    },
    {
      "epoch": 0.023297137216189538,
      "grad_norm": 0.4970617890357971,
      "learning_rate": 0.00019940462271013238,
      "loss": 1.0414,
      "step": 59
    },
    {
      "epoch": 0.023692003948667325,
      "grad_norm": 0.5314046144485474,
      "learning_rate": 0.0001993800990199235,
      "loss": 1.2126,
      "step": 60
    },
    {
      "epoch": 0.024086870681145112,
      "grad_norm": 0.5307350754737854,
      "learning_rate": 0.00019935508197331555,
      "loss": 1.0771,
      "step": 61
    },
    {
      "epoch": 0.024481737413622903,
      "grad_norm": 0.495712548494339,
      "learning_rate": 0.0001993295716945017,
      "loss": 1.0686,
      "step": 62
    },
    {
      "epoch": 0.02487660414610069,
      "grad_norm": 0.5226410627365112,
      "learning_rate": 0.00019930356831012353,
      "loss": 1.0349,
      "step": 63
    },
    {
      "epoch": 0.025271470878578478,
      "grad_norm": 0.4592258334159851,
      "learning_rate": 0.00019927707194927066,
      "loss": 0.9929,
      "step": 64
    },
    {
      "epoch": 0.02566633761105627,
      "grad_norm": 0.486198753118515,
      "learning_rate": 0.00019925008274347995,
      "loss": 1.0707,
      "step": 65
    },
    {
      "epoch": 0.026061204343534056,
      "grad_norm": 0.5763838291168213,
      "learning_rate": 0.00019922260082673497,
      "loss": 0.8451,
      "step": 66
    },
    {
      "epoch": 0.026456071076011847,
      "grad_norm": 0.5000368356704712,
      "learning_rate": 0.00019919462633546519,
      "loss": 0.8953,
      "step": 67
    },
    {
      "epoch": 0.026850937808489635,
      "grad_norm": 0.5193626284599304,
      "learning_rate": 0.0001991661594085455,
      "loss": 0.8959,
      "step": 68
    },
    {
      "epoch": 0.027245804540967422,
      "grad_norm": 0.5146979689598083,
      "learning_rate": 0.00019913720018729532,
      "loss": 1.1425,
      "step": 69
    },
    {
      "epoch": 0.027640671273445213,
      "grad_norm": 0.4941859543323517,
      "learning_rate": 0.000199107748815478,
      "loss": 1.1127,
      "step": 70
    },
    {
      "epoch": 0.028035538005923,
      "grad_norm": 0.5475146770477295,
      "learning_rate": 0.00019907780543930014,
      "loss": 1.0152,
      "step": 71
    },
    {
      "epoch": 0.02843040473840079,
      "grad_norm": 0.6136332154273987,
      "learning_rate": 0.00019904737020741075,
      "loss": 1.1167,
      "step": 72
    },
    {
      "epoch": 0.02882527147087858,
      "grad_norm": 0.531680703163147,
      "learning_rate": 0.00019901644327090064,
      "loss": 1.0022,
      "step": 73
    },
    {
      "epoch": 0.029220138203356366,
      "grad_norm": 0.5442494750022888,
      "learning_rate": 0.00019898502478330152,
      "loss": 0.8863,
      "step": 74
    },
    {
      "epoch": 0.029615004935834157,
      "grad_norm": 0.4275268018245697,
      "learning_rate": 0.00019895311490058542,
      "loss": 0.845,
      "step": 75
    },
    {
      "epoch": 0.030009871668311944,
      "grad_norm": 0.5767403841018677,
      "learning_rate": 0.00019892071378116376,
      "loss": 1.13,
      "step": 76
    },
    {
      "epoch": 0.030404738400789732,
      "grad_norm": 0.5110602974891663,
      "learning_rate": 0.00019888782158588667,
      "loss": 1.0784,
      "step": 77
    },
    {
      "epoch": 0.030799605133267523,
      "grad_norm": 0.5388869643211365,
      "learning_rate": 0.00019885443847804211,
      "loss": 1.0197,
      "step": 78
    },
    {
      "epoch": 0.03119447186574531,
      "grad_norm": 0.5769171118736267,
      "learning_rate": 0.00019882056462335512,
      "loss": 0.8093,
      "step": 79
    },
    {
      "epoch": 0.0315893385982231,
      "grad_norm": 0.3854546546936035,
      "learning_rate": 0.00019878620018998696,
      "loss": 0.7723,
      "step": 80
    },
    {
      "epoch": 0.031984205330700885,
      "grad_norm": 0.4594631791114807,
      "learning_rate": 0.00019875134534853427,
      "loss": 0.978,
      "step": 81
    },
    {
      "epoch": 0.03237907206317868,
      "grad_norm": 0.5577380061149597,
      "learning_rate": 0.0001987160002720283,
      "loss": 1.0064,
      "step": 82
    },
    {
      "epoch": 0.03277393879565647,
      "grad_norm": 0.4823514223098755,
      "learning_rate": 0.00019868016513593391,
      "loss": 0.9228,
      "step": 83
    },
    {
      "epoch": 0.033168805528134254,
      "grad_norm": 0.5669511556625366,
      "learning_rate": 0.0001986438401181489,
      "loss": 1.2223,
      "step": 84
    },
    {
      "epoch": 0.03356367226061204,
      "grad_norm": 0.48681461811065674,
      "learning_rate": 0.00019860702539900287,
      "loss": 1.0993,
      "step": 85
    },
    {
      "epoch": 0.03395853899308983,
      "grad_norm": 0.47141095995903015,
      "learning_rate": 0.00019856972116125653,
      "loss": 1.1599,
      "step": 86
    },
    {
      "epoch": 0.03435340572556762,
      "grad_norm": 0.5382753610610962,
      "learning_rate": 0.00019853192759010076,
      "loss": 1.1186,
      "step": 87
    },
    {
      "epoch": 0.03474827245804541,
      "grad_norm": 0.592026948928833,
      "learning_rate": 0.00019849364487315558,
      "loss": 1.0947,
      "step": 88
    },
    {
      "epoch": 0.0351431391905232,
      "grad_norm": 0.42034783959388733,
      "learning_rate": 0.00019845487320046935,
      "loss": 0.9649,
      "step": 89
    },
    {
      "epoch": 0.035538005923000986,
      "grad_norm": 0.4590117633342743,
      "learning_rate": 0.0001984156127645178,
      "loss": 0.997,
      "step": 90
    },
    {
      "epoch": 0.03593287265547877,
      "grad_norm": 0.5288587212562561,
      "learning_rate": 0.00019837586376020294,
      "loss": 1.2129,
      "step": 91
    },
    {
      "epoch": 0.03632773938795657,
      "grad_norm": 0.4397427439689636,
      "learning_rate": 0.0001983356263848523,
      "loss": 0.9819,
      "step": 92
    },
    {
      "epoch": 0.036722606120434355,
      "grad_norm": 0.4406636357307434,
      "learning_rate": 0.00019829490083821778,
      "loss": 1.074,
      "step": 93
    },
    {
      "epoch": 0.03711747285291214,
      "grad_norm": 0.4988841116428375,
      "learning_rate": 0.0001982536873224748,
      "loss": 0.9614,
      "step": 94
    },
    {
      "epoch": 0.03751233958538993,
      "grad_norm": 0.4320489466190338,
      "learning_rate": 0.00019821198604222113,
      "loss": 0.9872,
      "step": 95
    },
    {
      "epoch": 0.03790720631786772,
      "grad_norm": 0.4227694272994995,
      "learning_rate": 0.0001981697972044761,
      "loss": 1.0866,
      "step": 96
    },
    {
      "epoch": 0.03830207305034551,
      "grad_norm": 0.449147492647171,
      "learning_rate": 0.00019812712101867922,
      "loss": 1.0443,
      "step": 97
    },
    {
      "epoch": 0.0386969397828233,
      "grad_norm": 0.7097704410552979,
      "learning_rate": 0.00019808395769668963,
      "loss": 0.9615,
      "step": 98
    },
    {
      "epoch": 0.039091806515301086,
      "grad_norm": 0.4379878640174866,
      "learning_rate": 0.0001980403074527846,
      "loss": 1.0132,
      "step": 99
    },
    {
      "epoch": 0.039486673247778874,
      "grad_norm": 0.47281649708747864,
      "learning_rate": 0.0001979961705036587,
      "loss": 0.9845,
      "step": 100
    },
    {
      "epoch": 0.03988153998025666,
      "grad_norm": 0.424258291721344,
      "learning_rate": 0.00019795154706842266,
      "loss": 1.0192,
      "step": 101
    },
    {
      "epoch": 0.040276406712734455,
      "grad_norm": 0.5341431498527527,
      "learning_rate": 0.00019790643736860227,
      "loss": 0.9863,
      "step": 102
    },
    {
      "epoch": 0.04067127344521224,
      "grad_norm": 0.5369569659233093,
      "learning_rate": 0.00019786084162813733,
      "loss": 1.0572,
      "step": 103
    },
    {
      "epoch": 0.04106614017769003,
      "grad_norm": 0.5228739380836487,
      "learning_rate": 0.00019781476007338058,
      "loss": 0.8181,
      "step": 104
    },
    {
      "epoch": 0.04146100691016782,
      "grad_norm": 0.48687776923179626,
      "learning_rate": 0.00019776819293309633,
      "loss": 1.0303,
      "step": 105
    },
    {
      "epoch": 0.041855873642645605,
      "grad_norm": 0.6160632967948914,
      "learning_rate": 0.00019772114043845965,
      "loss": 0.9719,
      "step": 106
    },
    {
      "epoch": 0.04225074037512339,
      "grad_norm": 0.42757055163383484,
      "learning_rate": 0.00019767360282305508,
      "loss": 0.8688,
      "step": 107
    },
    {
      "epoch": 0.04264560710760119,
      "grad_norm": 0.5323183536529541,
      "learning_rate": 0.0001976255803228753,
      "loss": 1.0611,
      "step": 108
    },
    {
      "epoch": 0.043040473840078974,
      "grad_norm": 0.45643237233161926,
      "learning_rate": 0.00019757707317632028,
      "loss": 0.9245,
      "step": 109
    },
    {
      "epoch": 0.04343534057255676,
      "grad_norm": 0.51936936378479,
      "learning_rate": 0.0001975280816241959,
      "loss": 1.1352,
      "step": 110
    },
    {
      "epoch": 0.04383020730503455,
      "grad_norm": 0.49822020530700684,
      "learning_rate": 0.0001974786059097128,
      "loss": 0.9481,
      "step": 111
    },
    {
      "epoch": 0.04422507403751234,
      "grad_norm": 0.5198648571968079,
      "learning_rate": 0.0001974286462784851,
      "loss": 1.1848,
      "step": 112
    },
    {
      "epoch": 0.04461994076999013,
      "grad_norm": 0.9258118271827698,
      "learning_rate": 0.0001973782029785293,
      "loss": 1.1156,
      "step": 113
    },
    {
      "epoch": 0.04501480750246792,
      "grad_norm": 0.5064953565597534,
      "learning_rate": 0.00019732727626026305,
      "loss": 0.9965,
      "step": 114
    },
    {
      "epoch": 0.045409674234945706,
      "grad_norm": 0.4941990375518799,
      "learning_rate": 0.00019727586637650373,
      "loss": 1.1318,
      "step": 115
    },
    {
      "epoch": 0.04580454096742349,
      "grad_norm": 0.61434006690979,
      "learning_rate": 0.0001972239735824674,
      "loss": 1.0637,
      "step": 116
    },
    {
      "epoch": 0.04619940769990128,
      "grad_norm": 0.53554368019104,
      "learning_rate": 0.0001971715981357674,
      "loss": 0.8824,
      "step": 117
    },
    {
      "epoch": 0.046594274432379075,
      "grad_norm": 0.505577802658081,
      "learning_rate": 0.0001971187402964132,
      "loss": 0.9145,
      "step": 118
    },
    {
      "epoch": 0.04698914116485686,
      "grad_norm": 0.557715654373169,
      "learning_rate": 0.00019706540032680893,
      "loss": 0.9495,
      "step": 119
    },
    {
      "epoch": 0.04738400789733465,
      "grad_norm": 0.5071070194244385,
      "learning_rate": 0.00019701157849175228,
      "loss": 0.9492,
      "step": 120
    },
    {
      "epoch": 0.04777887462981244,
      "grad_norm": 0.4534873068332672,
      "learning_rate": 0.00019695727505843297,
      "loss": 1.1968,
      "step": 121
    },
    {
      "epoch": 0.048173741362290225,
      "grad_norm": 0.46414080262184143,
      "learning_rate": 0.00019690249029643162,
      "loss": 0.8883,
      "step": 122
    },
    {
      "epoch": 0.04856860809476802,
      "grad_norm": 0.43917688727378845,
      "learning_rate": 0.00019684722447771834,
      "loss": 1.0213,
      "step": 123
    },
    {
      "epoch": 0.048963474827245806,
      "grad_norm": 0.46896979212760925,
      "learning_rate": 0.00019679147787665126,
      "loss": 0.8508,
      "step": 124
    },
    {
      "epoch": 0.049358341559723594,
      "grad_norm": 0.457086443901062,
      "learning_rate": 0.0001967352507699754,
      "loss": 1.1217,
      "step": 125
    },
    {
      "epoch": 0.04975320829220138,
      "grad_norm": 0.44028928875923157,
      "learning_rate": 0.0001966785434368211,
      "loss": 1.0044,
      "step": 126
    },
    {
      "epoch": 0.05014807502467917,
      "grad_norm": 0.47712892293930054,
      "learning_rate": 0.00019662135615870275,
      "loss": 0.993,
      "step": 127
    },
    {
      "epoch": 0.050542941757156956,
      "grad_norm": 0.5953882932662964,
      "learning_rate": 0.00019656368921951734,
      "loss": 1.2092,
      "step": 128
    },
    {
      "epoch": 0.05093780848963475,
      "grad_norm": 0.4725169837474823,
      "learning_rate": 0.00019650554290554298,
      "loss": 0.8518,
      "step": 129
    },
    {
      "epoch": 0.05133267522211254,
      "grad_norm": 0.4115935266017914,
      "learning_rate": 0.00019644691750543767,
      "loss": 0.86,
      "step": 130
    },
    {
      "epoch": 0.051727541954590325,
      "grad_norm": 0.5736876726150513,
      "learning_rate": 0.0001963878133102377,
      "loss": 0.8093,
      "step": 131
    },
    {
      "epoch": 0.05212240868706811,
      "grad_norm": 0.4820341169834137,
      "learning_rate": 0.00019632823061335627,
      "loss": 1.1891,
      "step": 132
    },
    {
      "epoch": 0.0525172754195459,
      "grad_norm": 0.43938153982162476,
      "learning_rate": 0.00019626816971058205,
      "loss": 0.7104,
      "step": 133
    },
    {
      "epoch": 0.052912142152023695,
      "grad_norm": 0.4602479040622711,
      "learning_rate": 0.00019620763090007762,
      "loss": 0.906,
      "step": 134
    },
    {
      "epoch": 0.05330700888450148,
      "grad_norm": 0.567136824131012,
      "learning_rate": 0.0001961466144823781,
      "loss": 0.9195,
      "step": 135
    },
    {
      "epoch": 0.05370187561697927,
      "grad_norm": 0.4483197331428528,
      "learning_rate": 0.00019608512076038962,
      "loss": 0.913,
      "step": 136
    },
    {
      "epoch": 0.05409674234945706,
      "grad_norm": 0.42070272564888,
      "learning_rate": 0.00019602315003938782,
      "loss": 1.1745,
      "step": 137
    },
    {
      "epoch": 0.054491609081934844,
      "grad_norm": 0.5072475671768188,
      "learning_rate": 0.00019596070262701626,
      "loss": 0.9904,
      "step": 138
    },
    {
      "epoch": 0.05488647581441264,
      "grad_norm": 0.47331702709198,
      "learning_rate": 0.00019589777883328505,
      "loss": 1.1526,
      "step": 139
    },
    {
      "epoch": 0.055281342546890426,
      "grad_norm": 0.5382581949234009,
      "learning_rate": 0.00019583437897056915,
      "loss": 0.866,
      "step": 140
    },
    {
      "epoch": 0.05567620927936821,
      "grad_norm": 0.41280797123908997,
      "learning_rate": 0.0001957705033536069,
      "loss": 0.8771,
      "step": 141
    },
    {
      "epoch": 0.056071076011846,
      "grad_norm": 0.4384588301181793,
      "learning_rate": 0.00019570615229949842,
      "loss": 1.1457,
      "step": 142
    },
    {
      "epoch": 0.05646594274432379,
      "grad_norm": 0.3916715681552887,
      "learning_rate": 0.00019564132612770414,
      "loss": 0.832,
      "step": 143
    },
    {
      "epoch": 0.05686080947680158,
      "grad_norm": 0.5212041139602661,
      "learning_rate": 0.00019557602516004306,
      "loss": 0.9689,
      "step": 144
    },
    {
      "epoch": 0.05725567620927937,
      "grad_norm": 0.5447223782539368,
      "learning_rate": 0.00019551024972069126,
      "loss": 1.2021,
      "step": 145
    },
    {
      "epoch": 0.05765054294175716,
      "grad_norm": 0.5747576355934143,
      "learning_rate": 0.00019544400013618023,
      "loss": 1.0035,
      "step": 146
    },
    {
      "epoch": 0.058045409674234945,
      "grad_norm": 0.48325222730636597,
      "learning_rate": 0.00019537727673539536,
      "loss": 1.0858,
      "step": 147
    },
    {
      "epoch": 0.05844027640671273,
      "grad_norm": 0.5126092433929443,
      "learning_rate": 0.00019531007984957408,
      "loss": 0.8908,
      "step": 148
    },
    {
      "epoch": 0.05883514313919053,
      "grad_norm": 0.4576544761657715,
      "learning_rate": 0.0001952424098123045,
      "loss": 1.0389,
      "step": 149
    },
    {
      "epoch": 0.059230009871668314,
      "grad_norm": 0.43132367730140686,
      "learning_rate": 0.00019517426695952358,
      "loss": 0.8228,
      "step": 150
    },
    {
      "epoch": 0.0596248766041461,
      "grad_norm": 0.44958174228668213,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.0578,
      "step": 151
    },
    {
      "epoch": 0.06001974333662389,
      "grad_norm": 0.4115462899208069,
      "learning_rate": 0.00019503656416290963,
      "loss": 1.1849,
      "step": 152
    },
    {
      "epoch": 0.060414610069101676,
      "grad_norm": 0.5049015879631042,
      "learning_rate": 0.0001949670049026799,
      "loss": 1.0552,
      "step": 153
    },
    {
      "epoch": 0.060809476801579464,
      "grad_norm": 0.48052188754081726,
      "learning_rate": 0.00019489697419414182,
      "loss": 0.9705,
      "step": 154
    },
    {
      "epoch": 0.06120434353405726,
      "grad_norm": 0.5106899738311768,
      "learning_rate": 0.00019482647238495152,
      "loss": 0.9298,
      "step": 155
    },
    {
      "epoch": 0.061599210266535045,
      "grad_norm": 0.49583542346954346,
      "learning_rate": 0.00019475549982510382,
      "loss": 1.1084,
      "step": 156
    },
    {
      "epoch": 0.06199407699901283,
      "grad_norm": 0.5219290256500244,
      "learning_rate": 0.00019468405686693044,
      "loss": 1.0565,
      "step": 157
    },
    {
      "epoch": 0.06238894373149062,
      "grad_norm": 0.5849049687385559,
      "learning_rate": 0.00019461214386509842,
      "loss": 0.9961,
      "step": 158
    },
    {
      "epoch": 0.06278381046396841,
      "grad_norm": 0.5726532936096191,
      "learning_rate": 0.00019453976117660818,
      "loss": 1.0036,
      "step": 159
    },
    {
      "epoch": 0.0631786771964462,
      "grad_norm": 0.5158294439315796,
      "learning_rate": 0.0001944669091607919,
      "loss": 1.0737,
      "step": 160
    },
    {
      "epoch": 0.06357354392892399,
      "grad_norm": 0.48837384581565857,
      "learning_rate": 0.00019439358817931152,
      "loss": 1.0415,
      "step": 161
    },
    {
      "epoch": 0.06396841066140177,
      "grad_norm": 0.5121546387672424,
      "learning_rate": 0.00019431979859615726,
      "loss": 1.1167,
      "step": 162
    },
    {
      "epoch": 0.06436327739387956,
      "grad_norm": 0.6468896269798279,
      "learning_rate": 0.00019424554077764546,
      "loss": 0.944,
      "step": 163
    },
    {
      "epoch": 0.06475814412635736,
      "grad_norm": 0.4869466722011566,
      "learning_rate": 0.00019417081509241714,
      "loss": 1.0122,
      "step": 164
    },
    {
      "epoch": 0.06515301085883514,
      "grad_norm": 0.43559664487838745,
      "learning_rate": 0.00019409562191143577,
      "loss": 1.101,
      "step": 165
    },
    {
      "epoch": 0.06554787759131293,
      "grad_norm": 0.46581968665122986,
      "learning_rate": 0.00019401996160798573,
      "loss": 0.9572,
      "step": 166
    },
    {
      "epoch": 0.06594274432379071,
      "grad_norm": 0.5070847868919373,
      "learning_rate": 0.00019394383455767034,
      "loss": 1.0415,
      "step": 167
    },
    {
      "epoch": 0.06633761105626851,
      "grad_norm": 0.427746057510376,
      "learning_rate": 0.00019386724113841,
      "loss": 0.9545,
      "step": 168
    },
    {
      "epoch": 0.0667324777887463,
      "grad_norm": 0.44796499609947205,
      "learning_rate": 0.00019379018173044037,
      "loss": 0.9902,
      "step": 169
    },
    {
      "epoch": 0.06712734452122408,
      "grad_norm": 0.46470651030540466,
      "learning_rate": 0.00019371265671631037,
      "loss": 0.8932,
      "step": 170
    },
    {
      "epoch": 0.06752221125370188,
      "grad_norm": 0.524315595626831,
      "learning_rate": 0.00019363466648088034,
      "loss": 1.1012,
      "step": 171
    },
    {
      "epoch": 0.06791707798617966,
      "grad_norm": 0.39339789748191833,
      "learning_rate": 0.0001935562114113202,
      "loss": 0.9966,
      "step": 172
    },
    {
      "epoch": 0.06831194471865745,
      "grad_norm": 0.4902956783771515,
      "learning_rate": 0.00019347729189710743,
      "loss": 1.0936,
      "step": 173
    },
    {
      "epoch": 0.06870681145113525,
      "grad_norm": 0.44631102681159973,
      "learning_rate": 0.00019339790833002515,
      "loss": 1.0011,
      "step": 174
    },
    {
      "epoch": 0.06910167818361303,
      "grad_norm": 0.48202449083328247,
      "learning_rate": 0.00019331806110416027,
      "loss": 0.9386,
      "step": 175
    },
    {
      "epoch": 0.06949654491609082,
      "grad_norm": 0.5927445292472839,
      "learning_rate": 0.00019323775061590135,
      "loss": 0.919,
      "step": 176
    },
    {
      "epoch": 0.0698914116485686,
      "grad_norm": 0.5132244229316711,
      "learning_rate": 0.0001931569772639368,
      "loss": 0.8995,
      "step": 177
    },
    {
      "epoch": 0.0702862783810464,
      "grad_norm": 0.3917968273162842,
      "learning_rate": 0.00019307574144925287,
      "loss": 0.9831,
      "step": 178
    },
    {
      "epoch": 0.07068114511352419,
      "grad_norm": 0.4451232850551605,
      "learning_rate": 0.00019299404357513158,
      "loss": 1.0076,
      "step": 179
    },
    {
      "epoch": 0.07107601184600197,
      "grad_norm": 0.462495893239975,
      "learning_rate": 0.00019291188404714878,
      "loss": 1.1291,
      "step": 180
    },
    {
      "epoch": 0.07147087857847977,
      "grad_norm": 0.4120655953884125,
      "learning_rate": 0.0001928292632731721,
      "loss": 0.8429,
      "step": 181
    },
    {
      "epoch": 0.07186574531095755,
      "grad_norm": 0.5940248370170593,
      "learning_rate": 0.00019274618166335912,
      "loss": 1.078,
      "step": 182
    },
    {
      "epoch": 0.07226061204343534,
      "grad_norm": 0.46714073419570923,
      "learning_rate": 0.00019266263963015488,
      "loss": 0.9423,
      "step": 183
    },
    {
      "epoch": 0.07265547877591313,
      "grad_norm": 0.41042840480804443,
      "learning_rate": 0.00019257863758829035,
      "loss": 0.8366,
      "step": 184
    },
    {
      "epoch": 0.07305034550839092,
      "grad_norm": 0.5920013785362244,
      "learning_rate": 0.00019249417595478002,
      "loss": 1.084,
      "step": 185
    },
    {
      "epoch": 0.07344521224086871,
      "grad_norm": 0.5364437699317932,
      "learning_rate": 0.00019240925514892,
      "loss": 1.0667,
      "step": 186
    },
    {
      "epoch": 0.07384007897334649,
      "grad_norm": 0.5053632855415344,
      "learning_rate": 0.00019232387559228587,
      "loss": 0.9369,
      "step": 187
    },
    {
      "epoch": 0.07423494570582428,
      "grad_norm": 0.4612145721912384,
      "learning_rate": 0.0001922380377087306,
      "loss": 1.0796,
      "step": 188
    },
    {
      "epoch": 0.07462981243830208,
      "grad_norm": 0.5499488711357117,
      "learning_rate": 0.00019215174192438247,
      "loss": 1.2071,
      "step": 189
    },
    {
      "epoch": 0.07502467917077986,
      "grad_norm": 0.5043527483940125,
      "learning_rate": 0.00019206498866764288,
      "loss": 1.111,
      "step": 190
    },
    {
      "epoch": 0.07541954590325765,
      "grad_norm": 0.557327926158905,
      "learning_rate": 0.00019197777836918437,
      "loss": 1.0387,
      "step": 191
    },
    {
      "epoch": 0.07581441263573543,
      "grad_norm": 0.5045757293701172,
      "learning_rate": 0.0001918901114619483,
      "loss": 1.02,
      "step": 192
    },
    {
      "epoch": 0.07620927936821323,
      "grad_norm": 0.5515264868736267,
      "learning_rate": 0.00019180198838114282,
      "loss": 0.8964,
      "step": 193
    },
    {
      "epoch": 0.07660414610069102,
      "grad_norm": 0.4991328716278076,
      "learning_rate": 0.00019171340956424074,
      "loss": 0.9792,
      "step": 194
    },
    {
      "epoch": 0.0769990128331688,
      "grad_norm": 0.3965533971786499,
      "learning_rate": 0.00019162437545097719,
      "loss": 0.9692,
      "step": 195
    },
    {
      "epoch": 0.0773938795656466,
      "grad_norm": 0.49761366844177246,
      "learning_rate": 0.0001915348864833476,
      "loss": 0.9203,
      "step": 196
    },
    {
      "epoch": 0.07778874629812438,
      "grad_norm": 0.4468221962451935,
      "learning_rate": 0.00019144494310560544,
      "loss": 0.8878,
      "step": 197
    },
    {
      "epoch": 0.07818361303060217,
      "grad_norm": 0.4550800025463104,
      "learning_rate": 0.0001913545457642601,
      "loss": 0.8622,
      "step": 198
    },
    {
      "epoch": 0.07857847976307997,
      "grad_norm": 0.4845464825630188,
      "learning_rate": 0.00019126369490807447,
      "loss": 0.9628,
      "step": 199
    },
    {
      "epoch": 0.07897334649555775,
      "grad_norm": 0.4576549232006073,
      "learning_rate": 0.00019117239098806295,
      "loss": 1.0311,
      "step": 200
    },
    {
      "epoch": 0.07936821322803554,
      "grad_norm": 0.5760570764541626,
      "learning_rate": 0.00019108063445748904,
      "loss": 1.1729,
      "step": 201
    },
    {
      "epoch": 0.07976307996051332,
      "grad_norm": 0.6804947257041931,
      "learning_rate": 0.00019098842577186314,
      "loss": 0.8829,
      "step": 202
    },
    {
      "epoch": 0.08015794669299112,
      "grad_norm": 0.4532022476196289,
      "learning_rate": 0.00019089576538894036,
      "loss": 1.0826,
      "step": 203
    },
    {
      "epoch": 0.08055281342546891,
      "grad_norm": 0.5325609445571899,
      "learning_rate": 0.00019080265376871815,
      "loss": 1.0122,
      "step": 204
    },
    {
      "epoch": 0.08094768015794669,
      "grad_norm": 0.46676936745643616,
      "learning_rate": 0.00019070909137343408,
      "loss": 1.0449,
      "step": 205
    },
    {
      "epoch": 0.08134254689042449,
      "grad_norm": 0.4907085597515106,
      "learning_rate": 0.00019061507866756347,
      "loss": 0.9381,
      "step": 206
    },
    {
      "epoch": 0.08173741362290227,
      "grad_norm": 0.5306389927864075,
      "learning_rate": 0.0001905206161178172,
      "loss": 1.0909,
      "step": 207
    },
    {
      "epoch": 0.08213228035538006,
      "grad_norm": 0.48803043365478516,
      "learning_rate": 0.00019042570419313925,
      "loss": 1.0608,
      "step": 208
    },
    {
      "epoch": 0.08252714708785784,
      "grad_norm": 0.4253416061401367,
      "learning_rate": 0.0001903303433647045,
      "loss": 1.0037,
      "step": 209
    },
    {
      "epoch": 0.08292201382033564,
      "grad_norm": 0.5864118337631226,
      "learning_rate": 0.00019023453410591635,
      "loss": 0.8971,
      "step": 210
    },
    {
      "epoch": 0.08331688055281343,
      "grad_norm": 0.5264946222305298,
      "learning_rate": 0.00019013827689240436,
      "loss": 1.0425,
      "step": 211
    },
    {
      "epoch": 0.08371174728529121,
      "grad_norm": 0.5967885255813599,
      "learning_rate": 0.00019004157220202185,
      "loss": 0.8929,
      "step": 212
    },
    {
      "epoch": 0.084106614017769,
      "grad_norm": 0.5387664437294006,
      "learning_rate": 0.00018994442051484356,
      "loss": 0.9526,
      "step": 213
    },
    {
      "epoch": 0.08450148075024679,
      "grad_norm": 0.4813781976699829,
      "learning_rate": 0.00018984682231316333,
      "loss": 0.9496,
      "step": 214
    },
    {
      "epoch": 0.08489634748272458,
      "grad_norm": 0.513137698173523,
      "learning_rate": 0.0001897487780814916,
      "loss": 0.9249,
      "step": 215
    },
    {
      "epoch": 0.08529121421520237,
      "grad_norm": 0.585263192653656,
      "learning_rate": 0.00018965028830655309,
      "loss": 1.123,
      "step": 216
    },
    {
      "epoch": 0.08568608094768015,
      "grad_norm": 0.45392006635665894,
      "learning_rate": 0.00018955135347728432,
      "loss": 0.9507,
      "step": 217
    },
    {
      "epoch": 0.08608094768015795,
      "grad_norm": 0.7591621279716492,
      "learning_rate": 0.00018945197408483123,
      "loss": 0.8529,
      "step": 218
    },
    {
      "epoch": 0.08647581441263573,
      "grad_norm": 0.49809765815734863,
      "learning_rate": 0.0001893521506225467,
      "loss": 1.0025,
      "step": 219
    },
    {
      "epoch": 0.08687068114511352,
      "grad_norm": 0.4396488666534424,
      "learning_rate": 0.00018925188358598813,
      "loss": 0.9019,
      "step": 220
    },
    {
      "epoch": 0.08726554787759132,
      "grad_norm": 0.5315730571746826,
      "learning_rate": 0.000189151173472915,
      "loss": 1.0926,
      "step": 221
    },
    {
      "epoch": 0.0876604146100691,
      "grad_norm": 0.5290179252624512,
      "learning_rate": 0.00018905002078328632,
      "loss": 1.1491,
      "step": 222
    },
    {
      "epoch": 0.08805528134254689,
      "grad_norm": 0.41243505477905273,
      "learning_rate": 0.0001889484260192582,
      "loss": 0.7891,
      "step": 223
    },
    {
      "epoch": 0.08845014807502467,
      "grad_norm": 0.4659099280834198,
      "learning_rate": 0.0001888463896851815,
      "loss": 1.0976,
      "step": 224
    },
    {
      "epoch": 0.08884501480750247,
      "grad_norm": 0.5485631823539734,
      "learning_rate": 0.00018874391228759893,
      "loss": 0.8974,
      "step": 225
    },
    {
      "epoch": 0.08923988153998026,
      "grad_norm": 0.5084119439125061,
      "learning_rate": 0.000188640994335243,
      "loss": 1.0882,
      "step": 226
    },
    {
      "epoch": 0.08963474827245804,
      "grad_norm": 0.4861229956150055,
      "learning_rate": 0.0001885376363390332,
      "loss": 1.0262,
      "step": 227
    },
    {
      "epoch": 0.09002961500493584,
      "grad_norm": 0.5560560822486877,
      "learning_rate": 0.00018843383881207357,
      "loss": 0.9285,
      "step": 228
    },
    {
      "epoch": 0.09042448173741362,
      "grad_norm": 0.5360885858535767,
      "learning_rate": 0.00018832960226965008,
      "loss": 1.06,
      "step": 229
    },
    {
      "epoch": 0.09081934846989141,
      "grad_norm": 0.43957361578941345,
      "learning_rate": 0.0001882249272292282,
      "loss": 0.9748,
      "step": 230
    },
    {
      "epoch": 0.0912142152023692,
      "grad_norm": 0.4349263906478882,
      "learning_rate": 0.00018811981421045014,
      "loss": 0.9549,
      "step": 231
    },
    {
      "epoch": 0.09160908193484699,
      "grad_norm": 0.44644349813461304,
      "learning_rate": 0.0001880142637351325,
      "loss": 0.8965,
      "step": 232
    },
    {
      "epoch": 0.09200394866732478,
      "grad_norm": 0.482573539018631,
      "learning_rate": 0.0001879082763272635,
      "loss": 0.9798,
      "step": 233
    },
    {
      "epoch": 0.09239881539980256,
      "grad_norm": 0.4720049798488617,
      "learning_rate": 0.00018780185251300046,
      "loss": 0.8558,
      "step": 234
    },
    {
      "epoch": 0.09279368213228036,
      "grad_norm": 0.4637092053890228,
      "learning_rate": 0.00018769499282066717,
      "loss": 1.02,
      "step": 235
    },
    {
      "epoch": 0.09318854886475815,
      "grad_norm": 0.42427390813827515,
      "learning_rate": 0.00018758769778075122,
      "loss": 0.992,
      "step": 236
    },
    {
      "epoch": 0.09358341559723593,
      "grad_norm": 0.5138596892356873,
      "learning_rate": 0.00018747996792590148,
      "loss": 0.7596,
      "step": 237
    },
    {
      "epoch": 0.09397828232971372,
      "grad_norm": 0.4327022433280945,
      "learning_rate": 0.00018737180379092537,
      "loss": 1.1874,
      "step": 238
    },
    {
      "epoch": 0.0943731490621915,
      "grad_norm": 0.43842098116874695,
      "learning_rate": 0.00018726320591278616,
      "loss": 1.1122,
      "step": 239
    },
    {
      "epoch": 0.0947680157946693,
      "grad_norm": 0.4516022205352783,
      "learning_rate": 0.0001871541748306005,
      "loss": 0.8585,
      "step": 240
    },
    {
      "epoch": 0.0951628825271471,
      "grad_norm": 0.48878902196884155,
      "learning_rate": 0.00018704471108563548,
      "loss": 1.0806,
      "step": 241
    },
    {
      "epoch": 0.09555774925962487,
      "grad_norm": 0.5217946767807007,
      "learning_rate": 0.0001869348152213061,
      "loss": 0.9486,
      "step": 242
    },
    {
      "epoch": 0.09595261599210267,
      "grad_norm": 0.5032205581665039,
      "learning_rate": 0.00018682448778317262,
      "loss": 0.8841,
      "step": 243
    },
    {
      "epoch": 0.09634748272458045,
      "grad_norm": 0.5018641948699951,
      "learning_rate": 0.00018671372931893773,
      "loss": 0.7681,
      "step": 244
    },
    {
      "epoch": 0.09674234945705824,
      "grad_norm": 0.4859049618244171,
      "learning_rate": 0.00018660254037844388,
      "loss": 1.0615,
      "step": 245
    },
    {
      "epoch": 0.09713721618953604,
      "grad_norm": 0.5423186421394348,
      "learning_rate": 0.0001864909215136705,
      "loss": 0.9082,
      "step": 246
    },
    {
      "epoch": 0.09753208292201382,
      "grad_norm": 0.6605743169784546,
      "learning_rate": 0.0001863788732787314,
      "loss": 1.0483,
      "step": 247
    },
    {
      "epoch": 0.09792694965449161,
      "grad_norm": 0.5038883090019226,
      "learning_rate": 0.0001862663962298719,
      "loss": 0.9848,
      "step": 248
    },
    {
      "epoch": 0.0983218163869694,
      "grad_norm": 0.4320213496685028,
      "learning_rate": 0.00018615349092546604,
      "loss": 0.8254,
      "step": 249
    },
    {
      "epoch": 0.09871668311944719,
      "grad_norm": 0.48691487312316895,
      "learning_rate": 0.00018604015792601396,
      "loss": 1.0443,
      "step": 250
    },
    {
      "epoch": 0.09911154985192498,
      "grad_norm": 0.5005807876586914,
      "learning_rate": 0.0001859263977941389,
      "loss": 0.8591,
      "step": 251
    },
    {
      "epoch": 0.09950641658440276,
      "grad_norm": 0.5520577430725098,
      "learning_rate": 0.0001858122110945847,
      "loss": 1.0758,
      "step": 252
    },
    {
      "epoch": 0.09990128331688056,
      "grad_norm": 0.44724610447883606,
      "learning_rate": 0.00018569759839421265,
      "loss": 1.0458,
      "step": 253
    },
    {
      "epoch": 0.10029615004935834,
      "grad_norm": 0.5659752488136292,
      "learning_rate": 0.00018558256026199896,
      "loss": 1.0763,
      "step": 254
    },
    {
      "epoch": 0.10069101678183613,
      "grad_norm": 0.4626811146736145,
      "learning_rate": 0.00018546709726903178,
      "loss": 0.9823,
      "step": 255
    },
    {
      "epoch": 0.10108588351431391,
      "grad_norm": 0.46605491638183594,
      "learning_rate": 0.00018535120998850848,
      "loss": 1.2862,
      "step": 256
    },
    {
      "epoch": 0.1014807502467917,
      "grad_norm": 0.6146165132522583,
      "learning_rate": 0.00018523489899573262,
      "loss": 1.0654,
      "step": 257
    },
    {
      "epoch": 0.1018756169792695,
      "grad_norm": 0.4941771328449249,
      "learning_rate": 0.00018511816486811134,
      "loss": 0.6966,
      "step": 258
    },
    {
      "epoch": 0.10227048371174728,
      "grad_norm": 0.4850773811340332,
      "learning_rate": 0.00018500100818515222,
      "loss": 1.0579,
      "step": 259
    },
    {
      "epoch": 0.10266535044422508,
      "grad_norm": 0.4935731589794159,
      "learning_rate": 0.00018488342952846073,
      "loss": 0.916,
      "step": 260
    },
    {
      "epoch": 0.10306021717670286,
      "grad_norm": 0.46898818016052246,
      "learning_rate": 0.000184765429481737,
      "loss": 1.0551,
      "step": 261
    },
    {
      "epoch": 0.10345508390918065,
      "grad_norm": 0.4352802038192749,
      "learning_rate": 0.00018464700863077312,
      "loss": 0.9019,
      "step": 262
    },
    {
      "epoch": 0.10384995064165845,
      "grad_norm": 0.5294659733772278,
      "learning_rate": 0.0001845281675634503,
      "loss": 1.0661,
      "step": 263
    },
    {
      "epoch": 0.10424481737413623,
      "grad_norm": 0.5499119758605957,
      "learning_rate": 0.00018440890686973572,
      "loss": 1.0584,
      "step": 264
    },
    {
      "epoch": 0.10463968410661402,
      "grad_norm": 0.4448906183242798,
      "learning_rate": 0.0001842892271416797,
      "loss": 0.9614,
      "step": 265
    },
    {
      "epoch": 0.1050345508390918,
      "grad_norm": 0.5477500557899475,
      "learning_rate": 0.00018416912897341295,
      "loss": 0.8138,
      "step": 266
    },
    {
      "epoch": 0.1054294175715696,
      "grad_norm": 0.46556559205055237,
      "learning_rate": 0.00018404861296114337,
      "loss": 0.9528,
      "step": 267
    },
    {
      "epoch": 0.10582428430404739,
      "grad_norm": 0.5127370953559875,
      "learning_rate": 0.00018392767970315313,
      "loss": 1.0031,
      "step": 268
    },
    {
      "epoch": 0.10621915103652517,
      "grad_norm": 0.5918512940406799,
      "learning_rate": 0.0001838063297997958,
      "loss": 0.8006,
      "step": 269
    },
    {
      "epoch": 0.10661401776900296,
      "grad_norm": 0.5921156406402588,
      "learning_rate": 0.00018368456385349334,
      "loss": 0.9143,
      "step": 270
    },
    {
      "epoch": 0.10700888450148074,
      "grad_norm": 0.6525917053222656,
      "learning_rate": 0.000183562382468733,
      "loss": 1.0162,
      "step": 271
    },
    {
      "epoch": 0.10740375123395854,
      "grad_norm": 0.5337258577346802,
      "learning_rate": 0.00018343978625206452,
      "loss": 0.9916,
      "step": 272
    },
    {
      "epoch": 0.10779861796643633,
      "grad_norm": 0.509904146194458,
      "learning_rate": 0.00018331677581209696,
      "loss": 0.8978,
      "step": 273
    },
    {
      "epoch": 0.10819348469891411,
      "grad_norm": 0.45876345038414,
      "learning_rate": 0.0001831933517594957,
      "loss": 1.0373,
      "step": 274
    },
    {
      "epoch": 0.10858835143139191,
      "grad_norm": 0.5002173185348511,
      "learning_rate": 0.00018306951470697946,
      "loss": 0.8874,
      "step": 275
    },
    {
      "epoch": 0.10898321816386969,
      "grad_norm": 0.44106170535087585,
      "learning_rate": 0.00018294526526931718,
      "loss": 0.7961,
      "step": 276
    },
    {
      "epoch": 0.10937808489634748,
      "grad_norm": 0.4849831163883209,
      "learning_rate": 0.00018282060406332512,
      "loss": 1.0139,
      "step": 277
    },
    {
      "epoch": 0.10977295162882528,
      "grad_norm": 0.4701422452926636,
      "learning_rate": 0.0001826955317078636,
      "loss": 1.0458,
      "step": 278
    },
    {
      "epoch": 0.11016781836130306,
      "grad_norm": 0.5966842174530029,
      "learning_rate": 0.00018257004882383412,
      "loss": 0.7497,
      "step": 279
    },
    {
      "epoch": 0.11056268509378085,
      "grad_norm": 0.49655190110206604,
      "learning_rate": 0.00018244415603417603,
      "loss": 1.2634,
      "step": 280
    },
    {
      "epoch": 0.11095755182625863,
      "grad_norm": 0.7236825823783875,
      "learning_rate": 0.00018231785396386377,
      "loss": 1.0645,
      "step": 281
    },
    {
      "epoch": 0.11135241855873643,
      "grad_norm": 0.522117018699646,
      "learning_rate": 0.00018219114323990345,
      "loss": 0.8553,
      "step": 282
    },
    {
      "epoch": 0.11174728529121422,
      "grad_norm": 0.5486408472061157,
      "learning_rate": 0.00018206402449132995,
      "loss": 1.0859,
      "step": 283
    },
    {
      "epoch": 0.112142152023692,
      "grad_norm": 0.6336297392845154,
      "learning_rate": 0.00018193649834920373,
      "loss": 1.0769,
      "step": 284
    },
    {
      "epoch": 0.1125370187561698,
      "grad_norm": 0.47025516629219055,
      "learning_rate": 0.0001818085654466076,
      "loss": 0.9662,
      "step": 285
    },
    {
      "epoch": 0.11293188548864758,
      "grad_norm": 0.5352553725242615,
      "learning_rate": 0.00018168022641864377,
      "loss": 1.0481,
      "step": 286
    },
    {
      "epoch": 0.11332675222112537,
      "grad_norm": 0.4911988377571106,
      "learning_rate": 0.00018155148190243051,
      "loss": 0.888,
      "step": 287
    },
    {
      "epoch": 0.11372161895360317,
      "grad_norm": 0.4751187264919281,
      "learning_rate": 0.00018142233253709916,
      "loss": 0.9517,
      "step": 288
    },
    {
      "epoch": 0.11411648568608095,
      "grad_norm": 0.4718889892101288,
      "learning_rate": 0.00018129277896379077,
      "loss": 1.028,
      "step": 289
    },
    {
      "epoch": 0.11451135241855874,
      "grad_norm": 0.5223097205162048,
      "learning_rate": 0.00018116282182565311,
      "loss": 0.7671,
      "step": 290
    },
    {
      "epoch": 0.11490621915103652,
      "grad_norm": 0.47218167781829834,
      "learning_rate": 0.0001810324617678373,
      "loss": 0.982,
      "step": 291
    },
    {
      "epoch": 0.11530108588351431,
      "grad_norm": 0.5651125907897949,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.8603,
      "step": 292
    },
    {
      "epoch": 0.11569595261599211,
      "grad_norm": 0.6196467280387878,
      "learning_rate": 0.00018077053548377382,
      "loss": 0.9174,
      "step": 293
    },
    {
      "epoch": 0.11609081934846989,
      "grad_norm": 0.5194737911224365,
      "learning_rate": 0.0001806389705578168,
      "loss": 1.2105,
      "step": 294
    },
    {
      "epoch": 0.11648568608094768,
      "grad_norm": 0.46870988607406616,
      "learning_rate": 0.0001805070053127563,
      "loss": 0.9009,
      "step": 295
    },
    {
      "epoch": 0.11688055281342546,
      "grad_norm": 0.5096109509468079,
      "learning_rate": 0.0001803746404037125,
      "loss": 1.0304,
      "step": 296
    },
    {
      "epoch": 0.11727541954590326,
      "grad_norm": 0.5446627140045166,
      "learning_rate": 0.00018024187648778956,
      "loss": 0.9724,
      "step": 297
    },
    {
      "epoch": 0.11767028627838105,
      "grad_norm": 0.45771893858909607,
      "learning_rate": 0.00018010871422407236,
      "loss": 1.1809,
      "step": 298
    },
    {
      "epoch": 0.11806515301085883,
      "grad_norm": 0.4939158856868744,
      "learning_rate": 0.0001799751542736234,
      "loss": 0.9581,
      "step": 299
    },
    {
      "epoch": 0.11846001974333663,
      "grad_norm": 0.43252983689308167,
      "learning_rate": 0.00017984119729947944,
      "loss": 0.9829,
      "step": 300
    },
    {
      "epoch": 0.11885488647581441,
      "grad_norm": 0.5026227831840515,
      "learning_rate": 0.00017970684396664813,
      "loss": 1.0617,
      "step": 301
    },
    {
      "epoch": 0.1192497532082922,
      "grad_norm": 0.5750073194503784,
      "learning_rate": 0.00017957209494210493,
      "loss": 0.9648,
      "step": 302
    },
    {
      "epoch": 0.11964461994076998,
      "grad_norm": 0.511223316192627,
      "learning_rate": 0.0001794369508947894,
      "loss": 0.9457,
      "step": 303
    },
    {
      "epoch": 0.12003948667324778,
      "grad_norm": 0.6075318455696106,
      "learning_rate": 0.00017930141249560233,
      "loss": 1.1244,
      "step": 304
    },
    {
      "epoch": 0.12043435340572557,
      "grad_norm": 0.5594468712806702,
      "learning_rate": 0.00017916548041740213,
      "loss": 0.9975,
      "step": 305
    },
    {
      "epoch": 0.12082922013820335,
      "grad_norm": 0.45040223002433777,
      "learning_rate": 0.0001790291553350016,
      "loss": 1.0633,
      "step": 306
    },
    {
      "epoch": 0.12122408687068115,
      "grad_norm": 0.5200604200363159,
      "learning_rate": 0.0001788924379251645,
      "loss": 0.8929,
      "step": 307
    },
    {
      "epoch": 0.12161895360315893,
      "grad_norm": 0.48910826444625854,
      "learning_rate": 0.00017875532886660228,
      "loss": 0.9944,
      "step": 308
    },
    {
      "epoch": 0.12201382033563672,
      "grad_norm": 0.5069397687911987,
      "learning_rate": 0.0001786178288399706,
      "loss": 1.1026,
      "step": 309
    },
    {
      "epoch": 0.12240868706811452,
      "grad_norm": 0.4457162320613861,
      "learning_rate": 0.0001784799385278661,
      "loss": 0.8329,
      "step": 310
    },
    {
      "epoch": 0.1228035538005923,
      "grad_norm": 0.5899550318717957,
      "learning_rate": 0.0001783416586148229,
      "loss": 0.9725,
      "step": 311
    },
    {
      "epoch": 0.12319842053307009,
      "grad_norm": 0.48378434777259827,
      "learning_rate": 0.00017820298978730921,
      "loss": 0.9683,
      "step": 312
    },
    {
      "epoch": 0.12359328726554787,
      "grad_norm": 0.47537699341773987,
      "learning_rate": 0.00017806393273372395,
      "loss": 1.0343,
      "step": 313
    },
    {
      "epoch": 0.12398815399802567,
      "grad_norm": 0.46815434098243713,
      "learning_rate": 0.00017792448814439333,
      "loss": 1.0695,
      "step": 314
    },
    {
      "epoch": 0.12438302073050346,
      "grad_norm": 0.4383327066898346,
      "learning_rate": 0.00017778465671156743,
      "loss": 1.0047,
      "step": 315
    },
    {
      "epoch": 0.12477788746298124,
      "grad_norm": 0.46698901057243347,
      "learning_rate": 0.00017764443912941672,
      "loss": 1.0268,
      "step": 316
    },
    {
      "epoch": 0.12517275419545904,
      "grad_norm": 0.5612544417381287,
      "learning_rate": 0.0001775038360940287,
      "loss": 0.9519,
      "step": 317
    },
    {
      "epoch": 0.12556762092793683,
      "grad_norm": 0.5520269274711609,
      "learning_rate": 0.00017736284830340436,
      "loss": 0.9068,
      "step": 318
    },
    {
      "epoch": 0.12596248766041462,
      "grad_norm": 0.5094735622406006,
      "learning_rate": 0.00017722147645745468,
      "loss": 0.9915,
      "step": 319
    },
    {
      "epoch": 0.1263573543928924,
      "grad_norm": 0.49084824323654175,
      "learning_rate": 0.00017707972125799735,
      "loss": 0.8411,
      "step": 320
    },
    {
      "epoch": 0.12675222112537018,
      "grad_norm": 0.47397279739379883,
      "learning_rate": 0.00017693758340875306,
      "loss": 0.9266,
      "step": 321
    },
    {
      "epoch": 0.12714708785784798,
      "grad_norm": 0.3962591588497162,
      "learning_rate": 0.00017679506361534215,
      "loss": 0.9885,
      "step": 322
    },
    {
      "epoch": 0.12754195459032577,
      "grad_norm": 0.48955652117729187,
      "learning_rate": 0.000176652162585281,
      "loss": 1.0869,
      "step": 323
    },
    {
      "epoch": 0.12793682132280354,
      "grad_norm": 0.6018503308296204,
      "learning_rate": 0.00017650888102797868,
      "loss": 0.8733,
      "step": 324
    },
    {
      "epoch": 0.12833168805528133,
      "grad_norm": 0.6061957478523254,
      "learning_rate": 0.00017636521965473323,
      "loss": 1.2883,
      "step": 325
    },
    {
      "epoch": 0.12872655478775913,
      "grad_norm": 0.47981151938438416,
      "learning_rate": 0.00017622117917872823,
      "loss": 0.9246,
      "step": 326
    },
    {
      "epoch": 0.12912142152023692,
      "grad_norm": 0.5347411632537842,
      "learning_rate": 0.00017607676031502933,
      "loss": 1.0827,
      "step": 327
    },
    {
      "epoch": 0.12951628825271472,
      "grad_norm": 0.7249376773834229,
      "learning_rate": 0.0001759319637805806,
      "loss": 1.0677,
      "step": 328
    },
    {
      "epoch": 0.12991115498519248,
      "grad_norm": 0.5934639573097229,
      "learning_rate": 0.00017578679029420092,
      "loss": 0.8046,
      "step": 329
    },
    {
      "epoch": 0.13030602171767028,
      "grad_norm": 0.6128519773483276,
      "learning_rate": 0.00017564124057658056,
      "loss": 0.8474,
      "step": 330
    },
    {
      "epoch": 0.13070088845014807,
      "grad_norm": 0.49019843339920044,
      "learning_rate": 0.0001754953153502775,
      "loss": 0.9108,
      "step": 331
    },
    {
      "epoch": 0.13109575518262587,
      "grad_norm": 0.548611044883728,
      "learning_rate": 0.0001753490153397139,
      "loss": 0.9954,
      "step": 332
    },
    {
      "epoch": 0.13149062191510366,
      "grad_norm": 0.49729061126708984,
      "learning_rate": 0.00017520234127117243,
      "loss": 0.9943,
      "step": 333
    },
    {
      "epoch": 0.13188548864758143,
      "grad_norm": 0.47645774483680725,
      "learning_rate": 0.00017505529387279277,
      "loss": 0.8234,
      "step": 334
    },
    {
      "epoch": 0.13228035538005922,
      "grad_norm": 0.5200782418251038,
      "learning_rate": 0.0001749078738745679,
      "loss": 1.0991,
      "step": 335
    },
    {
      "epoch": 0.13267522211253702,
      "grad_norm": 0.4647184908390045,
      "learning_rate": 0.0001747600820083405,
      "loss": 1.0169,
      "step": 336
    },
    {
      "epoch": 0.1330700888450148,
      "grad_norm": 0.4864305853843689,
      "learning_rate": 0.00017461191900779936,
      "loss": 0.9525,
      "step": 337
    },
    {
      "epoch": 0.1334649555774926,
      "grad_norm": 0.5017113089561462,
      "learning_rate": 0.00017446338560847568,
      "loss": 0.807,
      "step": 338
    },
    {
      "epoch": 0.13385982230997037,
      "grad_norm": 0.5566651821136475,
      "learning_rate": 0.00017431448254773944,
      "loss": 1.2096,
      "step": 339
    },
    {
      "epoch": 0.13425468904244817,
      "grad_norm": 0.48729124665260315,
      "learning_rate": 0.00017416521056479577,
      "loss": 0.84,
      "step": 340
    },
    {
      "epoch": 0.13464955577492596,
      "grad_norm": 0.48902615904808044,
      "learning_rate": 0.00017401557040068124,
      "loss": 0.9068,
      "step": 341
    },
    {
      "epoch": 0.13504442250740376,
      "grad_norm": 0.5371021628379822,
      "learning_rate": 0.00017386556279826021,
      "loss": 1.0875,
      "step": 342
    },
    {
      "epoch": 0.13543928923988155,
      "grad_norm": 0.5122295618057251,
      "learning_rate": 0.00017371518850222112,
      "loss": 0.928,
      "step": 343
    },
    {
      "epoch": 0.13583415597235932,
      "grad_norm": 0.5144253373146057,
      "learning_rate": 0.00017356444825907273,
      "loss": 1.1201,
      "step": 344
    },
    {
      "epoch": 0.1362290227048371,
      "grad_norm": 0.6197713017463684,
      "learning_rate": 0.00017341334281714064,
      "loss": 1.0366,
      "step": 345
    },
    {
      "epoch": 0.1366238894373149,
      "grad_norm": 0.5059978365898132,
      "learning_rate": 0.00017326187292656333,
      "loss": 1.0132,
      "step": 346
    },
    {
      "epoch": 0.1370187561697927,
      "grad_norm": 0.45369940996170044,
      "learning_rate": 0.00017311003933928847,
      "loss": 1.0436,
      "step": 347
    },
    {
      "epoch": 0.1374136229022705,
      "grad_norm": 0.5087475180625916,
      "learning_rate": 0.00017295784280906934,
      "loss": 0.9475,
      "step": 348
    },
    {
      "epoch": 0.13780848963474826,
      "grad_norm": 0.48209476470947266,
      "learning_rate": 0.00017280528409146094,
      "loss": 1.1108,
      "step": 349
    },
    {
      "epoch": 0.13820335636722605,
      "grad_norm": 0.5897043943405151,
      "learning_rate": 0.00017265236394381633,
      "loss": 1.0758,
      "step": 350
    },
    {
      "epoch": 0.13859822309970385,
      "grad_norm": 0.4946494996547699,
      "learning_rate": 0.00017249908312528276,
      "loss": 0.9829,
      "step": 351
    },
    {
      "epoch": 0.13899308983218164,
      "grad_norm": 0.49029871821403503,
      "learning_rate": 0.00017234544239679806,
      "loss": 0.8431,
      "step": 352
    },
    {
      "epoch": 0.13938795656465944,
      "grad_norm": 0.5330137610435486,
      "learning_rate": 0.00017219144252108673,
      "loss": 1.13,
      "step": 353
    },
    {
      "epoch": 0.1397828232971372,
      "grad_norm": 0.47816064953804016,
      "learning_rate": 0.00017203708426265614,
      "loss": 1.0986,
      "step": 354
    },
    {
      "epoch": 0.140177690029615,
      "grad_norm": 0.537811815738678,
      "learning_rate": 0.00017188236838779295,
      "loss": 1.0599,
      "step": 355
    },
    {
      "epoch": 0.140177690029615,
      "eval_loss": 0.9808822274208069,
      "eval_runtime": 61.6088,
      "eval_samples_per_second": 17.319,
      "eval_steps_per_second": 8.668,
      "step": 355
    },
    {
      "epoch": 0.1405725567620928,
      "grad_norm": 0.5210056304931641,
      "learning_rate": 0.000171727295664559,
      "loss": 1.1635,
      "step": 356
    },
    {
      "epoch": 0.1409674234945706,
      "grad_norm": 0.5472628474235535,
      "learning_rate": 0.00017157186686278766,
      "loss": 1.2106,
      "step": 357
    },
    {
      "epoch": 0.14136229022704838,
      "grad_norm": 0.459087073802948,
      "learning_rate": 0.00017141608275408006,
      "loss": 1.0337,
      "step": 358
    },
    {
      "epoch": 0.14175715695952615,
      "grad_norm": 0.41874152421951294,
      "learning_rate": 0.00017125994411180124,
      "loss": 0.8032,
      "step": 359
    },
    {
      "epoch": 0.14215202369200394,
      "grad_norm": 0.4521096348762512,
      "learning_rate": 0.0001711034517110761,
      "loss": 0.9549,
      "step": 360
    },
    {
      "epoch": 0.14254689042448174,
      "grad_norm": 0.48767751455307007,
      "learning_rate": 0.00017094660632878582,
      "loss": 0.9779,
      "step": 361
    },
    {
      "epoch": 0.14294175715695953,
      "grad_norm": 0.4864053428173065,
      "learning_rate": 0.00017078940874356392,
      "loss": 0.7642,
      "step": 362
    },
    {
      "epoch": 0.14333662388943733,
      "grad_norm": 0.46765899658203125,
      "learning_rate": 0.00017063185973579232,
      "loss": 1.0457,
      "step": 363
    },
    {
      "epoch": 0.1437314906219151,
      "grad_norm": 0.4682892858982086,
      "learning_rate": 0.00017047396008759754,
      "loss": 0.974,
      "step": 364
    },
    {
      "epoch": 0.1441263573543929,
      "grad_norm": 0.4890439212322235,
|
"learning_rate": 0.00017031571058284678, |
|
"loss": 0.9047, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.14452122408687068, |
|
"grad_norm": 0.4934488832950592, |
|
"learning_rate": 0.00017015711200714414, |
|
"loss": 1.1, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.14491609081934848, |
|
"grad_norm": 0.5664051175117493, |
|
"learning_rate": 0.00016999816514782647, |
|
"loss": 1.0985, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.14531095755182627, |
|
"grad_norm": 0.5750765800476074, |
|
"learning_rate": 0.00016983887079395974, |
|
"loss": 0.936, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.14570582428430404, |
|
"grad_norm": 0.49568259716033936, |
|
"learning_rate": 0.00016967922973633494, |
|
"loss": 1.1489, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.14610069101678183, |
|
"grad_norm": 0.5348165035247803, |
|
"learning_rate": 0.00016951924276746425, |
|
"loss": 1.1904, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.14649555774925963, |
|
"grad_norm": 0.4326549470424652, |
|
"learning_rate": 0.00016935891068157704, |
|
"loss": 0.8516, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.14689042448173742, |
|
"grad_norm": 0.5540488362312317, |
|
"learning_rate": 0.000169198234274616, |
|
"loss": 1.0405, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.1472852912142152, |
|
"grad_norm": 0.5236465930938721, |
|
"learning_rate": 0.00016903721434423306, |
|
"loss": 0.9151, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.14768015794669298, |
|
"grad_norm": 0.5312307476997375, |
|
"learning_rate": 0.00016887585168978562, |
|
"loss": 1.1763, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.14807502467917077, |
|
"grad_norm": 0.4888836145401001, |
|
"learning_rate": 0.0001687141471123324, |
|
"loss": 1.0494, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.14846989141164857, |
|
"grad_norm": 0.49629417061805725, |
|
"learning_rate": 0.00016855210141462963, |
|
"loss": 0.697, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.14886475814412636, |
|
"grad_norm": 0.5097388029098511, |
|
"learning_rate": 0.0001683897154011269, |
|
"loss": 1.0308, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.14925962487660416, |
|
"grad_norm": 0.4330967664718628, |
|
"learning_rate": 0.0001682269898779632, |
|
"loss": 0.8466, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.14965449160908192, |
|
"grad_norm": 0.4622458219528198, |
|
"learning_rate": 0.00016806392565296311, |
|
"loss": 0.849, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.15004935834155972, |
|
"grad_norm": 0.4904235303401947, |
|
"learning_rate": 0.00016790052353563253, |
|
"loss": 1.0324, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1504442250740375, |
|
"grad_norm": 0.5324286222457886, |
|
"learning_rate": 0.00016773678433715475, |
|
"loss": 0.905, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.1508390918065153, |
|
"grad_norm": 0.5701109766960144, |
|
"learning_rate": 0.00016757270887038654, |
|
"loss": 1.1105, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.1512339585389931, |
|
"grad_norm": 0.45347732305526733, |
|
"learning_rate": 0.00016740829794985394, |
|
"loss": 0.875, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.15162882527147087, |
|
"grad_norm": 0.4763396084308624, |
|
"learning_rate": 0.00016724355239174833, |
|
"loss": 0.9732, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.15202369200394866, |
|
"grad_norm": 0.4246227443218231, |
|
"learning_rate": 0.00016707847301392236, |
|
"loss": 0.9235, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.15241855873642646, |
|
"grad_norm": 0.6034351587295532, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 0.8553, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.15281342546890425, |
|
"grad_norm": 0.48924902081489563, |
|
"learning_rate": 0.0001667473160788017, |
|
"loss": 0.967, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.15320829220138205, |
|
"grad_norm": 0.5340859889984131, |
|
"learning_rate": 0.00016658124016548197, |
|
"loss": 1.1052, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.1536031589338598, |
|
"grad_norm": 0.5492063760757446, |
|
"learning_rate": 0.0001664148337203836, |
|
"loss": 1.1952, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.1539980256663376, |
|
"grad_norm": 0.4856433868408203, |
|
"learning_rate": 0.00016624809756960444, |
|
"loss": 0.9994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1543928923988154, |
|
"grad_norm": 0.4147414565086365, |
|
"learning_rate": 0.00016608103254087906, |
|
"loss": 0.9976, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.1547877591312932, |
|
"grad_norm": 0.5175687074661255, |
|
"learning_rate": 0.00016591363946357474, |
|
"loss": 1.0245, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.155182625863771, |
|
"grad_norm": 0.620985209941864, |
|
"learning_rate": 0.00016574591916868728, |
|
"loss": 1.1981, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.15557749259624876, |
|
"grad_norm": 0.42090892791748047, |
|
"learning_rate": 0.00016557787248883696, |
|
"loss": 0.9171, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.15597235932872655, |
|
"grad_norm": 0.6142613291740417, |
|
"learning_rate": 0.00016540950025826422, |
|
"loss": 1.0901, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.15636722606120435, |
|
"grad_norm": 0.6569430232048035, |
|
"learning_rate": 0.00016524080331282577, |
|
"loss": 1.0362, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.15676209279368214, |
|
"grad_norm": 0.5111309885978699, |
|
"learning_rate": 0.00016507178248999024, |
|
"loss": 0.9666, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.15715695952615993, |
|
"grad_norm": 0.5645838975906372, |
|
"learning_rate": 0.00016490243862883413, |
|
"loss": 0.9295, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.1575518262586377, |
|
"grad_norm": 0.46207594871520996, |
|
"learning_rate": 0.00016473277257003757, |
|
"loss": 1.0462, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.1579466929911155, |
|
"grad_norm": 0.5260053873062134, |
|
"learning_rate": 0.00016456278515588024, |
|
"loss": 0.9745, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1583415597235933, |
|
"grad_norm": 0.5953394174575806, |
|
"learning_rate": 0.00016439247723023712, |
|
"loss": 1.0208, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.15873642645607108, |
|
"grad_norm": 0.5679177641868591, |
|
"learning_rate": 0.00016422184963857432, |
|
"loss": 0.8679, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.15913129318854888, |
|
"grad_norm": 0.47382932901382446, |
|
"learning_rate": 0.00016405090322794483, |
|
"loss": 0.9579, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.15952615992102664, |
|
"grad_norm": 0.49775707721710205, |
|
"learning_rate": 0.00016387963884698448, |
|
"loss": 1.2399, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.15992102665350444, |
|
"grad_norm": 0.49022993445396423, |
|
"learning_rate": 0.00016370805734590747, |
|
"loss": 1.0984, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.16031589338598223, |
|
"grad_norm": 0.5563966035842896, |
|
"learning_rate": 0.00016353615957650236, |
|
"loss": 0.9777, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.16071076011846003, |
|
"grad_norm": 0.46516212821006775, |
|
"learning_rate": 0.00016336394639212783, |
|
"loss": 0.8274, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.16110562685093782, |
|
"grad_norm": 0.4698953926563263, |
|
"learning_rate": 0.00016319141864770827, |
|
"loss": 0.9419, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.1615004935834156, |
|
"grad_norm": 0.4373069405555725, |
|
"learning_rate": 0.00016301857719972976, |
|
"loss": 0.8235, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.16189536031589338, |
|
"grad_norm": 0.6190903186798096, |
|
"learning_rate": 0.00016284542290623567, |
|
"loss": 0.9092, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.16229022704837118, |
|
"grad_norm": 0.5729885101318359, |
|
"learning_rate": 0.0001626719566268224, |
|
"loss": 0.9563, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.16268509378084897, |
|
"grad_norm": 0.5004550814628601, |
|
"learning_rate": 0.00016249817922263517, |
|
"loss": 1.1011, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.16307996051332677, |
|
"grad_norm": 0.4848381578922272, |
|
"learning_rate": 0.0001623240915563638, |
|
"loss": 1.0606, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.16347482724580453, |
|
"grad_norm": 0.5314562916755676, |
|
"learning_rate": 0.00016214969449223824, |
|
"loss": 0.7504, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.16386969397828233, |
|
"grad_norm": 0.5033890604972839, |
|
"learning_rate": 0.00016197498889602448, |
|
"loss": 1.0749, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.16426456071076012, |
|
"grad_norm": 0.5094935894012451, |
|
"learning_rate": 0.0001617999756350202, |
|
"loss": 0.8851, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.16465942744323792, |
|
"grad_norm": 0.4637800455093384, |
|
"learning_rate": 0.00016162465557805034, |
|
"loss": 0.8553, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.16505429417571568, |
|
"grad_norm": 0.43399858474731445, |
|
"learning_rate": 0.00016144902959546286, |
|
"loss": 0.9576, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.16544916090819348, |
|
"grad_norm": 0.5109582543373108, |
|
"learning_rate": 0.00016127309855912457, |
|
"loss": 1.1576, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.16584402764067127, |
|
"grad_norm": 0.5534142851829529, |
|
"learning_rate": 0.00016109686334241655, |
|
"loss": 0.7701, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.16623889437314907, |
|
"grad_norm": 0.46211493015289307, |
|
"learning_rate": 0.00016092032482023, |
|
"loss": 0.9263, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.16663376110562686, |
|
"grad_norm": 0.5489344596862793, |
|
"learning_rate": 0.00016074348386896177, |
|
"loss": 1.0165, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.16702862783810463, |
|
"grad_norm": 0.4893771708011627, |
|
"learning_rate": 0.0001605663413665102, |
|
"loss": 1.0284, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.16742349457058242, |
|
"grad_norm": 0.46742960810661316, |
|
"learning_rate": 0.00016038889819227045, |
|
"loss": 1.0603, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.16781836130306022, |
|
"grad_norm": 0.6020311713218689, |
|
"learning_rate": 0.00016021115522713047, |
|
"loss": 0.8771, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.168213228035538, |
|
"grad_norm": 0.5273780822753906, |
|
"learning_rate": 0.00016003311335346636, |
|
"loss": 0.9648, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.1686080947680158, |
|
"grad_norm": 0.5141377449035645, |
|
"learning_rate": 0.00015985477345513817, |
|
"loss": 0.8773, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.16900296150049357, |
|
"grad_norm": 0.4852812588214874, |
|
"learning_rate": 0.00015967613641748542, |
|
"loss": 0.7764, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.16939782823297136, |
|
"grad_norm": 0.43280699849128723, |
|
"learning_rate": 0.0001594972031273228, |
|
"loss": 0.7407, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.16979269496544916, |
|
"grad_norm": 0.5069910883903503, |
|
"learning_rate": 0.00015931797447293552, |
|
"loss": 0.9313, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.17018756169792695, |
|
"grad_norm": 0.5123517513275146, |
|
"learning_rate": 0.00015913845134407533, |
|
"loss": 1.0705, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.17058242843040475, |
|
"grad_norm": 0.724429190158844, |
|
"learning_rate": 0.00015895863463195558, |
|
"loss": 0.9353, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.17097729516288251, |
|
"grad_norm": 0.5428094267845154, |
|
"learning_rate": 0.00015877852522924732, |
|
"loss": 1.1227, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.1713721618953603, |
|
"grad_norm": 0.6102519631385803, |
|
"learning_rate": 0.00015859812403007443, |
|
"loss": 0.9353, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.1717670286278381, |
|
"grad_norm": 0.5151787400245667, |
|
"learning_rate": 0.00015841743193000944, |
|
"loss": 0.9646, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.1721618953603159, |
|
"grad_norm": 0.6272695064544678, |
|
"learning_rate": 0.00015823644982606905, |
|
"loss": 0.8384, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.1725567620927937, |
|
"grad_norm": 0.49354809522628784, |
|
"learning_rate": 0.00015805517861670952, |
|
"loss": 0.7855, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.17295162882527146, |
|
"grad_norm": 0.47559288144111633, |
|
"learning_rate": 0.0001578736192018224, |
|
"loss": 0.9591, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.17334649555774925, |
|
"grad_norm": 0.5615376830101013, |
|
"learning_rate": 0.00015769177248273008, |
|
"loss": 1.1537, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.17374136229022705, |
|
"grad_norm": 0.5301774144172668, |
|
"learning_rate": 0.00015750963936218105, |
|
"loss": 0.7773, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.17413622902270484, |
|
"grad_norm": 0.5083664059638977, |
|
"learning_rate": 0.0001573272207443457, |
|
"loss": 0.9705, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.17453109575518264, |
|
"grad_norm": 0.5633112788200378, |
|
"learning_rate": 0.00015714451753481168, |
|
"loss": 1.0109, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.1749259624876604, |
|
"grad_norm": 0.5243581533432007, |
|
"learning_rate": 0.00015696153064057947, |
|
"loss": 1.0258, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.1753208292201382, |
|
"grad_norm": 0.6054911613464355, |
|
"learning_rate": 0.0001567782609700579, |
|
"loss": 1.0102, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.175715695952616, |
|
"grad_norm": 0.5889274477958679, |
|
"learning_rate": 0.00015659470943305955, |
|
"loss": 1.1549, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.17611056268509379, |
|
"grad_norm": 0.5765202045440674, |
|
"learning_rate": 0.0001564108769407962, |
|
"loss": 0.7791, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.17650542941757158, |
|
"grad_norm": 0.5080841779708862, |
|
"learning_rate": 0.0001562267644058746, |
|
"loss": 0.9962, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.17690029615004935, |
|
"grad_norm": 0.5185093879699707, |
|
"learning_rate": 0.00015604237274229147, |
|
"loss": 1.1927, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.17729516288252714, |
|
"grad_norm": 0.5385391116142273, |
|
"learning_rate": 0.00015585770286542945, |
|
"loss": 1.0555, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.17769002961500494, |
|
"grad_norm": 0.6289413571357727, |
|
"learning_rate": 0.00015567275569205218, |
|
"loss": 1.0431, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.17808489634748273, |
|
"grad_norm": 0.6271052956581116, |
|
"learning_rate": 0.0001554875321402999, |
|
"loss": 1.0078, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.17847976307996052, |
|
"grad_norm": 0.6165266633033752, |
|
"learning_rate": 0.00015530203312968502, |
|
"loss": 0.9761, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.1788746298124383, |
|
"grad_norm": 0.4909960627555847, |
|
"learning_rate": 0.00015511625958108719, |
|
"loss": 1.061, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.17926949654491608, |
|
"grad_norm": 0.5358797311782837, |
|
"learning_rate": 0.00015493021241674918, |
|
"loss": 1.0878, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.17966436327739388, |
|
"grad_norm": 0.6802231073379517, |
|
"learning_rate": 0.000154743892560272, |
|
"loss": 1.0068, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.18005923000987167, |
|
"grad_norm": 0.6705336570739746, |
|
"learning_rate": 0.00015455730093661034, |
|
"loss": 1.0845, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.18045409674234947, |
|
"grad_norm": 0.43329593539237976, |
|
"learning_rate": 0.0001543704384720681, |
|
"loss": 0.8636, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.18084896347482723, |
|
"grad_norm": 0.5168460011482239, |
|
"learning_rate": 0.0001541833060942937, |
|
"loss": 0.7497, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.18124383020730503, |
|
"grad_norm": 0.4275985062122345, |
|
"learning_rate": 0.0001539959047322755, |
|
"loss": 0.9042, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.18163869693978282, |
|
"grad_norm": 0.44853127002716064, |
|
"learning_rate": 0.00015380823531633729, |
|
"loss": 0.9091, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.18203356367226062, |
|
"grad_norm": 0.4683418571949005, |
|
"learning_rate": 0.00015362029877813332, |
|
"loss": 0.8174, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.1824284304047384, |
|
"grad_norm": 0.4813165068626404, |
|
"learning_rate": 0.00015343209605064422, |
|
"loss": 0.7648, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.18282329713721618, |
|
"grad_norm": 0.47031188011169434, |
|
"learning_rate": 0.00015324362806817186, |
|
"loss": 0.8667, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.18321816386969397, |
|
"grad_norm": 0.5744631886482239, |
|
"learning_rate": 0.00015305489576633504, |
|
"loss": 0.8219, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.18361303060217177, |
|
"grad_norm": 0.48958727717399597, |
|
"learning_rate": 0.00015286590008206465, |
|
"loss": 1.018, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.18400789733464956, |
|
"grad_norm": 0.5247324109077454, |
|
"learning_rate": 0.00015267664195359917, |
|
"loss": 0.8933, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.18440276406712736, |
|
"grad_norm": 0.545970618724823, |
|
"learning_rate": 0.00015248712232047992, |
|
"loss": 1.0508, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.18479763079960512, |
|
"grad_norm": 0.5140126347541809, |
|
"learning_rate": 0.0001522973421235464, |
|
"loss": 0.7581, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.18519249753208292, |
|
"grad_norm": 0.4570896625518799, |
|
"learning_rate": 0.00015210730230493162, |
|
"loss": 1.0665, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.1855873642645607, |
|
"grad_norm": 0.43096470832824707, |
|
"learning_rate": 0.00015191700380805752, |
|
"loss": 0.8313, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1859822309970385, |
|
"grad_norm": 0.460275799036026, |
|
"learning_rate": 0.00015172644757763015, |
|
"loss": 0.9575, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.1863770977295163, |
|
"grad_norm": 0.4163447618484497, |
|
"learning_rate": 0.00015153563455963499, |
|
"loss": 0.8838, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.18677196446199407, |
|
"grad_norm": 0.542241096496582, |
|
"learning_rate": 0.0001513445657013324, |
|
"loss": 0.8143, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.18716683119447186, |
|
"grad_norm": 0.6614589691162109, |
|
"learning_rate": 0.00015115324195125274, |
|
"loss": 0.9645, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.18756169792694966, |
|
"grad_norm": 0.4719527065753937, |
|
"learning_rate": 0.00015096166425919175, |
|
"loss": 0.9894, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.18795656465942745, |
|
"grad_norm": 0.4972122013568878, |
|
"learning_rate": 0.0001507698335762059, |
|
"loss": 0.8865, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.18835143139190524, |
|
"grad_norm": 0.5407273769378662, |
|
"learning_rate": 0.00015057775085460749, |
|
"loss": 0.8714, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.188746298124383, |
|
"grad_norm": 0.4692353308200836, |
|
"learning_rate": 0.00015038541704796003, |
|
"loss": 0.9357, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.1891411648568608, |
|
"grad_norm": 0.4748722314834595, |
|
"learning_rate": 0.00015019283311107367, |
|
"loss": 1.0376, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.1895360315893386, |
|
"grad_norm": 0.4381348490715027, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.0059, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1899308983218164, |
|
"grad_norm": 0.48987191915512085, |
|
"learning_rate": 0.0001498069186720279, |
|
"loss": 0.8901, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.1903257650542942, |
|
"grad_norm": 0.5566233396530151, |
|
"learning_rate": 0.0001496135900856782, |
|
"loss": 1.1464, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.19072063178677195, |
|
"grad_norm": 0.5021251440048218, |
|
"learning_rate": 0.00014942001520069947, |
|
"loss": 1.0947, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.19111549851924975, |
|
"grad_norm": 0.5681881904602051, |
|
"learning_rate": 0.00014922619497806277, |
|
"loss": 1.0981, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.19151036525172754, |
|
"grad_norm": 0.5182890892028809, |
|
"learning_rate": 0.00014903213037995724, |
|
"loss": 1.1017, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.19190523198420534, |
|
"grad_norm": 0.4682919979095459, |
|
"learning_rate": 0.0001488378223697851, |
|
"loss": 0.6508, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.19230009871668313, |
|
"grad_norm": 0.4784727096557617, |
|
"learning_rate": 0.00014864327191215702, |
|
"loss": 0.874, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.1926949654491609, |
|
"grad_norm": 0.5247599482536316, |
|
"learning_rate": 0.00014844847997288717, |
|
"loss": 1.1797, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.1930898321816387, |
|
"grad_norm": 0.48195531964302063, |
|
"learning_rate": 0.00014825344751898863, |
|
"loss": 1.0463, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.1934846989141165, |
|
"grad_norm": 0.5093549489974976, |
|
"learning_rate": 0.00014805817551866838, |
|
"loss": 1.0409, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.19387956564659428, |
|
"grad_norm": 0.5454416275024414, |
|
"learning_rate": 0.00014786266494132267, |
|
"loss": 1.035, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.19427443237907208, |
|
"grad_norm": 0.49301639199256897, |
|
"learning_rate": 0.00014766691675753202, |
|
"loss": 1.1046, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.19466929911154984, |
|
"grad_norm": 0.4534429907798767, |
|
"learning_rate": 0.00014747093193905657, |
|
"loss": 0.9061, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.19506416584402764, |
|
"grad_norm": 0.599082887172699, |
|
"learning_rate": 0.00014727471145883127, |
|
"loss": 1.0882, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.19545903257650543, |
|
"grad_norm": 0.5382128953933716, |
|
"learning_rate": 0.00014707825629096084, |
|
"loss": 1.0369, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.19585389930898323, |
|
"grad_norm": 0.5784068703651428, |
|
"learning_rate": 0.00014688156741071514, |
|
"loss": 0.9614, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.19624876604146102, |
|
"grad_norm": 0.5423576235771179, |
|
"learning_rate": 0.00014668464579452425, |
|
"loss": 0.9217, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.1966436327739388, |
|
"grad_norm": 0.47836440801620483, |
|
"learning_rate": 0.00014648749241997363, |
|
"loss": 0.918, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.19703849950641658, |
|
"grad_norm": 0.5091026425361633, |
|
"learning_rate": 0.00014629010826579928, |
|
"loss": 1.0415, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.19743336623889438, |
|
"grad_norm": 0.553871214389801, |
|
"learning_rate": 0.00014609249431188278, |
|
"loss": 0.8315, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19782823297137217, |
|
"grad_norm": 0.4480036199092865, |
|
"learning_rate": 0.00014589465153924672, |
|
"loss": 0.9431, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.19822309970384996, |
|
"grad_norm": 0.5340292453765869, |
|
"learning_rate": 0.00014569658093004935, |
|
"loss": 0.8736, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.19861796643632773, |
|
"grad_norm": 0.5622652173042297, |
|
"learning_rate": 0.0001454982834675802, |
|
"loss": 1.1759, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.19901283316880553, |
|
"grad_norm": 0.5641468167304993, |
|
"learning_rate": 0.00014529976013625482, |
|
"loss": 0.9721, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.19940769990128332, |
|
"grad_norm": 0.49418380856513977, |
|
"learning_rate": 0.00014510101192161018, |
|
"loss": 0.8389, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.19980256663376111, |
|
"grad_norm": 0.47910451889038086, |
|
"learning_rate": 0.0001449020398102996, |
|
"loss": 0.9339, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.20019743336623888, |
|
"grad_norm": 0.5156650543212891, |
|
"learning_rate": 0.00014470284479008782, |
|
"loss": 0.9458, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.20059230009871667, |
|
"grad_norm": 0.4681549072265625, |
|
"learning_rate": 0.00014450342784984633, |
|
"loss": 0.8954, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.20098716683119447, |
|
"grad_norm": 0.5173560380935669, |
|
"learning_rate": 0.00014430378997954817, |
|
"loss": 1.0272, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.20138203356367226, |
|
"grad_norm": 0.5966143012046814, |
|
"learning_rate": 0.00014410393217026318, |
|
"loss": 0.8915, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.20177690029615006, |
|
"grad_norm": 0.5026108026504517, |
|
"learning_rate": 0.00014390385541415308, |
|
"loss": 0.9169, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.20217176702862782, |
|
"grad_norm": 0.4867290258407593, |
|
"learning_rate": 0.00014370356070446654, |
|
"loss": 1.133, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.20256663376110562, |
|
"grad_norm": 0.4962225556373596, |
|
"learning_rate": 0.00014350304903553416, |
|
"loss": 0.9498, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.2029615004935834, |
|
"grad_norm": 0.4522517919540405, |
|
"learning_rate": 0.00014330232140276366, |
|
"loss": 0.8796, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.2033563672260612, |
|
"grad_norm": 0.4583631455898285, |
|
"learning_rate": 0.00014310137880263482, |
|
"loss": 0.9822, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.203751233958539, |
|
"grad_norm": 0.5668156147003174, |
|
"learning_rate": 0.00014290022223269463, |
|
"loss": 0.8197, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.20414610069101677, |
|
"grad_norm": 0.47949913144111633, |
|
"learning_rate": 0.0001426988526915523, |
|
"loss": 0.9435, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.20454096742349456, |
|
"grad_norm": 0.497577965259552, |
|
"learning_rate": 0.00014249727117887425, |
|
"loss": 0.8928, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.20493583415597236, |
|
"grad_norm": 0.4772360622882843, |
|
"learning_rate": 0.0001422954786953793, |
|
"loss": 1.0171, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.20533070088845015, |
|
"grad_norm": 0.5616466999053955, |
|
"learning_rate": 0.0001420934762428335, |
|
"loss": 0.9387, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.20572556762092795, |
|
"grad_norm": 0.6362606287002563, |
|
"learning_rate": 0.00014189126482404532, |
|
"loss": 0.7291, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.2061204343534057, |
|
"grad_norm": 0.4801354706287384, |
|
"learning_rate": 0.00014168884544286053, |
|
"loss": 0.7701, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.2065153010858835, |
|
"grad_norm": 0.45586976408958435, |
|
"learning_rate": 0.0001414862191041574, |
|
"loss": 0.9861, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.2069101678183613, |
|
"grad_norm": 0.5662283301353455, |
|
"learning_rate": 0.00014128338681384153, |
|
"loss": 0.9726, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.2073050345508391, |
|
"grad_norm": 0.5532451868057251, |
|
"learning_rate": 0.00014108034957884094, |
|
"loss": 1.2107, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2076999012833169, |
|
"grad_norm": 0.5659233331680298, |
|
"learning_rate": 0.0001408771084071012, |
|
"loss": 0.9739, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.20809476801579466, |
|
"grad_norm": 0.5252379775047302, |
|
"learning_rate": 0.00014067366430758004, |
|
"loss": 0.9744, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.20848963474827245, |
|
"grad_norm": 0.6081037521362305, |
|
"learning_rate": 0.0001404700182902428, |
|
"loss": 1.0524, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.20888450148075025, |
|
"grad_norm": 0.6748234033584595, |
|
"learning_rate": 0.0001402661713660571, |
|
"loss": 0.9468, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.20927936821322804, |
|
"grad_norm": 0.4868841767311096, |
|
"learning_rate": 0.00014006212454698797, |
|
"loss": 1.0739, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.20967423494570583, |
|
"grad_norm": 0.5020860433578491, |
|
"learning_rate": 0.00013985787884599282, |
|
"loss": 0.9586, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.2100691016781836, |
|
"grad_norm": 0.579229474067688, |
|
"learning_rate": 0.00013965343527701628, |
|
"loss": 0.8937, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.2104639684106614, |
|
"grad_norm": 0.48879000544548035, |
|
"learning_rate": 0.00013944879485498538, |
|
"loss": 0.956, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.2108588351431392, |
|
"grad_norm": 0.5132958292961121, |
|
"learning_rate": 0.00013924395859580432, |
|
"loss": 0.8762, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.21125370187561698, |
|
"grad_norm": 0.5426986217498779, |
|
"learning_rate": 0.00013903892751634947, |
|
"loss": 1.0018, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.21164856860809478, |
|
"grad_norm": 0.5325112342834473, |
|
"learning_rate": 0.0001388337026344645, |
|
"loss": 0.9227, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.21204343534057254, |
|
"grad_norm": 0.5148372650146484, |
|
"learning_rate": 0.000138628284968955, |
|
"loss": 1.0617, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.21243830207305034, |
|
"grad_norm": 0.4726095199584961, |
|
"learning_rate": 0.00013842267553958371, |
|
"loss": 0.9038, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.21283316880552813, |
|
"grad_norm": 0.504017174243927, |
|
"learning_rate": 0.00013821687536706533, |
|
"loss": 1.0946, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.21322803553800593, |
|
"grad_norm": 0.483732670545578, |
|
"learning_rate": 0.00013801088547306148, |
|
"loss": 0.7506, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.21362290227048372, |
|
"grad_norm": 0.49207037687301636, |
|
"learning_rate": 0.00013780470688017562, |
|
"loss": 0.8905, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.2140177690029615, |
|
"grad_norm": 0.5545893311500549, |
|
"learning_rate": 0.00013759834061194794, |
|
"loss": 0.808, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.21441263573543928, |
|
"grad_norm": 0.657805323600769, |
|
"learning_rate": 0.00013739178769285032, |
|
"loss": 0.8566, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.21480750246791708, |
|
"grad_norm": 0.5868344902992249, |
|
"learning_rate": 0.00013718504914828135, |
|
"loss": 0.9001, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.21520236920039487, |
|
"grad_norm": 0.4816092550754547, |
|
"learning_rate": 0.00013697812600456093, |
|
"loss": 0.993, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.21559723593287267, |
|
"grad_norm": 0.5755246877670288, |
|
"learning_rate": 0.00013677101928892554, |
|
"loss": 1.1376, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.21599210266535043, |
|
"grad_norm": 0.5235198736190796, |
|
"learning_rate": 0.0001365637300295229, |
|
"loss": 1.036, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.21638696939782823, |
|
"grad_norm": 0.4904315173625946, |
|
"learning_rate": 0.00013635625925540696, |
|
"loss": 1.0033, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.21678183613030602, |
|
"grad_norm": 0.49426376819610596, |
|
"learning_rate": 0.00013614860799653276, |
|
"loss": 1.0455, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.21717670286278382, |
|
"grad_norm": 0.5230404734611511, |
|
"learning_rate": 0.00013594077728375128, |
|
"loss": 0.8619, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.2175715695952616, |
|
"grad_norm": 0.4252125918865204, |
|
"learning_rate": 0.0001357327681488045, |
|
"loss": 0.552, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.21796643632773938, |
|
"grad_norm": 0.6176772117614746, |
|
"learning_rate": 0.00013552458162432003, |
|
"loss": 0.9374, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.21836130306021717, |
|
"grad_norm": 0.476182222366333, |
|
"learning_rate": 0.00013531621874380613, |
|
"loss": 0.9189, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.21875616979269497, |
|
"grad_norm": 0.5183379054069519, |
|
"learning_rate": 0.00013510768054164653, |
|
"loss": 0.9177, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.21915103652517276, |
|
"grad_norm": 0.5251573920249939, |
|
"learning_rate": 0.00013489896805309542, |
|
"loss": 0.8619, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.21954590325765055, |
|
"grad_norm": 0.508030116558075, |
|
"learning_rate": 0.00013469008231427207, |
|
"loss": 1.032, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.21994076999012832, |
|
"grad_norm": 0.5486171245574951, |
|
"learning_rate": 0.00013448102436215592, |
|
"loss": 0.8481, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.22033563672260612, |
|
"grad_norm": 0.5298491716384888, |
|
"learning_rate": 0.00013427179523458127, |
|
"loss": 0.7748, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.2207305034550839, |
|
"grad_norm": 0.6214480996131897, |
|
"learning_rate": 0.00013406239597023225, |
|
"loss": 1.0314, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.2211253701875617, |
|
"grad_norm": 0.5228508710861206, |
|
"learning_rate": 0.00013385282760863758, |
|
"loss": 0.9916, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.2215202369200395, |
|
"grad_norm": 0.5693901777267456, |
|
"learning_rate": 0.00013364309119016538, |
|
"loss": 1.0026, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.22191510365251726, |
|
"grad_norm": 0.4827022850513458, |
|
"learning_rate": 0.0001334331877560182, |
|
"loss": 0.8068, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.22230997038499506, |
|
"grad_norm": 0.4529504179954529, |
|
"learning_rate": 0.00013322311834822756, |
|
"loss": 0.8318, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.22270483711747285, |
|
"grad_norm": 0.5850614309310913, |
|
"learning_rate": 0.00013301288400964902, |
|
"loss": 0.8946, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.22309970384995065, |
|
"grad_norm": 0.5779694318771362, |
|
"learning_rate": 0.0001328024857839569, |
|
"loss": 0.7861, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.22349457058242844, |
|
"grad_norm": 0.5600647926330566, |
|
"learning_rate": 0.00013259192471563912, |
|
"loss": 0.7921, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.2238894373149062, |
|
"grad_norm": 0.5468648076057434, |
|
"learning_rate": 0.00013238120184999195, |
|
"loss": 0.7732, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.224284304047384, |
|
"grad_norm": 0.619243323802948, |
|
"learning_rate": 0.00013217031823311488, |
|
"loss": 1.2012, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.2246791707798618, |
|
"grad_norm": 0.5478827953338623, |
|
"learning_rate": 0.00013195927491190554, |
|
"loss": 1.0279, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.2250740375123396, |
|
"grad_norm": 0.520038366317749, |
|
"learning_rate": 0.00013174807293405428, |
|
"loss": 0.8806, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2254689042448174, |
|
"grad_norm": 0.46674588322639465, |
|
"learning_rate": 0.00013153671334803905, |
|
"loss": 0.7596, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.22586377097729515, |
|
"grad_norm": 0.510772705078125, |
|
"learning_rate": 0.0001313251972031203, |
|
"loss": 0.873, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.22625863770977295, |
|
"grad_norm": 0.4360644221305847, |
|
"learning_rate": 0.00013111352554933563, |
|
"loss": 0.9622, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.22665350444225074, |
|
"grad_norm": 0.4816182255744934, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 1.0487, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.22704837117472854, |
|
"grad_norm": 0.48583704233169556, |
|
"learning_rate": 0.000130689719919174, |
|
"loss": 0.8041, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.22744323790720633, |
|
"grad_norm": 0.5958275198936462, |
|
"learning_rate": 0.00013047758804671136, |
|
"loss": 0.9466, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.2278381046396841, |
|
"grad_norm": 0.4736819863319397, |
|
"learning_rate": 0.00013026530487320113, |
|
"loss": 0.9483, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.2282329713721619, |
|
"grad_norm": 0.5173781514167786, |
|
"learning_rate": 0.00013005287145248878, |
|
"loss": 0.8428, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.22862783810463969, |
|
"grad_norm": 0.44686052203178406, |
|
"learning_rate": 0.00012984028883916552, |
|
"loss": 1.009, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.22902270483711748, |
|
"grad_norm": 0.5016387701034546, |
|
"learning_rate": 0.00012962755808856342, |
|
"loss": 1.0226, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.22941757156959527, |
|
"grad_norm": 0.6307184100151062, |
|
"learning_rate": 0.0001294146802567497, |
|
"loss": 0.9179, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.22981243830207304, |
|
"grad_norm": 0.43451613187789917, |
|
"learning_rate": 0.0001292016564005219, |
|
"loss": 0.9468, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.23020730503455084, |
|
"grad_norm": 0.5214070081710815, |
|
"learning_rate": 0.00012898848757740246, |
|
"loss": 0.9226, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.23060217176702863, |
|
"grad_norm": 0.6036335825920105, |
|
"learning_rate": 0.00012877517484563344, |
|
"loss": 0.9585, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.23099703849950642, |
|
"grad_norm": 0.5829451084136963, |
|
"learning_rate": 0.00012856171926417133, |
|
"loss": 1.2637, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.23139190523198422, |
|
"grad_norm": 0.5726488828659058, |
|
"learning_rate": 0.0001283481218926818, |
|
"loss": 0.9223, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.23178677196446199, |
|
"grad_norm": 0.501338005065918, |
|
"learning_rate": 0.0001281343837915344, |
|
"loss": 0.8892, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.23218163869693978, |
|
"grad_norm": 0.5073863863945007, |
|
"learning_rate": 0.00012792050602179725, |
|
"loss": 0.8541, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.23257650542941757, |
|
"grad_norm": 0.6474023461341858, |
|
"learning_rate": 0.00012770648964523194, |
|
"loss": 1.1276, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.23297137216189537, |
|
"grad_norm": 0.48500892519950867, |
|
"learning_rate": 0.00012749233572428804, |
|
"loss": 1.0324, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.23336623889437316, |
|
"grad_norm": 0.5808454155921936, |
|
"learning_rate": 0.00012727804532209803, |
|
"loss": 1.0817, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.23376110562685093, |
|
"grad_norm": 0.49606916308403015, |
|
"learning_rate": 0.0001270636195024719, |
|
"loss": 0.9889, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.23415597235932872, |
|
"grad_norm": 0.6760202050209045, |
|
"learning_rate": 0.00012684905932989186, |
|
"loss": 1.1171, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.23455083909180652, |
|
"grad_norm": 0.44945040345191956, |
|
"learning_rate": 0.00012663436586950714, |
|
"loss": 0.9308, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.2349457058242843, |
|
"grad_norm": 0.6141867637634277, |
|
"learning_rate": 0.00012641954018712863, |
|
"loss": 1.1256, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.2353405725567621, |
|
"grad_norm": 0.5368825197219849, |
|
"learning_rate": 0.0001262045833492236, |
|
"loss": 1.0181, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.23573543928923987, |
|
"grad_norm": 0.5402265787124634, |
|
"learning_rate": 0.00012598949642291047, |
|
"loss": 1.2135, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.23613030602171767, |
|
"grad_norm": 0.5296156406402588, |
|
"learning_rate": 0.00012577428047595344, |
|
"loss": 0.8361, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.23652517275419546, |
|
"grad_norm": 0.4741549491882324, |
|
"learning_rate": 0.00012555893657675718, |
|
"loss": 0.9022, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.23692003948667326, |
|
"grad_norm": 0.4711611866950989, |
|
"learning_rate": 0.0001253434657943616, |
|
"loss": 1.0165, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23731490621915102, |
|
"grad_norm": 0.4527783989906311, |
|
"learning_rate": 0.00012512786919843648, |
|
"loss": 0.8954, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.23770977295162882, |
|
"grad_norm": 0.5278099775314331, |
|
"learning_rate": 0.0001249121478592762, |
|
"loss": 1.0807, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.2381046396841066, |
|
"grad_norm": 0.45427364110946655, |
|
"learning_rate": 0.00012469630284779438, |
|
"loss": 0.9407, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.2384995064165844, |
|
"grad_norm": 0.5772978663444519, |
|
"learning_rate": 0.00012448033523551865, |
|
"loss": 1.2734, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.2388943731490622, |
|
"grad_norm": 0.5643810033798218, |
|
"learning_rate": 0.00012426424609458518, |
|
"loss": 1.0125, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.23928923988153997, |
|
"grad_norm": 0.5393880009651184, |
|
"learning_rate": 0.0001240480364977335, |
|
"loss": 0.8613, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.23968410661401776, |
|
"grad_norm": 0.49924036860466003, |
|
"learning_rate": 0.0001238317075183011, |
|
"loss": 0.9862, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.24007897334649556, |
|
"grad_norm": 0.4382825493812561, |
|
"learning_rate": 0.00012361526023021822, |
|
"loss": 0.8692, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.24047384007897335, |
|
"grad_norm": 0.5655290484428406, |
|
"learning_rate": 0.00012339869570800232, |
|
"loss": 0.9122, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.24086870681145114, |
|
"grad_norm": 0.5245158076286316, |
|
"learning_rate": 0.00012318201502675285, |
|
"loss": 0.8347, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.2412635735439289, |
|
"grad_norm": 0.5116402506828308, |
|
"learning_rate": 0.00012296521926214596, |
|
"loss": 1.0317, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.2416584402764067, |
|
"grad_norm": 0.4878910779953003, |
|
"learning_rate": 0.00012274830949042908, |
|
"loss": 0.8947, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.2420533070088845, |
|
"grad_norm": 0.5890231132507324, |
|
"learning_rate": 0.00012253128678841568, |
|
"loss": 1.0683, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.2424481737413623, |
|
"grad_norm": 0.4757651388645172, |
|
"learning_rate": 0.00012231415223347972, |
|
"loss": 0.9816, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.2428430404738401, |
|
"grad_norm": 0.5415259599685669, |
|
"learning_rate": 0.0001220969069035506, |
|
"loss": 0.9583, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.24323790720631785, |
|
"grad_norm": 0.5389688014984131, |
|
"learning_rate": 0.0001218795518771075, |
|
"loss": 0.7091, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.24363277393879565, |
|
"grad_norm": 0.8125389814376831, |
|
"learning_rate": 0.00012166208823317427, |
|
"loss": 1.0042, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.24402764067127344, |
|
"grad_norm": 0.5463893413543701, |
|
"learning_rate": 0.0001214445170513139, |
|
"loss": 1.0685, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.24442250740375124, |
|
"grad_norm": 0.4813181161880493, |
|
"learning_rate": 0.0001212268394116233, |
|
"loss": 0.8353, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.24481737413622903, |
|
"grad_norm": 0.4448351263999939, |
|
"learning_rate": 0.00012100905639472779, |
|
"loss": 1.0261, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2452122408687068, |
|
"grad_norm": 0.509598433971405, |
|
"learning_rate": 0.00012079116908177593, |
|
"loss": 1.0274, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.2456071076011846, |
|
"grad_norm": 0.5108718276023865, |
|
"learning_rate": 0.00012057317855443395, |
|
"loss": 0.8464, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.2460019743336624, |
|
"grad_norm": 0.4307985007762909, |
|
"learning_rate": 0.00012035508589488053, |
|
"loss": 0.9281, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.24639684106614018, |
|
"grad_norm": 0.5565152764320374, |
|
"learning_rate": 0.00012013689218580132, |
|
"loss": 0.9916, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.24679170779861798, |
|
"grad_norm": 0.7218130826950073, |
|
"learning_rate": 0.0001199185985103836, |
|
"loss": 1.0378, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.24718657453109574, |
|
"grad_norm": 0.5790725350379944, |
|
"learning_rate": 0.00011970020595231101, |
|
"loss": 1.0886, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.24758144126357354, |
|
"grad_norm": 0.5239992141723633, |
|
"learning_rate": 0.000119481715595758, |
|
"loss": 1.0763, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.24797630799605133, |
|
"grad_norm": 0.5147425532341003, |
|
"learning_rate": 0.00011926312852538455, |
|
"loss": 1.1421, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.24837117472852913, |
|
"grad_norm": 0.6474115252494812, |
|
"learning_rate": 0.0001190444458263307, |
|
"loss": 0.8813, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.24876604146100692, |
|
"grad_norm": 0.46953749656677246, |
|
"learning_rate": 0.00011882566858421135, |
|
"loss": 0.6636, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2491609081934847, |
|
"grad_norm": 0.6197345852851868, |
|
"learning_rate": 0.00011860679788511064, |
|
"loss": 0.8935, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.24955577492596248, |
|
"grad_norm": 0.5545244812965393, |
|
"learning_rate": 0.00011838783481557664, |
|
"loss": 0.7358, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.24995064165844028, |
|
"grad_norm": 0.5150088667869568, |
|
"learning_rate": 0.00011816878046261615, |
|
"loss": 0.8935, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.25034550839091807, |
|
"grad_norm": 0.5777970552444458, |
|
"learning_rate": 0.00011794963591368893, |
|
"loss": 0.9984, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.25074037512339586, |
|
"grad_norm": 0.6271523237228394, |
|
"learning_rate": 0.00011773040225670256, |
|
"loss": 1.0425, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.25113524185587366, |
|
"grad_norm": 0.5507854223251343, |
|
"learning_rate": 0.00011751108058000706, |
|
"loss": 1.017, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.25153010858835145, |
|
"grad_norm": 0.5012635588645935, |
|
"learning_rate": 0.00011729167197238935, |
|
"loss": 1.0421, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.25192497532082925, |
|
"grad_norm": 0.5927073955535889, |
|
"learning_rate": 0.0001170721775230679, |
|
"loss": 0.8634, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.252319842053307, |
|
"grad_norm": 0.5467817783355713, |
|
"learning_rate": 0.0001168525983216873, |
|
"loss": 0.828, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.2527147087857848, |
|
"grad_norm": 0.5477814078330994, |
|
"learning_rate": 0.00011663293545831302, |
|
"loss": 0.8878, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2531095755182626, |
|
"grad_norm": 0.5831199884414673, |
|
"learning_rate": 0.00011641319002342568, |
|
"loss": 0.9146, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.25350444225074037, |
|
"grad_norm": 0.4861442446708679, |
|
"learning_rate": 0.00011619336310791586, |
|
"loss": 0.9527, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.25389930898321816, |
|
"grad_norm": 0.4937233030796051, |
|
"learning_rate": 0.00011597345580307875, |
|
"loss": 0.8631, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.25429417571569596, |
|
"grad_norm": 0.5312657952308655, |
|
"learning_rate": 0.00011575346920060846, |
|
"loss": 1.0702, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.25468904244817375, |
|
"grad_norm": 0.5473558306694031, |
|
"learning_rate": 0.00011553340439259286, |
|
"loss": 0.8867, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.25508390918065155, |
|
"grad_norm": 0.5094785094261169, |
|
"learning_rate": 0.00011531326247150803, |
|
"loss": 0.9356, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.25547877591312934, |
|
"grad_norm": 0.4709779620170593, |
|
"learning_rate": 0.00011509304453021288, |
|
"loss": 0.8547, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.2558736426456071, |
|
"grad_norm": 0.5478555560112, |
|
"learning_rate": 0.00011487275166194367, |
|
"loss": 1.013, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.2562685093780849, |
|
"grad_norm": 0.5569909811019897, |
|
"learning_rate": 0.00011465238496030868, |
|
"loss": 0.9655, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.25666337611056267, |
|
"grad_norm": 0.6182945370674133, |
|
"learning_rate": 0.00011443194551928266, |
|
"loss": 1.0553, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.25705824284304046, |
|
"grad_norm": 0.49789944291114807, |
|
"learning_rate": 0.00011421143443320155, |
|
"loss": 1.0104, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.25745310957551826, |
|
"grad_norm": 0.5329926013946533, |
|
"learning_rate": 0.00011399085279675687, |
|
"loss": 1.1935, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.25784797630799605, |
|
"grad_norm": 0.6216697692871094, |
|
"learning_rate": 0.0001137702017049904, |
|
"loss": 0.9502, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.25824284304047385, |
|
"grad_norm": 0.46455976366996765, |
|
"learning_rate": 0.00011354948225328877, |
|
"loss": 1.0217, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.25863770977295164, |
|
"grad_norm": 0.5468802452087402, |
|
"learning_rate": 0.0001133286955373779, |
|
"loss": 0.9669, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.25903257650542943, |
|
"grad_norm": 0.6737267971038818, |
|
"learning_rate": 0.00011310784265331769, |
|
"loss": 0.828, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.25942744323790723, |
|
"grad_norm": 0.5070087909698486, |
|
"learning_rate": 0.00011288692469749649, |
|
"loss": 0.9463, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.25982230997038497, |
|
"grad_norm": 0.5430976152420044, |
|
"learning_rate": 0.0001126659427666257, |
|
"loss": 0.9986, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.26021717670286276, |
|
"grad_norm": 0.4886017143726349, |
|
"learning_rate": 0.00011244489795773432, |
|
"loss": 1.0702, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.26061204343534056, |
|
"grad_norm": 0.4727013111114502, |
|
"learning_rate": 0.00011222379136816345, |
|
"loss": 0.9179, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.26100691016781835, |
|
"grad_norm": 0.48751479387283325, |
|
"learning_rate": 0.00011200262409556097, |
|
"loss": 0.9176, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.26140177690029615, |
|
"grad_norm": 0.511191189289093, |
|
"learning_rate": 0.00011178139723787597, |
|
"loss": 1.0286, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.26179664363277394, |
|
"grad_norm": 0.48295167088508606, |
|
"learning_rate": 0.00011156011189335332, |
|
"loss": 0.9306, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.26219151036525173, |
|
"grad_norm": 0.4702792167663574, |
|
"learning_rate": 0.00011133876916052821, |
|
"loss": 0.9666, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.26258637709772953, |
|
"grad_norm": 0.5391889810562134, |
|
"learning_rate": 0.00011111737013822088, |
|
"loss": 0.9043, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.2629812438302073, |
|
"grad_norm": 0.6235263347625732, |
|
"learning_rate": 0.00011089591592553082, |
|
"loss": 1.0091, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.2633761105626851, |
|
"grad_norm": 0.5476643443107605, |
|
"learning_rate": 0.00011067440762183164, |
|
"loss": 0.9399, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.26377097729516286, |
|
"grad_norm": 0.42856502532958984, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 0.8856, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.26416584402764065, |
|
"grad_norm": 0.6507248878479004, |
|
"learning_rate": 0.00011023123314023717, |
|
"loss": 1.0047, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.26456071076011844, |
|
"grad_norm": 0.5612763166427612, |
|
"learning_rate": 0.00011000956916240985, |
|
"loss": 0.9552, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.26495557749259624, |
|
"grad_norm": 0.6156942844390869, |
|
"learning_rate": 0.00010978785549369823, |
|
"loss": 0.7834, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.26535044422507403, |
|
"grad_norm": 0.5807977914810181, |
|
"learning_rate": 0.00010956609323476399, |
|
"loss": 1.0304, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.26574531095755183, |
|
"grad_norm": 0.4886478781700134, |
|
"learning_rate": 0.00010934428348650986, |
|
"loss": 1.0725, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.2661401776900296, |
|
"grad_norm": 0.6015396118164062, |
|
"learning_rate": 0.00010912242735007441, |
|
"loss": 0.9459, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.2665350444225074, |
|
"grad_norm": 0.5361436605453491, |
|
"learning_rate": 0.0001089005259268265, |
|
"loss": 1.0034, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.2669299111549852, |
|
"grad_norm": 0.5702396035194397, |
|
"learning_rate": 0.00010867858031835975, |
|
"loss": 1.1566, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.267324777887463, |
|
"grad_norm": 0.5034629702568054, |
|
"learning_rate": 0.00010845659162648723, |
|
"loss": 0.9551, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.26771964461994074, |
|
"grad_norm": 0.49666622281074524, |
|
"learning_rate": 0.00010823456095323579, |
|
"loss": 0.9059, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.26811451135241854, |
|
"grad_norm": 0.5842538475990295, |
|
"learning_rate": 0.00010801248940084074, |
|
"loss": 0.8893, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.26850937808489633, |
|
"grad_norm": 0.5735609531402588, |
|
"learning_rate": 0.00010779037807174033, |
|
"loss": 0.983, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2689042448173741, |
|
"grad_norm": 0.5265182256698608, |
|
"learning_rate": 0.00010756822806857028, |
|
"loss": 0.8681, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.2692991115498519, |
|
"grad_norm": 0.5432460308074951, |
|
"learning_rate": 0.00010734604049415822, |
|
"loss": 0.7613, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.2696939782823297, |
|
"grad_norm": 0.5309749245643616, |
|
"learning_rate": 0.00010712381645151844, |
|
"loss": 1.1094, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.2700888450148075, |
|
"grad_norm": 0.588448703289032, |
|
"learning_rate": 0.00010690155704384615, |
|
"loss": 1.082, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.2704837117472853, |
|
"grad_norm": 0.4991750717163086, |
|
"learning_rate": 0.00010667926337451217, |
|
"loss": 0.8744, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.2708785784797631, |
|
"grad_norm": 0.4775620698928833, |
|
"learning_rate": 0.0001064569365470574, |
|
"loss": 1.025, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.2712734452122409, |
|
"grad_norm": 0.5142999291419983, |
|
"learning_rate": 0.00010623457766518736, |
|
"loss": 0.9704, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.27166831194471863, |
|
"grad_norm": 0.48825737833976746, |
|
"learning_rate": 0.00010601218783276672, |
|
"loss": 1.122, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.2720631786771964, |
|
"grad_norm": 0.5517759919166565, |
|
"learning_rate": 0.00010578976815381372, |
|
"loss": 0.8859, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.2724580454096742, |
|
"grad_norm": 0.5661524534225464, |
|
"learning_rate": 0.00010556731973249485, |
|
"loss": 0.89, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.272852912142152, |
|
"grad_norm": 0.5802194476127625, |
|
"learning_rate": 0.00010534484367311923, |
|
"loss": 0.8048, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.2732477788746298, |
|
"grad_norm": 0.5603414177894592, |
|
"learning_rate": 0.00010512234108013319, |
|
"loss": 1.0869, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.2736426456071076, |
|
"grad_norm": 0.5277503728866577, |
|
"learning_rate": 0.00010489981305811487, |
|
"loss": 0.7287, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.2740375123395854, |
|
"grad_norm": 0.6045374870300293, |
|
"learning_rate": 0.00010467726071176853, |
|
"loss": 0.9417, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.2744323790720632, |
|
"grad_norm": 0.6021410822868347, |
|
"learning_rate": 0.00010445468514591925, |
|
"loss": 1.1399, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.274827245804541, |
|
"grad_norm": 0.5544920563697815, |
|
"learning_rate": 0.00010423208746550732, |
|
"loss": 0.7279, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.2752221125370188, |
|
"grad_norm": 0.5612072348594666, |
|
"learning_rate": 0.00010400946877558293, |
|
"loss": 0.8715, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.2756169792694965, |
|
"grad_norm": 0.5820972323417664, |
|
"learning_rate": 0.00010378683018130047, |
|
"loss": 0.9456, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.2760118460019743, |
|
"grad_norm": 0.43969717621803284, |
|
"learning_rate": 0.0001035641727879131, |
|
"loss": 0.8192, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.2764067127344521, |
|
"grad_norm": 0.6528908610343933, |
|
"learning_rate": 0.00010334149770076747, |
|
"loss": 0.9742, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2768015794669299, |
|
"grad_norm": 0.4668010473251343, |
|
"learning_rate": 0.00010311880602529794, |
|
"loss": 0.9471, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.2771964461994077, |
|
"grad_norm": 0.5474753379821777, |
|
"learning_rate": 0.0001028960988670212, |
|
"loss": 0.9224, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.2775913129318855, |
|
"grad_norm": 0.5672122240066528, |
|
"learning_rate": 0.00010267337733153089, |
|
"loss": 0.8441, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.2779861796643633, |
|
"grad_norm": 0.5570011138916016, |
|
"learning_rate": 0.00010245064252449201, |
|
"loss": 0.9897, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.2783810463968411, |
|
"grad_norm": 0.5113812685012817, |
|
"learning_rate": 0.0001022278955516354, |
|
"loss": 1.0066, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.2787759131293189, |
|
"grad_norm": 0.48175865411758423, |
|
"learning_rate": 0.00010200513751875227, |
|
"loss": 0.9212, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.27917077986179667, |
|
"grad_norm": 0.5643198490142822, |
|
"learning_rate": 0.00010178236953168885, |
|
"loss": 1.2371, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.2795656465942744, |
|
"grad_norm": 0.5115451216697693, |
|
"learning_rate": 0.00010155959269634068, |
|
"loss": 0.9701, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.2799605133267522, |
|
"grad_norm": 0.5253522396087646, |
|
"learning_rate": 0.00010133680811864727, |
|
"loss": 1.0685, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.28035538005923, |
|
"grad_norm": 0.7160750031471252, |
|
"learning_rate": 0.00010111401690458654, |
|
"loss": 1.0837, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.28035538005923, |
|
"eval_loss": 0.9457208514213562, |
|
"eval_runtime": 61.5761, |
|
"eval_samples_per_second": 17.328, |
|
"eval_steps_per_second": 8.672, |
|
"step": 710 |
|
} |
|
], |
"logging_steps": 1, |
|
"max_steps": 1420, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 355, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.602799288203346e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |