|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 9480, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010548523206751054, |
|
"grad_norm": 1.2911568880081177, |
|
"learning_rate": 0.00015822784810126583, |
|
"loss": 7.5122, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002109704641350211, |
|
"grad_norm": 1.1530958414077759, |
|
"learning_rate": 0.00031645569620253165, |
|
"loss": 6.906, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0031645569620253164, |
|
"grad_norm": 0.8528544306755066, |
|
"learning_rate": 0.00047468354430379745, |
|
"loss": 6.2587, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.004219409282700422, |
|
"grad_norm": 0.9695652723312378, |
|
"learning_rate": 0.0006329113924050633, |
|
"loss": 5.7729, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.005274261603375527, |
|
"grad_norm": 0.8636988401412964, |
|
"learning_rate": 0.0007911392405063291, |
|
"loss": 5.3159, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.006329113924050633, |
|
"grad_norm": 0.9786996841430664, |
|
"learning_rate": 0.0009493670886075949, |
|
"loss": 4.8059, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.007383966244725738, |
|
"grad_norm": 1.5117980241775513, |
|
"learning_rate": 0.0011075949367088608, |
|
"loss": 4.3993, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.008438818565400843, |
|
"grad_norm": 0.8246951699256897, |
|
"learning_rate": 0.0012658227848101266, |
|
"loss": 4.1436, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.00949367088607595, |
|
"grad_norm": 1.2078777551651, |
|
"learning_rate": 0.0014240506329113926, |
|
"loss": 3.9317, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.010548523206751054, |
|
"grad_norm": 0.9894590377807617, |
|
"learning_rate": 0.0015, |
|
"loss": 3.7802, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.011603375527426161, |
|
"grad_norm": 0.8762866854667664, |
|
"learning_rate": 0.0015, |
|
"loss": 3.622, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.012658227848101266, |
|
"grad_norm": 0.9929517507553101, |
|
"learning_rate": 0.0015, |
|
"loss": 3.5117, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.013713080168776372, |
|
"grad_norm": 0.7622191905975342, |
|
"learning_rate": 0.0015, |
|
"loss": 3.4092, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.014767932489451477, |
|
"grad_norm": 0.7740503549575806, |
|
"learning_rate": 0.0015, |
|
"loss": 3.3058, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.015822784810126583, |
|
"grad_norm": 0.8201466202735901, |
|
"learning_rate": 0.0015, |
|
"loss": 3.2344, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.016877637130801686, |
|
"grad_norm": 0.829488217830658, |
|
"learning_rate": 0.0015, |
|
"loss": 3.175, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.017932489451476793, |
|
"grad_norm": 1.1429024934768677, |
|
"learning_rate": 0.0015, |
|
"loss": 3.0982, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0189873417721519, |
|
"grad_norm": 0.7297139167785645, |
|
"learning_rate": 0.0015, |
|
"loss": 3.0552, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.020042194092827006, |
|
"grad_norm": 0.8383708000183105, |
|
"learning_rate": 0.0015, |
|
"loss": 2.9936, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02109704641350211, |
|
"grad_norm": 0.7045958638191223, |
|
"learning_rate": 0.0015, |
|
"loss": 2.9414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.022151898734177215, |
|
"grad_norm": 0.9524938464164734, |
|
"learning_rate": 0.0015, |
|
"loss": 2.9054, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.023206751054852322, |
|
"grad_norm": 0.8136796951293945, |
|
"learning_rate": 0.0015, |
|
"loss": 2.8717, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.024261603375527425, |
|
"grad_norm": 0.7210149765014648, |
|
"learning_rate": 0.0015, |
|
"loss": 2.8125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02531645569620253, |
|
"grad_norm": 1.000649333000183, |
|
"learning_rate": 0.0015, |
|
"loss": 2.7796, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.026371308016877638, |
|
"grad_norm": 0.9027130007743835, |
|
"learning_rate": 0.0015, |
|
"loss": 2.7438, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.027426160337552744, |
|
"grad_norm": 0.8297377824783325, |
|
"learning_rate": 0.0015, |
|
"loss": 2.7113, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.028481012658227847, |
|
"grad_norm": 0.9188632965087891, |
|
"learning_rate": 0.0015, |
|
"loss": 2.6782, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.029535864978902954, |
|
"grad_norm": 0.7633031606674194, |
|
"learning_rate": 0.0015, |
|
"loss": 2.6398, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03059071729957806, |
|
"grad_norm": 0.9104907512664795, |
|
"learning_rate": 0.0015, |
|
"loss": 2.6154, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03164556962025317, |
|
"grad_norm": 0.8751909732818604, |
|
"learning_rate": 0.0015, |
|
"loss": 2.5923, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03270042194092827, |
|
"grad_norm": 0.8580324053764343, |
|
"learning_rate": 0.0015, |
|
"loss": 2.5698, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.03375527426160337, |
|
"grad_norm": 0.9248947501182556, |
|
"learning_rate": 0.0015, |
|
"loss": 2.5412, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.03481012658227848, |
|
"grad_norm": 0.7492154836654663, |
|
"learning_rate": 0.0015, |
|
"loss": 2.5276, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.035864978902953586, |
|
"grad_norm": 0.8280085325241089, |
|
"learning_rate": 0.0015, |
|
"loss": 2.5025, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.03691983122362869, |
|
"grad_norm": 1.2397363185882568, |
|
"learning_rate": 0.0015, |
|
"loss": 2.4612, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0379746835443038, |
|
"grad_norm": 0.8337180614471436, |
|
"learning_rate": 0.0015, |
|
"loss": 2.4443, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.039029535864978905, |
|
"grad_norm": 0.8829106092453003, |
|
"learning_rate": 0.0015, |
|
"loss": 2.4333, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.04008438818565401, |
|
"grad_norm": 0.7179862856864929, |
|
"learning_rate": 0.0015, |
|
"loss": 2.4126, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04113924050632911, |
|
"grad_norm": 1.0350315570831299, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3976, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.04219409282700422, |
|
"grad_norm": 1.0989201068878174, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3746, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.043248945147679324, |
|
"grad_norm": 0.8930501937866211, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3621, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.04430379746835443, |
|
"grad_norm": 0.8837328553199768, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3439, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.04535864978902954, |
|
"grad_norm": 0.7845432162284851, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3147, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.046413502109704644, |
|
"grad_norm": 0.9344269037246704, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3019, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.04746835443037975, |
|
"grad_norm": 0.8818278908729553, |
|
"learning_rate": 0.0015, |
|
"loss": 2.3035, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.04852320675105485, |
|
"grad_norm": 0.6915808916091919, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2788, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.049578059071729956, |
|
"grad_norm": 0.7943204045295715, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2654, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.05063291139240506, |
|
"grad_norm": 1.4367399215698242, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2561, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.05168776371308017, |
|
"grad_norm": 0.7440197467803955, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2299, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.052742616033755275, |
|
"grad_norm": 1.1008273363113403, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2256, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05379746835443038, |
|
"grad_norm": 1.3533852100372314, |
|
"learning_rate": 0.0015, |
|
"loss": 2.2155, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.05485232067510549, |
|
"grad_norm": 0.7687584161758423, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1925, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.05590717299578059, |
|
"grad_norm": 0.7902509570121765, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1772, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.056962025316455694, |
|
"grad_norm": 1.0160045623779297, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1684, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.0580168776371308, |
|
"grad_norm": 0.7253097295761108, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1657, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.05907172995780591, |
|
"grad_norm": 0.730516791343689, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1272, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.060126582278481014, |
|
"grad_norm": 0.8411962389945984, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1316, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.06118143459915612, |
|
"grad_norm": 0.9713028073310852, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1361, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.06223628691983123, |
|
"grad_norm": 1.1778079271316528, |
|
"learning_rate": 0.0015, |
|
"loss": 2.1091, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.06329113924050633, |
|
"grad_norm": 0.6379977464675903, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0929, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.06434599156118144, |
|
"grad_norm": 0.8067770004272461, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0945, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.06540084388185655, |
|
"grad_norm": 0.8583511114120483, |
|
"learning_rate": 0.0015, |
|
"loss": 2.082, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.06645569620253164, |
|
"grad_norm": 1.134140133857727, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0674, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.06751054852320675, |
|
"grad_norm": 0.8797304630279541, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0776, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.06856540084388185, |
|
"grad_norm": 1.2700276374816895, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0655, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.06962025316455696, |
|
"grad_norm": 0.8297256231307983, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0615, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.07067510548523206, |
|
"grad_norm": 0.8372782468795776, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0386, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.07172995780590717, |
|
"grad_norm": 0.7951764464378357, |
|
"learning_rate": 0.0015, |
|
"loss": 2.032, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.07278481012658228, |
|
"grad_norm": 0.766379177570343, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0374, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.07383966244725738, |
|
"grad_norm": 0.7371763586997986, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0164, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.07489451476793249, |
|
"grad_norm": 1.0791587829589844, |
|
"learning_rate": 0.0015, |
|
"loss": 2.0091, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.0759493670886076, |
|
"grad_norm": 0.7908256649971008, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9936, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.0770042194092827, |
|
"grad_norm": 0.8527359962463379, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9844, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.07805907172995781, |
|
"grad_norm": 0.8309198021888733, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9956, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.07911392405063292, |
|
"grad_norm": 1.3767690658569336, |
|
"learning_rate": 0.0015, |
|
"loss": 1.978, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.08016877637130802, |
|
"grad_norm": 0.713165819644928, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9813, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.08122362869198312, |
|
"grad_norm": 0.8718229532241821, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9639, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.08227848101265822, |
|
"grad_norm": 0.8156098127365112, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9645, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.9464802742004395, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9661, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.08438818565400844, |
|
"grad_norm": 0.7087975740432739, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9405, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.08544303797468354, |
|
"grad_norm": 0.6990103125572205, |
|
"learning_rate": 0.0015, |
|
"loss": 1.931, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.08649789029535865, |
|
"grad_norm": 1.6295113563537598, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9414, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.08755274261603375, |
|
"grad_norm": 0.7534579038619995, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9424, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.08860759493670886, |
|
"grad_norm": 0.9456313252449036, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9182, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.08966244725738397, |
|
"grad_norm": 0.770246148109436, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9216, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.09071729957805907, |
|
"grad_norm": 0.8989337682723999, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9174, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.09177215189873418, |
|
"grad_norm": 0.8580651879310608, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9165, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.09282700421940929, |
|
"grad_norm": 0.8543643951416016, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9084, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.0938818565400844, |
|
"grad_norm": 1.1457914113998413, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9017, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.0949367088607595, |
|
"grad_norm": 0.7464300394058228, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8976, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.09599156118143459, |
|
"grad_norm": 0.8035700917243958, |
|
"learning_rate": 0.0015, |
|
"loss": 1.9015, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.0970464135021097, |
|
"grad_norm": 0.7010158896446228, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8904, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.0981012658227848, |
|
"grad_norm": 0.8178808093070984, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8712, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.09915611814345991, |
|
"grad_norm": 1.0780892372131348, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8812, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.10021097046413502, |
|
"grad_norm": 1.793686032295227, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8672, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.10126582278481013, |
|
"grad_norm": 1.1407757997512817, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8798, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.10232067510548523, |
|
"grad_norm": 1.027924656867981, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8681, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.10337552742616034, |
|
"grad_norm": 0.9612833261489868, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8474, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.10443037974683544, |
|
"grad_norm": 0.7732949256896973, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8529, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.10548523206751055, |
|
"grad_norm": 0.9289379119873047, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8605, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.10654008438818566, |
|
"grad_norm": 0.7720140814781189, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8526, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.10759493670886076, |
|
"grad_norm": 0.910430908203125, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8438, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.10864978902953587, |
|
"grad_norm": 0.7728487849235535, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8445, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.10970464135021098, |
|
"grad_norm": 0.8599476218223572, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8354, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.11075949367088607, |
|
"grad_norm": 1.2211686372756958, |
|
"learning_rate": 0.0015, |
|
"loss": 1.839, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.11181434599156118, |
|
"grad_norm": 0.7726693749427795, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8263, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.11286919831223628, |
|
"grad_norm": 0.8299482464790344, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8151, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.11392405063291139, |
|
"grad_norm": 0.6944489479064941, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8128, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.1149789029535865, |
|
"grad_norm": 0.671230673789978, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8235, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.1160337552742616, |
|
"grad_norm": 0.7142427563667297, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8081, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.11708860759493671, |
|
"grad_norm": 0.6354637145996094, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8143, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.11814345991561181, |
|
"grad_norm": 0.7774524092674255, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8084, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.11919831223628692, |
|
"grad_norm": 1.088733196258545, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7934, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.12025316455696203, |
|
"grad_norm": 0.82485032081604, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8074, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.12130801687763713, |
|
"grad_norm": 1.128315806388855, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7956, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.12236286919831224, |
|
"grad_norm": 0.7950155138969421, |
|
"learning_rate": 0.0015, |
|
"loss": 1.8044, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.12341772151898735, |
|
"grad_norm": 0.7386237382888794, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7874, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.12447257383966245, |
|
"grad_norm": 0.7935805916786194, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7747, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.12552742616033755, |
|
"grad_norm": 0.7353824973106384, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7793, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.12658227848101267, |
|
"grad_norm": 1.8419713973999023, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7993, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.12763713080168776, |
|
"grad_norm": 0.7695147395133972, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7884, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.12869198312236288, |
|
"grad_norm": 0.6916271448135376, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7601, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.12974683544303797, |
|
"grad_norm": 0.6987491846084595, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7529, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.1308016877637131, |
|
"grad_norm": 0.7663790583610535, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7754, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.13185654008438819, |
|
"grad_norm": 0.7070717811584473, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7734, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.13291139240506328, |
|
"grad_norm": 0.7695779800415039, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7583, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.1339662447257384, |
|
"grad_norm": 0.8380961418151855, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7568, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.1350210970464135, |
|
"grad_norm": 0.741341233253479, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7734, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.1360759493670886, |
|
"grad_norm": 0.8968631625175476, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7575, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.1371308016877637, |
|
"grad_norm": 0.6770614385604858, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7471, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.13818565400843882, |
|
"grad_norm": 0.7884116172790527, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7506, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.13924050632911392, |
|
"grad_norm": 0.8187959790229797, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7519, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.14029535864978904, |
|
"grad_norm": 1.047227382659912, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7434, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.14135021097046413, |
|
"grad_norm": 0.729209303855896, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7457, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.14240506329113925, |
|
"grad_norm": 0.6964226961135864, |
|
"learning_rate": 0.0015, |
|
"loss": 1.749, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.14345991561181434, |
|
"grad_norm": 0.6814247965812683, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7365, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.14451476793248946, |
|
"grad_norm": 0.7576081156730652, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7334, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.14556962025316456, |
|
"grad_norm": 0.7002779841423035, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7256, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.14662447257383968, |
|
"grad_norm": 0.832123875617981, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7396, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.14767932489451477, |
|
"grad_norm": 0.670310378074646, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7194, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.14873417721518986, |
|
"grad_norm": 0.7912747263908386, |
|
"learning_rate": 0.0015, |
|
"loss": 1.724, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.14978902953586498, |
|
"grad_norm": 0.752036988735199, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7272, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.15084388185654007, |
|
"grad_norm": 0.8257012963294983, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7195, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.1518987341772152, |
|
"grad_norm": 0.6395012140274048, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7203, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.1529535864978903, |
|
"grad_norm": 1.1045129299163818, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7222, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.1540084388185654, |
|
"grad_norm": 0.6789548397064209, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7086, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.1550632911392405, |
|
"grad_norm": 0.8164582848548889, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7098, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.15611814345991562, |
|
"grad_norm": 0.6820360422134399, |
|
"learning_rate": 0.0015, |
|
"loss": 1.713, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.1571729957805907, |
|
"grad_norm": 0.6760832071304321, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7102, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.15822784810126583, |
|
"grad_norm": 0.7806017398834229, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7094, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.15928270042194093, |
|
"grad_norm": 0.8451957106590271, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7126, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.16033755274261605, |
|
"grad_norm": 0.7115667462348938, |
|
"learning_rate": 0.0015, |
|
"loss": 1.7092, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.16139240506329114, |
|
"grad_norm": 0.6442527174949646, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6909, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.16244725738396623, |
|
"grad_norm": 0.6934787034988403, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6835, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.16350210970464135, |
|
"grad_norm": 0.8956286311149597, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6842, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.16455696202531644, |
|
"grad_norm": 0.6710405349731445, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6887, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.16561181434599156, |
|
"grad_norm": 0.6764975786209106, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6835, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 0.6724684834480286, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6908, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.16772151898734178, |
|
"grad_norm": 0.859164834022522, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6862, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.16877637130801687, |
|
"grad_norm": 0.7298034429550171, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6919, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.169831223628692, |
|
"grad_norm": 0.6594526171684265, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6892, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.17088607594936708, |
|
"grad_norm": 0.6169994473457336, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6609, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.1719409282700422, |
|
"grad_norm": 0.7296561002731323, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6726, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.1729957805907173, |
|
"grad_norm": 0.6978626847267151, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6756, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.17405063291139242, |
|
"grad_norm": 1.0857014656066895, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6678, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.1751054852320675, |
|
"grad_norm": 0.6385810375213623, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6669, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.17616033755274263, |
|
"grad_norm": 0.9220226407051086, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6769, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.17721518987341772, |
|
"grad_norm": 1.4121848344802856, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6633, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.17827004219409281, |
|
"grad_norm": 0.7881584167480469, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6546, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.17932489451476794, |
|
"grad_norm": 0.6409121155738831, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6718, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.18037974683544303, |
|
"grad_norm": 0.8101775050163269, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6544, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.18143459915611815, |
|
"grad_norm": 0.6343249678611755, |
|
"learning_rate": 0.0015, |
|
"loss": 1.651, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.18248945147679324, |
|
"grad_norm": 0.7380338907241821, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6558, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.18354430379746836, |
|
"grad_norm": 0.7978950142860413, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6561, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.18459915611814345, |
|
"grad_norm": 0.9163487553596497, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6779, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.18565400843881857, |
|
"grad_norm": 0.6927811503410339, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6588, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.18670886075949367, |
|
"grad_norm": 0.6966419816017151, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6612, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.1877637130801688, |
|
"grad_norm": 0.6320984959602356, |
|
"learning_rate": 0.0015, |
|
"loss": 1.657, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.18881856540084388, |
|
"grad_norm": 0.6386429667472839, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6593, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.189873417721519, |
|
"grad_norm": 0.6478818655014038, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6558, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1909282700421941, |
|
"grad_norm": 0.6771355271339417, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6437, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.19198312236286919, |
|
"grad_norm": 0.7388620376586914, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6406, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.1930379746835443, |
|
"grad_norm": 0.7418689131736755, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6384, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.1940928270042194, |
|
"grad_norm": 0.6168630123138428, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6368, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.19514767932489452, |
|
"grad_norm": 0.7633735537528992, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6434, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.1962025316455696, |
|
"grad_norm": 0.6480771899223328, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6476, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.19725738396624473, |
|
"grad_norm": 0.6744736433029175, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6479, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.19831223628691982, |
|
"grad_norm": 0.625220000743866, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6316, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.19936708860759494, |
|
"grad_norm": 0.636688768863678, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6384, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.20042194092827004, |
|
"grad_norm": 0.9341202974319458, |
|
"learning_rate": 0.0015, |
|
"loss": 1.637, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.20147679324894516, |
|
"grad_norm": 1.0301650762557983, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6372, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.20253164556962025, |
|
"grad_norm": 0.6672254800796509, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6312, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.20358649789029537, |
|
"grad_norm": 0.8981499075889587, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6329, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.20464135021097046, |
|
"grad_norm": 0.6990149021148682, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6252, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.20569620253164558, |
|
"grad_norm": 0.6157594919204712, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6239, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.20675105485232068, |
|
"grad_norm": 0.6597276329994202, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6248, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.20780590717299577, |
|
"grad_norm": 0.6546605825424194, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6354, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.2088607594936709, |
|
"grad_norm": 0.7146171927452087, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6207, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.20991561181434598, |
|
"grad_norm": 0.6842172741889954, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6175, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.2109704641350211, |
|
"grad_norm": 0.7051606774330139, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6161, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2120253164556962, |
|
"grad_norm": 1.0707893371582031, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6239, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.21308016877637131, |
|
"grad_norm": 0.7594018578529358, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6324, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.2141350210970464, |
|
"grad_norm": 0.6623409390449524, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6194, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.21518987341772153, |
|
"grad_norm": 0.6320188641548157, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6173, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.21624472573839662, |
|
"grad_norm": 0.756416380405426, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6166, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.21729957805907174, |
|
"grad_norm": 0.7982528805732727, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6063, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.21835443037974683, |
|
"grad_norm": 0.83033686876297, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6108, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.21940928270042195, |
|
"grad_norm": 0.7017702460289001, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6147, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.22046413502109705, |
|
"grad_norm": 0.8277395963668823, |
|
"learning_rate": 0.0015, |
|
"loss": 1.608, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.22151898734177214, |
|
"grad_norm": 0.8305484056472778, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6151, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.22257383966244726, |
|
"grad_norm": 0.8680278658866882, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6059, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.22362869198312235, |
|
"grad_norm": 0.6655151844024658, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6058, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.22468354430379747, |
|
"grad_norm": 0.7406314611434937, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6052, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.22573839662447256, |
|
"grad_norm": 0.590162992477417, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6005, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.22679324894514769, |
|
"grad_norm": 0.7729781866073608, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6058, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.22784810126582278, |
|
"grad_norm": 1.0980957746505737, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6208, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.2289029535864979, |
|
"grad_norm": 0.7007785439491272, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6039, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.229957805907173, |
|
"grad_norm": 0.7019352912902832, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5898, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.2310126582278481, |
|
"grad_norm": 0.6203840374946594, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6022, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.2320675105485232, |
|
"grad_norm": 0.6874725222587585, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6034, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.23312236286919832, |
|
"grad_norm": 0.5788426399230957, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5894, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.23417721518987342, |
|
"grad_norm": 0.7042095065116882, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6029, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.23523206751054854, |
|
"grad_norm": 0.9149144887924194, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5979, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.23628691983122363, |
|
"grad_norm": 0.8104167580604553, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5993, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.23734177215189872, |
|
"grad_norm": 1.0500192642211914, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5904, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.23839662447257384, |
|
"grad_norm": 1.0872136354446411, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5838, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.23945147679324894, |
|
"grad_norm": 0.855541467666626, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6158, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.24050632911392406, |
|
"grad_norm": 1.3494621515274048, |
|
"learning_rate": 0.0015, |
|
"loss": 1.6022, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.24156118143459915, |
|
"grad_norm": 0.9795144200325012, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5896, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.24261603375527427, |
|
"grad_norm": 0.6512905359268188, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5793, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.24367088607594936, |
|
"grad_norm": 0.6993908286094666, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5821, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.24472573839662448, |
|
"grad_norm": 0.6173799633979797, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5959, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.24578059071729957, |
|
"grad_norm": 0.7689816355705261, |
|
"learning_rate": 0.0015, |
|
"loss": 1.577, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.2468354430379747, |
|
"grad_norm": 0.8228141665458679, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5746, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.2478902953586498, |
|
"grad_norm": 0.6704361438751221, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5789, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.2489451476793249, |
|
"grad_norm": 0.8982261419296265, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5865, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.7105319499969482, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5854, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.2510548523206751, |
|
"grad_norm": 0.590654194355011, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5799, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.2521097046413502, |
|
"grad_norm": 0.60832679271698, |
|
"learning_rate": 0.0015, |
|
"loss": 1.582, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.25316455696202533, |
|
"grad_norm": 0.6442340612411499, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5801, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2542194092827004, |
|
"grad_norm": 0.7528612017631531, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5829, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.2552742616033755, |
|
"grad_norm": 0.9050551652908325, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5761, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.2563291139240506, |
|
"grad_norm": 0.7395772933959961, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5739, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.25738396624472576, |
|
"grad_norm": 0.758362352848053, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5848, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.25843881856540085, |
|
"grad_norm": 0.5878888964653015, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5685, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.25949367088607594, |
|
"grad_norm": 0.6303932666778564, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5837, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.26054852320675104, |
|
"grad_norm": 1.1393781900405884, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5771, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.2616033755274262, |
|
"grad_norm": 0.7777864336967468, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5828, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.2626582278481013, |
|
"grad_norm": 0.5970431566238403, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5623, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.26371308016877637, |
|
"grad_norm": 0.8533217906951904, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5695, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.26476793248945146, |
|
"grad_norm": 0.5901464223861694, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5704, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.26582278481012656, |
|
"grad_norm": 0.7525666356086731, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5735, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.2668776371308017, |
|
"grad_norm": 0.6917023658752441, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5696, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.2679324894514768, |
|
"grad_norm": 0.7417186498641968, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5712, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.2689873417721519, |
|
"grad_norm": 0.6252340078353882, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5657, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.270042194092827, |
|
"grad_norm": 0.7001520395278931, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5735, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.27109704641350213, |
|
"grad_norm": 0.6428219676017761, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5655, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.2721518987341772, |
|
"grad_norm": 0.6188051104545593, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5654, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.2732067510548523, |
|
"grad_norm": 0.6137269735336304, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5704, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.2742616033755274, |
|
"grad_norm": 0.6634780764579773, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5707, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.27531645569620256, |
|
"grad_norm": 0.6164573431015015, |
|
"learning_rate": 0.0015, |
|
"loss": 1.567, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.27637130801687765, |
|
"grad_norm": 0.67591792345047, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5696, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.27742616033755274, |
|
"grad_norm": 0.9030578136444092, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5592, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.27848101265822783, |
|
"grad_norm": 0.6329770088195801, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5689, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.2795358649789029, |
|
"grad_norm": 0.7755404114723206, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5585, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.2805907172995781, |
|
"grad_norm": 0.6837880611419678, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5538, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.28164556962025317, |
|
"grad_norm": 0.6190932989120483, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5561, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.28270042194092826, |
|
"grad_norm": 0.6166747808456421, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5544, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.28375527426160335, |
|
"grad_norm": 0.68338543176651, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5552, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.2848101265822785, |
|
"grad_norm": 0.6845906376838684, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5486, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2858649789029536, |
|
"grad_norm": 0.5682319402694702, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5566, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.2869198312236287, |
|
"grad_norm": 0.5855002999305725, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5574, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.2879746835443038, |
|
"grad_norm": 0.6031274199485779, |
|
"learning_rate": 0.0015, |
|
"loss": 1.54, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.2890295358649789, |
|
"grad_norm": 1.0569102764129639, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5523, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.290084388185654, |
|
"grad_norm": 0.6331525444984436, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5547, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.2911392405063291, |
|
"grad_norm": 0.7460474967956543, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5646, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.2921940928270042, |
|
"grad_norm": 0.6277437806129456, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5591, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.29324894514767935, |
|
"grad_norm": 0.6590365767478943, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5562, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.29430379746835444, |
|
"grad_norm": 0.6113288998603821, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5555, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.29535864978902954, |
|
"grad_norm": 0.5620890855789185, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5516, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.29641350210970463, |
|
"grad_norm": 0.6174807548522949, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5501, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.2974683544303797, |
|
"grad_norm": 0.6747241616249084, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5571, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.29852320675105487, |
|
"grad_norm": 0.9310271143913269, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5321, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.29957805907172996, |
|
"grad_norm": 1.3230280876159668, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5384, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.30063291139240506, |
|
"grad_norm": 1.2144172191619873, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5487, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.30168776371308015, |
|
"grad_norm": 0.6515137553215027, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5432, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.3027426160337553, |
|
"grad_norm": 0.7877812385559082, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5425, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.3037974683544304, |
|
"grad_norm": 0.6689189672470093, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5313, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.3048523206751055, |
|
"grad_norm": 0.6395917534828186, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5345, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.3059071729957806, |
|
"grad_norm": 0.7754723429679871, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5428, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.3069620253164557, |
|
"grad_norm": 0.7293116450309753, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5438, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.3080168776371308, |
|
"grad_norm": 1.249470829963684, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5401, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.3090717299578059, |
|
"grad_norm": 0.6346191763877869, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5335, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.310126582278481, |
|
"grad_norm": 0.6461648941040039, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5374, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.3111814345991561, |
|
"grad_norm": 0.7199176549911499, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5476, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.31223628691983124, |
|
"grad_norm": 0.7429535388946533, |
|
"learning_rate": 0.0015, |
|
"loss": 1.529, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.31329113924050633, |
|
"grad_norm": 0.9656321406364441, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5238, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.3143459915611814, |
|
"grad_norm": 1.1244347095489502, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5349, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.3154008438818565, |
|
"grad_norm": 0.7225420475006104, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5484, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.31645569620253167, |
|
"grad_norm": 0.6689170598983765, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5242, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31751054852320676, |
|
"grad_norm": 0.7187021374702454, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5339, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.31856540084388185, |
|
"grad_norm": 0.6817131042480469, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5356, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.31962025316455694, |
|
"grad_norm": 0.7035455703735352, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5229, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.3206751054852321, |
|
"grad_norm": 0.7125651240348816, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5336, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.3217299578059072, |
|
"grad_norm": 0.6795884966850281, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5301, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.3227848101265823, |
|
"grad_norm": 0.6996033787727356, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5408, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.32383966244725737, |
|
"grad_norm": 0.5682607889175415, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5276, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.32489451476793246, |
|
"grad_norm": 0.5759983658790588, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5197, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.3259493670886076, |
|
"grad_norm": 0.582955539226532, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5363, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.3270042194092827, |
|
"grad_norm": 0.6314529180526733, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5232, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.3280590717299578, |
|
"grad_norm": 0.6088457703590393, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5263, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.3291139240506329, |
|
"grad_norm": 0.5606082677841187, |
|
"learning_rate": 0.0015, |
|
"loss": 1.526, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.33016877637130804, |
|
"grad_norm": 0.7216711044311523, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5379, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.33122362869198313, |
|
"grad_norm": 0.7543506622314453, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5192, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.3322784810126582, |
|
"grad_norm": 0.5943099856376648, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5272, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 0.60477215051651, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5282, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.33438818565400846, |
|
"grad_norm": 0.6148793697357178, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5293, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.33544303797468356, |
|
"grad_norm": 0.591863751411438, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5238, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.33649789029535865, |
|
"grad_norm": 0.6953649520874023, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5245, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.33755274261603374, |
|
"grad_norm": 0.74885493516922, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5195, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.33860759493670883, |
|
"grad_norm": 0.7037512063980103, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5322, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.339662447257384, |
|
"grad_norm": 0.725911021232605, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5245, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.3407172995780591, |
|
"grad_norm": 0.6004663705825806, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5144, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.34177215189873417, |
|
"grad_norm": 0.7108480930328369, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4998, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.34282700421940926, |
|
"grad_norm": 0.6768283843994141, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5249, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.3438818565400844, |
|
"grad_norm": 0.5562488436698914, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5131, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.3449367088607595, |
|
"grad_norm": 0.631421685218811, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5294, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.3459915611814346, |
|
"grad_norm": 0.7008868455886841, |
|
"learning_rate": 0.0015, |
|
"loss": 1.529, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.3470464135021097, |
|
"grad_norm": 0.775509238243103, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5112, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.34810126582278483, |
|
"grad_norm": 0.5601686835289001, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5219, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.3491561181434599, |
|
"grad_norm": 0.5643460750579834, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5197, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.350210970464135, |
|
"grad_norm": 0.5928805470466614, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5228, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.3512658227848101, |
|
"grad_norm": 0.5419443249702454, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5147, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.35232067510548526, |
|
"grad_norm": 0.7151007056236267, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5166, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.35337552742616035, |
|
"grad_norm": 0.5484126210212708, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5121, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.35443037974683544, |
|
"grad_norm": 0.6396426558494568, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5102, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.35548523206751054, |
|
"grad_norm": 0.6092063188552856, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5189, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.35654008438818563, |
|
"grad_norm": 0.6935619711875916, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5133, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.3575949367088608, |
|
"grad_norm": 0.551314115524292, |
|
"learning_rate": 0.0015, |
|
"loss": 1.505, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.35864978902953587, |
|
"grad_norm": 0.5668650269508362, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5166, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.35970464135021096, |
|
"grad_norm": 0.6797813177108765, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5137, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.36075949367088606, |
|
"grad_norm": 0.7667569518089294, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4977, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.3618143459915612, |
|
"grad_norm": 0.7580966353416443, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5138, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.3628691983122363, |
|
"grad_norm": 0.5931734442710876, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5105, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.3639240506329114, |
|
"grad_norm": 0.5610995292663574, |
|
"learning_rate": 0.0015, |
|
"loss": 1.505, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.3649789029535865, |
|
"grad_norm": 0.712026059627533, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4982, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.36603375527426163, |
|
"grad_norm": 0.9137971997261047, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5166, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.3670886075949367, |
|
"grad_norm": 0.764611005783081, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5218, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.3681434599156118, |
|
"grad_norm": 0.5769204497337341, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5108, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.3691983122362869, |
|
"grad_norm": 0.6009048223495483, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4939, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.370253164556962, |
|
"grad_norm": 0.7048670053482056, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5146, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.37130801687763715, |
|
"grad_norm": 0.6143278479576111, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5105, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.37236286919831224, |
|
"grad_norm": 0.5736284852027893, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5183, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.37341772151898733, |
|
"grad_norm": 0.741503894329071, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5123, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.3744725738396624, |
|
"grad_norm": 0.6641824245452881, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5028, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.3755274261603376, |
|
"grad_norm": 0.568838357925415, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5011, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.37658227848101267, |
|
"grad_norm": 0.7397016286849976, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4981, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.37763713080168776, |
|
"grad_norm": 0.5978692173957825, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5027, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.37869198312236285, |
|
"grad_norm": 0.5610095262527466, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5129, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.379746835443038, |
|
"grad_norm": 0.7330253720283508, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4975, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3808016877637131, |
|
"grad_norm": 0.5927225351333618, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5004, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.3818565400843882, |
|
"grad_norm": 0.5715808868408203, |
|
"learning_rate": 0.0015, |
|
"loss": 1.503, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.3829113924050633, |
|
"grad_norm": 0.6043118834495544, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4984, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.38396624472573837, |
|
"grad_norm": 0.7284471392631531, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5008, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.3850210970464135, |
|
"grad_norm": 0.6941373944282532, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5129, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.3860759493670886, |
|
"grad_norm": 0.9151371717453003, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5016, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.3871308016877637, |
|
"grad_norm": 0.5350086092948914, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4956, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.3881856540084388, |
|
"grad_norm": 0.9535840749740601, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4972, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.38924050632911394, |
|
"grad_norm": 0.6288740038871765, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5085, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.39029535864978904, |
|
"grad_norm": 0.6044665575027466, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5114, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.39135021097046413, |
|
"grad_norm": 0.6169572472572327, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4977, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.3924050632911392, |
|
"grad_norm": 0.572669267654419, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4784, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.39345991561181437, |
|
"grad_norm": 0.5647141933441162, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4965, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.39451476793248946, |
|
"grad_norm": 0.604538083076477, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4967, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.39556962025316456, |
|
"grad_norm": 0.729308545589447, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5039, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.39662447257383965, |
|
"grad_norm": 0.9094111323356628, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4912, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.39767932489451474, |
|
"grad_norm": 0.6112377643585205, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5059, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.3987341772151899, |
|
"grad_norm": 0.5163501501083374, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4938, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.399789029535865, |
|
"grad_norm": 0.683885931968689, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4966, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.4008438818565401, |
|
"grad_norm": 0.5952377319335938, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5046, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.40189873417721517, |
|
"grad_norm": 0.6416290998458862, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4908, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.4029535864978903, |
|
"grad_norm": 0.6121982336044312, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4825, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.4040084388185654, |
|
"grad_norm": 0.6151083707809448, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4865, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.4050632911392405, |
|
"grad_norm": 0.7092777490615845, |
|
"learning_rate": 0.0015, |
|
"loss": 1.5029, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.4061181434599156, |
|
"grad_norm": 0.6845822334289551, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4841, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.40717299578059074, |
|
"grad_norm": 0.8743574619293213, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4983, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.40822784810126583, |
|
"grad_norm": 0.7992430925369263, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4876, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.4092827004219409, |
|
"grad_norm": 0.6235871315002441, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4798, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.410337552742616, |
|
"grad_norm": 0.6806468367576599, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4806, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.41139240506329117, |
|
"grad_norm": 1.147836446762085, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4805, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.41244725738396626, |
|
"grad_norm": 0.6263267993927002, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4856, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.41350210970464135, |
|
"grad_norm": 0.6188958883285522, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4809, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.41455696202531644, |
|
"grad_norm": 0.5184334516525269, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4862, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.41561181434599154, |
|
"grad_norm": 0.6880916357040405, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4813, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 0.8529372811317444, |
|
"learning_rate": 0.0015, |
|
"loss": 1.4879, |
|
"step": 3950 |
|
}, |
|
{
"epoch": 0.4177215189873418,
"grad_norm": 0.6398452520370483,
"learning_rate": 0.0015,
"loss": 1.4903,
"step": 3960
},
{
"epoch": 0.41877637130801687,
"grad_norm": 0.7702900171279907,
"learning_rate": 0.0015,
"loss": 1.4788,
"step": 3970
},
{
"epoch": 0.41983122362869196,
"grad_norm": 0.6239493489265442,
"learning_rate": 0.0015,
"loss": 1.4908,
"step": 3980
},
{
"epoch": 0.4208860759493671,
"grad_norm": 0.6391934752464294,
"learning_rate": 0.0015,
"loss": 1.4797,
"step": 3990
},
{
"epoch": 0.4219409282700422,
"grad_norm": 0.7288190722465515,
"learning_rate": 0.0015,
"loss": 1.4941,
"step": 4000
},
{
"epoch": 0.4229957805907173,
"grad_norm": 0.5728703737258911,
"learning_rate": 0.0015,
"loss": 1.4841,
"step": 4010
},
{
"epoch": 0.4240506329113924,
"grad_norm": 0.5602556467056274,
"learning_rate": 0.0015,
"loss": 1.4835,
"step": 4020
},
{
"epoch": 0.42510548523206754,
"grad_norm": 0.7178170084953308,
"learning_rate": 0.0015,
"loss": 1.4751,
"step": 4030
},
{
"epoch": 0.42616033755274263,
"grad_norm": 0.5581310391426086,
"learning_rate": 0.0015,
"loss": 1.4853,
"step": 4040
},
{
"epoch": 0.4272151898734177,
"grad_norm": 0.5198979377746582,
"learning_rate": 0.0015,
"loss": 1.4871,
"step": 4050
},
{
"epoch": 0.4282700421940928,
"grad_norm": 0.6657981276512146,
"learning_rate": 0.0015,
"loss": 1.4923,
"step": 4060
},
{
"epoch": 0.4293248945147679,
"grad_norm": 0.765648603439331,
"learning_rate": 0.0015,
"loss": 1.4845,
"step": 4070
},
{
"epoch": 0.43037974683544306,
"grad_norm": 0.6785349249839783,
"learning_rate": 0.0015,
"loss": 1.4892,
"step": 4080
},
{
"epoch": 0.43143459915611815,
"grad_norm": 0.5718680620193481,
"learning_rate": 0.0015,
"loss": 1.4837,
"step": 4090
},
{
"epoch": 0.43248945147679324,
"grad_norm": 0.553119421005249,
"learning_rate": 0.0015,
"loss": 1.4814,
"step": 4100
},
{
"epoch": 0.43354430379746833,
"grad_norm": 0.5533855557441711,
"learning_rate": 0.0015,
"loss": 1.4721,
"step": 4110
},
{
"epoch": 0.4345991561181435,
"grad_norm": 0.6695696115493774,
"learning_rate": 0.0015,
"loss": 1.4782,
"step": 4120
},
{
"epoch": 0.4356540084388186,
"grad_norm": 0.5840980410575867,
"learning_rate": 0.0015,
"loss": 1.4717,
"step": 4130
},
{
"epoch": 0.43670886075949367,
"grad_norm": 0.6174877285957336,
"learning_rate": 0.0015,
"loss": 1.4761,
"step": 4140
},
{
"epoch": 0.43776371308016876,
"grad_norm": 0.6185278296470642,
"learning_rate": 0.0015,
"loss": 1.4792,
"step": 4150
},
{
"epoch": 0.4388185654008439,
"grad_norm": 0.5936148762702942,
"learning_rate": 0.0015,
"loss": 1.4735,
"step": 4160
},
{
"epoch": 0.439873417721519,
"grad_norm": 0.6119903326034546,
"learning_rate": 0.0015,
"loss": 1.4731,
"step": 4170
},
{
"epoch": 0.4409282700421941,
"grad_norm": 0.5228918194770813,
"learning_rate": 0.0015,
"loss": 1.4704,
"step": 4180
},
{
"epoch": 0.4419831223628692,
"grad_norm": 0.6289933323860168,
"learning_rate": 0.0015,
"loss": 1.4762,
"step": 4190
},
{
"epoch": 0.4430379746835443,
"grad_norm": 0.6278483271598816,
"learning_rate": 0.0015,
"loss": 1.4773,
"step": 4200
},
{
"epoch": 0.4440928270042194,
"grad_norm": 0.7192302942276001,
"learning_rate": 0.0015,
"loss": 1.4685,
"step": 4210
},
{
"epoch": 0.4451476793248945,
"grad_norm": 0.7284325957298279,
"learning_rate": 0.0015,
"loss": 1.4787,
"step": 4220
},
{
"epoch": 0.4462025316455696,
"grad_norm": 0.8943430781364441,
"learning_rate": 0.0015,
"loss": 1.4741,
"step": 4230
},
{
"epoch": 0.4472573839662447,
"grad_norm": 0.6659002304077148,
"learning_rate": 0.0015,
"loss": 1.4737,
"step": 4240
},
{
"epoch": 0.44831223628691985,
"grad_norm": 0.642861008644104,
"learning_rate": 0.0015,
"loss": 1.4774,
"step": 4250
},
{
"epoch": 0.44936708860759494,
"grad_norm": 0.7793336510658264,
"learning_rate": 0.0015,
"loss": 1.4825,
"step": 4260
},
{
"epoch": 0.45042194092827004,
"grad_norm": 0.5228441953659058,
"learning_rate": 0.0015,
"loss": 1.4727,
"step": 4270
},
{
"epoch": 0.45147679324894513,
"grad_norm": 0.9414927959442139,
"learning_rate": 0.0015,
"loss": 1.4664,
"step": 4280
},
{
"epoch": 0.4525316455696203,
"grad_norm": 0.8323007822036743,
"learning_rate": 0.0015,
"loss": 1.4782,
"step": 4290
},
{
"epoch": 0.45358649789029537,
"grad_norm": 0.9512622356414795,
"learning_rate": 0.0015,
"loss": 1.4791,
"step": 4300
},
{
"epoch": 0.45464135021097046,
"grad_norm": 0.5568503141403198,
"learning_rate": 0.0015,
"loss": 1.4762,
"step": 4310
},
{
"epoch": 0.45569620253164556,
"grad_norm": 0.8442806005477905,
"learning_rate": 0.0015,
"loss": 1.4698,
"step": 4320
},
{
"epoch": 0.45675105485232065,
"grad_norm": 0.6293421387672424,
"learning_rate": 0.0015,
"loss": 1.4765,
"step": 4330
},
{
"epoch": 0.4578059071729958,
"grad_norm": 0.7461580634117126,
"learning_rate": 0.0015,
"loss": 1.4684,
"step": 4340
},
{
"epoch": 0.4588607594936709,
"grad_norm": 0.8717284798622131,
"learning_rate": 0.0015,
"loss": 1.4717,
"step": 4350
},
{
"epoch": 0.459915611814346,
"grad_norm": 0.5873362421989441,
"learning_rate": 0.0015,
"loss": 1.4875,
"step": 4360
},
{
"epoch": 0.4609704641350211,
"grad_norm": 0.5259530544281006,
"learning_rate": 0.0015,
"loss": 1.4619,
"step": 4370
},
{
"epoch": 0.4620253164556962,
"grad_norm": 0.5751098394393921,
"learning_rate": 0.0015,
"loss": 1.4669,
"step": 4380
},
{
"epoch": 0.4630801687763713,
"grad_norm": 0.5525999069213867,
"learning_rate": 0.0015,
"loss": 1.4649,
"step": 4390
},
{
"epoch": 0.4641350210970464,
"grad_norm": 0.579777717590332,
"learning_rate": 0.0015,
"loss": 1.4694,
"step": 4400
},
{
"epoch": 0.4651898734177215,
"grad_norm": 0.6130363941192627,
"learning_rate": 0.0015,
"loss": 1.4834,
"step": 4410
},
{
"epoch": 0.46624472573839665,
"grad_norm": 0.6964607834815979,
"learning_rate": 0.0015,
"loss": 1.4687,
"step": 4420
},
{
"epoch": 0.46729957805907174,
"grad_norm": 0.5204871892929077,
"learning_rate": 0.0015,
"loss": 1.4713,
"step": 4430
},
{
"epoch": 0.46835443037974683,
"grad_norm": 0.801157534122467,
"learning_rate": 0.0015,
"loss": 1.4676,
"step": 4440
},
{
"epoch": 0.4694092827004219,
"grad_norm": 0.7642245292663574,
"learning_rate": 0.0015,
"loss": 1.4765,
"step": 4450
},
{
"epoch": 0.4704641350210971,
"grad_norm": 0.5725940465927124,
"learning_rate": 0.0015,
"loss": 1.4615,
"step": 4460
},
{
"epoch": 0.47151898734177217,
"grad_norm": 0.5430575609207153,
"learning_rate": 0.0015,
"loss": 1.4712,
"step": 4470
},
{
"epoch": 0.47257383966244726,
"grad_norm": 0.6782746911048889,
"learning_rate": 0.0015,
"loss": 1.4767,
"step": 4480
},
{
"epoch": 0.47362869198312235,
"grad_norm": 0.9323136806488037,
"learning_rate": 0.0015,
"loss": 1.4633,
"step": 4490
},
{
"epoch": 0.47468354430379744,
"grad_norm": 0.6096838116645813,
"learning_rate": 0.0015,
"loss": 1.464,
"step": 4500
},
{
"epoch": 0.4757383966244726,
"grad_norm": 0.5575011372566223,
"learning_rate": 0.0015,
"loss": 1.4688,
"step": 4510
},
{
"epoch": 0.4767932489451477,
"grad_norm": 0.6049365401268005,
"learning_rate": 0.0015,
"loss": 1.4522,
"step": 4520
},
{
"epoch": 0.4778481012658228,
"grad_norm": 0.6802952289581299,
"learning_rate": 0.0015,
"loss": 1.4695,
"step": 4530
},
{
"epoch": 0.47890295358649787,
"grad_norm": 0.8433916568756104,
"learning_rate": 0.0015,
"loss": 1.4752,
"step": 4540
},
{
"epoch": 0.479957805907173,
"grad_norm": 0.8293811082839966,
"learning_rate": 0.0015,
"loss": 1.4695,
"step": 4550
},
{
"epoch": 0.4810126582278481,
"grad_norm": 0.7346205115318298,
"learning_rate": 0.0015,
"loss": 1.4581,
"step": 4560
},
{
"epoch": 0.4820675105485232,
"grad_norm": 0.570183515548706,
"learning_rate": 0.0015,
"loss": 1.4408,
"step": 4570
},
{
"epoch": 0.4831223628691983,
"grad_norm": 0.7086605429649353,
"learning_rate": 0.0015,
"loss": 1.4648,
"step": 4580
},
{
"epoch": 0.48417721518987344,
"grad_norm": 0.619171142578125,
"learning_rate": 0.0015,
"loss": 1.452,
"step": 4590
},
{
"epoch": 0.48523206751054854,
"grad_norm": 0.6127137541770935,
"learning_rate": 0.0015,
"loss": 1.4723,
"step": 4600
},
{
"epoch": 0.48628691983122363,
"grad_norm": 0.734324038028717,
"learning_rate": 0.0015,
"loss": 1.4763,
"step": 4610
},
{
"epoch": 0.4873417721518987,
"grad_norm": 0.7030718922615051,
"learning_rate": 0.0015,
"loss": 1.4489,
"step": 4620
},
{
"epoch": 0.4883966244725738,
"grad_norm": 0.5352341532707214,
"learning_rate": 0.0015,
"loss": 1.4678,
"step": 4630
},
{
"epoch": 0.48945147679324896,
"grad_norm": 0.6763170957565308,
"learning_rate": 0.0015,
"loss": 1.4677,
"step": 4640
},
{
"epoch": 0.49050632911392406,
"grad_norm": 0.6143730282783508,
"learning_rate": 0.0015,
"loss": 1.4707,
"step": 4650
},
{
"epoch": 0.49156118143459915,
"grad_norm": 0.5258755087852478,
"learning_rate": 0.0015,
"loss": 1.4559,
"step": 4660
},
{
"epoch": 0.49261603375527424,
"grad_norm": 0.67002272605896,
"learning_rate": 0.0015,
"loss": 1.456,
"step": 4670
},
{
"epoch": 0.4936708860759494,
"grad_norm": 0.5338280200958252,
"learning_rate": 0.0015,
"loss": 1.4567,
"step": 4680
},
{
"epoch": 0.4947257383966245,
"grad_norm": 0.6864109635353088,
"learning_rate": 0.0015,
"loss": 1.4524,
"step": 4690
},
{
"epoch": 0.4957805907172996,
"grad_norm": 0.5267427563667297,
"learning_rate": 0.0015,
"loss": 1.4605,
"step": 4700
},
{
"epoch": 0.49683544303797467,
"grad_norm": 0.5892050862312317,
"learning_rate": 0.0015,
"loss": 1.4631,
"step": 4710
},
{
"epoch": 0.4978902953586498,
"grad_norm": 0.7090893983840942,
"learning_rate": 0.0015,
"loss": 1.4585,
"step": 4720
},
{
"epoch": 0.4989451476793249,
"grad_norm": 0.5960710048675537,
"learning_rate": 0.0015,
"loss": 1.4561,
"step": 4730
},
{
"epoch": 0.5,
"grad_norm": 0.6754327416419983,
"learning_rate": 0.0015,
"loss": 1.4544,
"step": 4740
},
{
"epoch": 0.5010548523206751,
"grad_norm": 0.6936537623405457,
"learning_rate": 0.0015,
"loss": 1.4756,
"step": 4750
},
{
"epoch": 0.5021097046413502,
"grad_norm": 0.5348150730133057,
"learning_rate": 0.0015,
"loss": 1.459,
"step": 4760
},
{
"epoch": 0.5031645569620253,
"grad_norm": 0.6094486117362976,
"learning_rate": 0.0015,
"loss": 1.4567,
"step": 4770
},
{
"epoch": 0.5042194092827004,
"grad_norm": 0.6765019297599792,
"learning_rate": 0.0015,
"loss": 1.4678,
"step": 4780
},
{
"epoch": 0.5052742616033755,
"grad_norm": 0.5961644053459167,
"learning_rate": 0.0015,
"loss": 1.4503,
"step": 4790
},
{
"epoch": 0.5063291139240507,
"grad_norm": 0.6130753755569458,
"learning_rate": 0.0015,
"loss": 1.4536,
"step": 4800
},
{
"epoch": 0.5073839662447257,
"grad_norm": 0.5550441145896912,
"learning_rate": 0.0015,
"loss": 1.4551,
"step": 4810
},
{
"epoch": 0.5084388185654009,
"grad_norm": 0.6500513553619385,
"learning_rate": 0.0015,
"loss": 1.4735,
"step": 4820
},
{
"epoch": 0.509493670886076,
"grad_norm": 0.7694984674453735,
"learning_rate": 0.0015,
"loss": 1.4563,
"step": 4830
},
{
"epoch": 0.510548523206751,
"grad_norm": 0.6021876335144043,
"learning_rate": 0.0015,
"loss": 1.4505,
"step": 4840
},
{
"epoch": 0.5116033755274262,
"grad_norm": 0.5139393210411072,
"learning_rate": 0.0015,
"loss": 1.4624,
"step": 4850
},
{
"epoch": 0.5126582278481012,
"grad_norm": 0.610668957233429,
"learning_rate": 0.0015,
"loss": 1.4502,
"step": 4860
},
{
"epoch": 0.5137130801687764,
"grad_norm": 0.8146686553955078,
"learning_rate": 0.0015,
"loss": 1.4455,
"step": 4870
},
{
"epoch": 0.5147679324894515,
"grad_norm": 0.706887423992157,
"learning_rate": 0.0015,
"loss": 1.4652,
"step": 4880
},
{
"epoch": 0.5158227848101266,
"grad_norm": 0.6309559941291809,
"learning_rate": 0.0015,
"loss": 1.4552,
"step": 4890
},
{
"epoch": 0.5168776371308017,
"grad_norm": 0.6033481955528259,
"learning_rate": 0.0015,
"loss": 1.4637,
"step": 4900
},
{
"epoch": 0.5179324894514767,
"grad_norm": 0.5887914896011353,
"learning_rate": 0.0015,
"loss": 1.4594,
"step": 4910
},
{
"epoch": 0.5189873417721519,
"grad_norm": 0.5958083271980286,
"learning_rate": 0.0015,
"loss": 1.4467,
"step": 4920
},
{
"epoch": 0.520042194092827,
"grad_norm": 0.6756103038787842,
"learning_rate": 0.0015,
"loss": 1.457,
"step": 4930
},
{
"epoch": 0.5210970464135021,
"grad_norm": 0.5370129346847534,
"learning_rate": 0.0015,
"loss": 1.4581,
"step": 4940
},
{
"epoch": 0.5221518987341772,
"grad_norm": 0.6108761429786682,
"learning_rate": 0.0015,
"loss": 1.4629,
"step": 4950
},
{
"epoch": 0.5232067510548524,
"grad_norm": 0.8404567241668701,
"learning_rate": 0.0015,
"loss": 1.4434,
"step": 4960
},
{
"epoch": 0.5242616033755274,
"grad_norm": 0.5708929300308228,
"learning_rate": 0.0015,
"loss": 1.4523,
"step": 4970
},
{
"epoch": 0.5253164556962026,
"grad_norm": 0.5647048354148865,
"learning_rate": 0.0015,
"loss": 1.446,
"step": 4980
},
{
"epoch": 0.5263713080168776,
"grad_norm": 0.5417590141296387,
"learning_rate": 0.0015,
"loss": 1.4491,
"step": 4990
},
{
"epoch": 0.5274261603375527,
"grad_norm": 0.7352495193481445,
"learning_rate": 0.0015,
"loss": 1.4607,
"step": 5000
},
{
"epoch": 0.5284810126582279,
"grad_norm": 0.7228708267211914,
"learning_rate": 0.0015,
"loss": 1.4534,
"step": 5010
},
{
"epoch": 0.5295358649789029,
"grad_norm": 0.6637216210365295,
"learning_rate": 0.0015,
"loss": 1.4468,
"step": 5020
},
{
"epoch": 0.5305907172995781,
"grad_norm": 0.5074453353881836,
"learning_rate": 0.0015,
"loss": 1.4306,
"step": 5030
},
{
"epoch": 0.5316455696202531,
"grad_norm": 0.5190367698669434,
"learning_rate": 0.0015,
"loss": 1.4592,
"step": 5040
},
{
"epoch": 0.5327004219409283,
"grad_norm": 0.5237929224967957,
"learning_rate": 0.0015,
"loss": 1.4468,
"step": 5050
},
{
"epoch": 0.5337552742616034,
"grad_norm": 0.5514045357704163,
"learning_rate": 0.0015,
"loss": 1.4597,
"step": 5060
},
{
"epoch": 0.5348101265822784,
"grad_norm": 0.7031023502349854,
"learning_rate": 0.0015,
"loss": 1.4477,
"step": 5070
},
{
"epoch": 0.5358649789029536,
"grad_norm": 0.5926287174224854,
"learning_rate": 0.0015,
"loss": 1.4608,
"step": 5080
},
{
"epoch": 0.5369198312236287,
"grad_norm": 0.5033077001571655,
"learning_rate": 0.0015,
"loss": 1.4493,
"step": 5090
},
{
"epoch": 0.5379746835443038,
"grad_norm": 0.5126532316207886,
"learning_rate": 0.0015,
"loss": 1.4342,
"step": 5100
},
{
"epoch": 0.5390295358649789,
"grad_norm": 0.6754027009010315,
"learning_rate": 0.0015,
"loss": 1.4507,
"step": 5110
},
{
"epoch": 0.540084388185654,
"grad_norm": 0.6079409718513489,
"learning_rate": 0.0015,
"loss": 1.4433,
"step": 5120
},
{
"epoch": 0.5411392405063291,
"grad_norm": 0.5625218749046326,
"learning_rate": 0.0015,
"loss": 1.4438,
"step": 5130
},
{
"epoch": 0.5421940928270043,
"grad_norm": 0.8679294586181641,
"learning_rate": 0.0015,
"loss": 1.4604,
"step": 5140
},
{
"epoch": 0.5432489451476793,
"grad_norm": 0.5662575364112854,
"learning_rate": 0.0015,
"loss": 1.4583,
"step": 5150
},
{
"epoch": 0.5443037974683544,
"grad_norm": 0.5976355671882629,
"learning_rate": 0.0015,
"loss": 1.4506,
"step": 5160
},
{
"epoch": 0.5453586497890295,
"grad_norm": 0.5095018148422241,
"learning_rate": 0.0015,
"loss": 1.4506,
"step": 5170
},
{
"epoch": 0.5464135021097046,
"grad_norm": 0.8036375045776367,
"learning_rate": 0.0015,
"loss": 1.4592,
"step": 5180
},
{
"epoch": 0.5474683544303798,
"grad_norm": 0.5669030547142029,
"learning_rate": 0.0015,
"loss": 1.4434,
"step": 5190
},
{
"epoch": 0.5485232067510548,
"grad_norm": 0.7651669979095459,
"learning_rate": 0.0015,
"loss": 1.4566,
"step": 5200
},
{
"epoch": 0.54957805907173,
"grad_norm": 0.5880611538887024,
"learning_rate": 0.0015,
"loss": 1.4392,
"step": 5210
},
{
"epoch": 0.5506329113924051,
"grad_norm": 0.6016876697540283,
"learning_rate": 0.0015,
"loss": 1.4502,
"step": 5220
},
{
"epoch": 0.5516877637130801,
"grad_norm": 0.5552685856819153,
"learning_rate": 0.0015,
"loss": 1.4418,
"step": 5230
},
{
"epoch": 0.5527426160337553,
"grad_norm": 0.68308025598526,
"learning_rate": 0.0015,
"loss": 1.4491,
"step": 5240
},
{
"epoch": 0.5537974683544303,
"grad_norm": 0.6933003067970276,
"learning_rate": 0.0015,
"loss": 1.4413,
"step": 5250
},
{
"epoch": 0.5548523206751055,
"grad_norm": 0.5555622577667236,
"learning_rate": 0.0015,
"loss": 1.4483,
"step": 5260
},
{
"epoch": 0.5559071729957806,
"grad_norm": 0.6394757032394409,
"learning_rate": 0.0015,
"loss": 1.4559,
"step": 5270
},
{
"epoch": 0.5569620253164557,
"grad_norm": 0.704420268535614,
"learning_rate": 0.0015,
"loss": 1.4484,
"step": 5280
},
{
"epoch": 0.5580168776371308,
"grad_norm": 0.5235179662704468,
"learning_rate": 0.0015,
"loss": 1.4487,
"step": 5290
},
{
"epoch": 0.5590717299578059,
"grad_norm": 0.610462486743927,
"learning_rate": 0.0015,
"loss": 1.4505,
"step": 5300
},
{
"epoch": 0.560126582278481,
"grad_norm": 0.7268403172492981,
"learning_rate": 0.0015,
"loss": 1.4379,
"step": 5310
},
{
"epoch": 0.5611814345991561,
"grad_norm": 0.6289941668510437,
"learning_rate": 0.0015,
"loss": 1.439,
"step": 5320
},
{
"epoch": 0.5622362869198312,
"grad_norm": 0.6171261072158813,
"learning_rate": 0.0015,
"loss": 1.4493,
"step": 5330
},
{
"epoch": 0.5632911392405063,
"grad_norm": 0.5034967064857483,
"learning_rate": 0.0015,
"loss": 1.442,
"step": 5340
},
{
"epoch": 0.5643459915611815,
"grad_norm": 0.5746512413024902,
"learning_rate": 0.0015,
"loss": 1.4342,
"step": 5350
},
{
"epoch": 0.5654008438818565,
"grad_norm": 0.8130653500556946,
"learning_rate": 0.0015,
"loss": 1.4459,
"step": 5360
},
{
"epoch": 0.5664556962025317,
"grad_norm": 0.7359591722488403,
"learning_rate": 0.0015,
"loss": 1.4428,
"step": 5370
},
{
"epoch": 0.5675105485232067,
"grad_norm": 0.543518602848053,
"learning_rate": 0.0015,
"loss": 1.438,
"step": 5380
},
{
"epoch": 0.5685654008438819,
"grad_norm": 0.5612589716911316,
"learning_rate": 0.0015,
"loss": 1.4356,
"step": 5390
},
{
"epoch": 0.569620253164557,
"grad_norm": 0.678749144077301,
"learning_rate": 0.0015,
"loss": 1.437,
"step": 5400
},
{
"epoch": 0.570675105485232,
"grad_norm": 0.5381031036376953,
"learning_rate": 0.0015,
"loss": 1.4393,
"step": 5410
},
{
"epoch": 0.5717299578059072,
"grad_norm": 0.5442678928375244,
"learning_rate": 0.0015,
"loss": 1.4348,
"step": 5420
},
{
"epoch": 0.5727848101265823,
"grad_norm": 0.643040657043457,
"learning_rate": 0.0015,
"loss": 1.43,
"step": 5430
},
{
"epoch": 0.5738396624472574,
"grad_norm": 0.5506795048713684,
"learning_rate": 0.0015,
"loss": 1.4304,
"step": 5440
},
{
"epoch": 0.5748945147679325,
"grad_norm": 0.6098572015762329,
"learning_rate": 0.0015,
"loss": 1.4348,
"step": 5450
},
{
"epoch": 0.5759493670886076,
"grad_norm": 0.6758444905281067,
"learning_rate": 0.0015,
"loss": 1.4392,
"step": 5460
},
{
"epoch": 0.5770042194092827,
"grad_norm": 0.5184838771820068,
"learning_rate": 0.0015,
"loss": 1.4396,
"step": 5470
},
{
"epoch": 0.5780590717299579,
"grad_norm": 0.9065413475036621,
"learning_rate": 0.0015,
"loss": 1.4399,
"step": 5480
},
{
"epoch": 0.5791139240506329,
"grad_norm": 0.5394531488418579,
"learning_rate": 0.0015,
"loss": 1.4473,
"step": 5490
},
{
"epoch": 0.580168776371308,
"grad_norm": 0.6227542757987976,
"learning_rate": 0.0015,
"loss": 1.4391,
"step": 5500
},
{
"epoch": 0.5812236286919831,
"grad_norm": 0.8134665489196777,
"learning_rate": 0.0015,
"loss": 1.4447,
"step": 5510
},
{
"epoch": 0.5822784810126582,
"grad_norm": 0.5177081823348999,
"learning_rate": 0.0015,
"loss": 1.436,
"step": 5520
},
{
"epoch": 0.5833333333333334,
"grad_norm": 0.6241568326950073,
"learning_rate": 0.0015,
"loss": 1.4298,
"step": 5530
},
{
"epoch": 0.5843881856540084,
"grad_norm": 0.5243352651596069,
"learning_rate": 0.0015,
"loss": 1.4286,
"step": 5540
},
{
"epoch": 0.5854430379746836,
"grad_norm": 0.5504492521286011,
"learning_rate": 0.0015,
"loss": 1.4453,
"step": 5550
},
{
"epoch": 0.5864978902953587,
"grad_norm": 0.5448554158210754,
"learning_rate": 0.0015,
"loss": 1.4487,
"step": 5560
},
{
"epoch": 0.5875527426160337,
"grad_norm": 0.5740591287612915,
"learning_rate": 0.0015,
"loss": 1.4324,
"step": 5570
},
{
"epoch": 0.5886075949367089,
"grad_norm": 0.7025555968284607,
"learning_rate": 0.0015,
"loss": 1.4334,
"step": 5580
},
{
"epoch": 0.5896624472573839,
"grad_norm": 0.5911626815795898,
"learning_rate": 0.0015,
"loss": 1.4366,
"step": 5590
},
{
"epoch": 0.5907172995780591,
"grad_norm": 0.503633439540863,
"learning_rate": 0.0015,
"loss": 1.4313,
"step": 5600
},
{
"epoch": 0.5917721518987342,
"grad_norm": 0.6805056929588318,
"learning_rate": 0.0015,
"loss": 1.4384,
"step": 5610
},
{
"epoch": 0.5928270042194093,
"grad_norm": 0.5583463907241821,
"learning_rate": 0.0015,
"loss": 1.4463,
"step": 5620
},
{
"epoch": 0.5938818565400844,
"grad_norm": 0.6833731532096863,
"learning_rate": 0.0015,
"loss": 1.4413,
"step": 5630
},
{
"epoch": 0.5949367088607594,
"grad_norm": 0.5771982073783875,
"learning_rate": 0.0015,
"loss": 1.4511,
"step": 5640
},
{
"epoch": 0.5959915611814346,
"grad_norm": 0.6087093353271484,
"learning_rate": 0.0015,
"loss": 1.4457,
"step": 5650
},
{
"epoch": 0.5970464135021097,
"grad_norm": 0.7789549231529236,
"learning_rate": 0.0015,
"loss": 1.4377,
"step": 5660
},
{
"epoch": 0.5981012658227848,
"grad_norm": 0.6058619022369385,
"learning_rate": 0.0015,
"loss": 1.4469,
"step": 5670
},
{
"epoch": 0.5991561181434599,
"grad_norm": 0.5375464558601379,
"learning_rate": 0.0015,
"loss": 1.4455,
"step": 5680
},
{
"epoch": 0.6002109704641351,
"grad_norm": 0.8218992352485657,
"learning_rate": 0.0015,
"loss": 1.4239,
"step": 5690
},
{
"epoch": 0.6012658227848101,
"grad_norm": 0.5948262810707092,
"learning_rate": 0.0015,
"loss": 1.43,
"step": 5700
},
{
"epoch": 0.6023206751054853,
"grad_norm": 0.5964241623878479,
"learning_rate": 0.0015,
"loss": 1.4337,
"step": 5710
},
{
"epoch": 0.6033755274261603,
"grad_norm": 0.5453081727027893,
"learning_rate": 0.0015,
"loss": 1.4377,
"step": 5720
},
{
"epoch": 0.6044303797468354,
"grad_norm": 0.5510696768760681,
"learning_rate": 0.0015,
"loss": 1.4355,
"step": 5730
},
{
"epoch": 0.6054852320675106,
"grad_norm": 0.5511655211448669,
"learning_rate": 0.0015,
"loss": 1.4331,
"step": 5740
},
{
"epoch": 0.6065400843881856,
"grad_norm": 0.6737241744995117,
"learning_rate": 0.0015,
"loss": 1.4107,
"step": 5750
},
{
"epoch": 0.6075949367088608,
"grad_norm": 0.5429701805114746,
"learning_rate": 0.0015,
"loss": 1.4199,
"step": 5760
},
{
"epoch": 0.6086497890295358,
"grad_norm": 0.7445785403251648,
"learning_rate": 0.0015,
"loss": 1.4221,
"step": 5770
},
{
"epoch": 0.609704641350211,
"grad_norm": 0.5367471575737,
"learning_rate": 0.0015,
"loss": 1.4298,
"step": 5780
},
{
"epoch": 0.6107594936708861,
"grad_norm": 0.6427586674690247,
"learning_rate": 0.0015,
"loss": 1.4255,
"step": 5790
},
{
"epoch": 0.6118143459915611,
"grad_norm": 0.6622743606567383,
"learning_rate": 0.0015,
"loss": 1.4321,
"step": 5800
},
{
"epoch": 0.6128691983122363,
"grad_norm": 0.5611438751220703,
"learning_rate": 0.0015,
"loss": 1.4336,
"step": 5810
},
{
"epoch": 0.6139240506329114,
"grad_norm": 0.5816289782524109,
"learning_rate": 0.0015,
"loss": 1.4464,
"step": 5820
},
{
"epoch": 0.6149789029535865,
"grad_norm": 0.5909755825996399,
"learning_rate": 0.0015,
"loss": 1.4297,
"step": 5830
},
{
"epoch": 0.6160337552742616,
"grad_norm": 0.7232508659362793,
"learning_rate": 0.0015,
"loss": 1.4431,
"step": 5840
},
{
"epoch": 0.6170886075949367,
"grad_norm": 0.8321460485458374,
"learning_rate": 0.0015,
"loss": 1.4451,
"step": 5850
},
{
"epoch": 0.6181434599156118,
"grad_norm": 0.5028564929962158,
"learning_rate": 0.0015,
"loss": 1.4327,
"step": 5860
},
{
"epoch": 0.619198312236287,
"grad_norm": 0.7685743570327759,
"learning_rate": 0.0015,
"loss": 1.4256,
"step": 5870
},
{
"epoch": 0.620253164556962,
"grad_norm": 0.5092529058456421,
"learning_rate": 0.0015,
"loss": 1.4391,
"step": 5880
},
{
"epoch": 0.6213080168776371,
"grad_norm": 0.5273023247718811,
"learning_rate": 0.0015,
"loss": 1.428,
"step": 5890
},
{
"epoch": 0.6223628691983122,
"grad_norm": 0.5593839883804321,
"learning_rate": 0.0015,
"loss": 1.4306,
"step": 5900
},
{
"epoch": 0.6234177215189873,
"grad_norm": 0.5306974053382874,
"learning_rate": 0.0015,
"loss": 1.425,
"step": 5910
},
{
"epoch": 0.6244725738396625,
"grad_norm": 0.5432724356651306,
"learning_rate": 0.0015,
"loss": 1.4189,
"step": 5920
},
{
"epoch": 0.6255274261603375,
"grad_norm": 1.200668215751648,
"learning_rate": 0.0015,
"loss": 1.4307,
"step": 5930
},
{
"epoch": 0.6265822784810127,
"grad_norm": 0.6720802187919617,
"learning_rate": 0.0015,
"loss": 1.4346,
"step": 5940
},
{
"epoch": 0.6276371308016878,
"grad_norm": 0.5483372807502747,
"learning_rate": 0.0015,
"loss": 1.4231,
"step": 5950
},
{
"epoch": 0.6286919831223629,
"grad_norm": 0.8048787117004395,
"learning_rate": 0.0015,
"loss": 1.4273,
"step": 5960
},
{
"epoch": 0.629746835443038,
"grad_norm": 0.5510703325271606,
"learning_rate": 0.0015,
"loss": 1.4342,
"step": 5970
},
{
"epoch": 0.630801687763713,
"grad_norm": 0.5123088955879211,
"learning_rate": 0.0015,
"loss": 1.4146,
"step": 5980
},
{
"epoch": 0.6318565400843882,
"grad_norm": 0.6412525177001953,
"learning_rate": 0.0015,
"loss": 1.4331,
"step": 5990
},
{
"epoch": 0.6329113924050633,
"grad_norm": 0.5405699610710144,
"learning_rate": 0.0015,
"loss": 1.4421,
"step": 6000
},
{
"epoch": 0.6339662447257384,
"grad_norm": 0.5397794842720032,
"learning_rate": 0.0015,
"loss": 1.4293,
"step": 6010
},
{
"epoch": 0.6350210970464135,
"grad_norm": 0.6345342397689819,
"learning_rate": 0.0015,
"loss": 1.4332,
"step": 6020
},
{
"epoch": 0.6360759493670886,
"grad_norm": 0.5723614692687988,
"learning_rate": 0.0015,
"loss": 1.4158,
"step": 6030
},
{
"epoch": 0.6371308016877637,
"grad_norm": 0.5299696326255798,
"learning_rate": 0.0015,
"loss": 1.4341,
"step": 6040
},
{
"epoch": 0.6381856540084389,
"grad_norm": 0.7609160542488098,
"learning_rate": 0.0015,
"loss": 1.4174,
"step": 6050
},
{
"epoch": 0.6392405063291139,
"grad_norm": 0.5781378746032715,
"learning_rate": 0.0015,
"loss": 1.4343,
"step": 6060
},
{
"epoch": 0.640295358649789,
"grad_norm": 0.7221751809120178,
"learning_rate": 0.0015,
"loss": 1.4247,
"step": 6070
},
{
"epoch": 0.6413502109704642,
"grad_norm": 0.49339455366134644,
"learning_rate": 0.0015,
"loss": 1.4251,
"step": 6080
},
{
"epoch": 0.6424050632911392,
"grad_norm": 0.5464122295379639,
"learning_rate": 0.0015,
"loss": 1.4235,
"step": 6090
},
{
"epoch": 0.6434599156118144,
"grad_norm": 0.6804472804069519,
"learning_rate": 0.0015,
"loss": 1.4191,
"step": 6100
},
{
"epoch": 0.6445147679324894,
"grad_norm": 0.6100871562957764,
"learning_rate": 0.0015,
"loss": 1.4308,
"step": 6110
},
{
"epoch": 0.6455696202531646,
"grad_norm": 0.49249380826950073,
"learning_rate": 0.0015,
"loss": 1.4239,
"step": 6120
},
{
"epoch": 0.6466244725738397,
"grad_norm": 0.5322859883308411,
"learning_rate": 0.0015,
"loss": 1.4243,
"step": 6130
},
{
"epoch": 0.6476793248945147,
"grad_norm": 0.6617424488067627,
"learning_rate": 0.0015,
"loss": 1.4211,
"step": 6140
},
{
"epoch": 0.6487341772151899,
"grad_norm": 0.5216740369796753,
"learning_rate": 0.0015,
"loss": 1.4342,
"step": 6150
},
{
"epoch": 0.6497890295358649,
"grad_norm": 0.6193856596946716,
"learning_rate": 0.0015,
"loss": 1.4267,
"step": 6160
},
{
"epoch": 0.6508438818565401,
"grad_norm": 0.5639455318450928,
"learning_rate": 0.0014834368975312174,
"loss": 1.4098,
"step": 6170
},
{
"epoch": 0.6518987341772152,
"grad_norm": 0.5887807011604309,
"learning_rate": 0.0014629899726345957,
"loss": 1.4291,
"step": 6180
},
{
"epoch": 0.6529535864978903,
"grad_norm": 0.5052474737167358,
"learning_rate": 0.0014428248775471316,
"loss": 1.43,
"step": 6190
},
{
"epoch": 0.6540084388185654,
"grad_norm": 0.5447661280632019,
"learning_rate": 0.00142293772767289,
"loss": 1.4228,
"step": 6200
},
{
"epoch": 0.6550632911392406,
"grad_norm": 0.5228587985038757,
"learning_rate": 0.001403324691959192,
"loss": 1.4163,
"step": 6210
},
{
"epoch": 0.6561181434599156,
"grad_norm": 0.5889528393745422,
"learning_rate": 0.0013839819921586025,
"loss": 1.4231,
"step": 6220
},
{
"epoch": 0.6571729957805907,
"grad_norm": 0.5708619952201843,
"learning_rate": 0.0013649059021010894,
"loss": 1.4109,
"step": 6230
},
{
"epoch": 0.6582278481012658,
"grad_norm": 0.6014156937599182,
"learning_rate": 0.0013460927469762154,
"loss": 1.4135,
"step": 6240
},
{
"epoch": 0.6592827004219409,
"grad_norm": 0.5244723558425903,
"learning_rate": 0.0013275389026252255,
"loss": 1.4207,
"step": 6250
},
{
"epoch": 0.6603375527426161,
"grad_norm": 0.5485060811042786,
"learning_rate": 0.0013092407948428887,
"loss": 1.4081,
"step": 6260
},
{
"epoch": 0.6613924050632911,
"grad_norm": 0.7510374188423157,
"learning_rate": 0.001291194898688966,
"loss": 1.4185,
"step": 6270
},
{
"epoch": 0.6624472573839663,
"grad_norm": 0.49165019392967224,
"learning_rate": 0.001273397737809166,
"loss": 1.4151,
"step": 6280
},
{
"epoch": 0.6635021097046413,
"grad_norm": 0.5156455039978027,
"learning_rate": 0.001255845883765463,
"loss": 1.4084,
"step": 6290
},
{
"epoch": 0.6645569620253164,
"grad_norm": 0.6343047618865967,
"learning_rate": 0.001238535955375642,
"loss": 1.4036,
"step": 6300
},
{
"epoch": 0.6656118143459916,
"grad_norm": 0.6259722709655762,
"learning_rate": 0.0012214646180619506,
"loss": 1.3995,
"step": 6310
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.47512054443359375,
"learning_rate": 0.001204628583208727,
"loss": 1.3935,
"step": 6320
},
{
"epoch": 0.6677215189873418,
"grad_norm": 0.6797612309455872,
"learning_rate": 0.0011880246075288827,
"loss": 1.4032,
"step": 6330
},
{
"epoch": 0.6687763713080169,
"grad_norm": 0.6804598569869995,
"learning_rate": 0.001171649492439115,
"loss": 1.3936,
"step": 6340
},
{
"epoch": 0.669831223628692,
"grad_norm": 0.5329886674880981,
"learning_rate": 0.0011555000834437364,
"loss": 1.4002,
"step": 6350
},
{
"epoch": 0.6708860759493671,
"grad_norm": 0.6307658553123474,
"learning_rate": 0.0011395732695269908,
"loss": 1.3984,
"step": 6360
},
{
"epoch": 0.6719409282700421,
"grad_norm": 0.5046476125717163,
"learning_rate": 0.0011238659825537505,
"loss": 1.3772,
"step": 6370
},
{
"epoch": 0.6729957805907173,
"grad_norm": 0.5106565356254578,
"learning_rate": 0.0011083751966784717,
"loss": 1.381,
"step": 6380
},
{
"epoch": 0.6740506329113924,
"grad_norm": 0.9580277800559998,
"learning_rate": 0.0010930979277622953,
"loss": 1.3967,
"step": 6390
},
{
"epoch": 0.6751054852320675,
"grad_norm": 0.5464169979095459,
"learning_rate": 0.0010780312327981854,
"loss": 1.395,
"step": 6400
},
{
"epoch": 0.6761603375527426,
"grad_norm": 0.503548800945282,
"learning_rate": 0.0010631722093439888,
"loss": 1.3895,
"step": 6410
},
{
"epoch": 0.6772151898734177,
"grad_norm": 0.5025346279144287,
"learning_rate": 0.00104851799496331,
"loss": 1.3778,
"step": 6420
},
{
"epoch": 0.6782700421940928,
"grad_norm": 0.4954620599746704,
"learning_rate": 0.0010340657666740914,
"loss": 1.3883,
"step": 6430
},
{
"epoch": 0.679324894514768,
"grad_norm": 0.56938636302948,
"learning_rate": 0.0010198127404047975,
"loss": 1.372,
"step": 6440
},
{
"epoch": 0.680379746835443,
"grad_norm": 0.5315554738044739,
"learning_rate": 0.0010057561704580897,
"loss": 1.3757,
"step": 6450
},
{
"epoch": 0.6814345991561181,
"grad_norm": 0.5042809844017029,
"learning_rate": 0.0009918933489818985,
"loss": 1.3863,
"step": 6460
},
{
"epoch": 0.6824894514767933,
"grad_norm": 0.5185731649398804,
"learning_rate": 0.0009782216054477827,
"loss": 1.3798,
"step": 6470
},
{
"epoch": 0.6835443037974683,
"grad_norm": 0.6783595681190491,
"learning_rate": 0.0009647383061364801,
"loss": 1.386,
"step": 6480
},
{
"epoch": 0.6845991561181435,
"grad_norm": 0.5089995265007019,
"learning_rate": 0.0009514408536305495,
"loss": 1.3809,
"step": 6490
},
{
"epoch": 0.6856540084388185,
"grad_norm": 0.480932354927063,
"learning_rate": 0.0009383266863140042,
"loss": 1.3939,
"step": 6500
},
{
"epoch": 0.6867088607594937,
"grad_norm": 0.4996776878833771,
"learning_rate": 0.000925393277878844,
"loss": 1.3939,
"step": 6510
},
{
"epoch": 0.6877637130801688,
"grad_norm": 0.4868309497833252,
"learning_rate": 0.0009126381368383879,
"loss": 1.3754,
"step": 6520
},
{
"epoch": 0.6888185654008439,
"grad_norm": 0.5652695894241333,
"learning_rate": 0.0009000588060473156,
"loss": 1.3678,
"step": 6530
},
{
"epoch": 0.689873417721519,
"grad_norm": 0.5811905264854431,
"learning_rate": 0.0008876528622283235,
"loss": 1.3757,
"step": 6540
},
{
"epoch": 0.6909282700421941,
"grad_norm": 0.5382415056228638,
"learning_rate": 0.0008754179155053053,
"loss": 1.3714,
"step": 6550
},
{
"epoch": 0.6919831223628692,
"grad_norm": 0.7246244549751282,
"learning_rate": 0.0008633516089429683,
"loss": 1.3727,
"step": 6560
},
{
"epoch": 0.6930379746835443,
"grad_norm": 0.49916544556617737,
"learning_rate": 0.0008514516180927928,
"loss": 1.3682,
"step": 6570
},
{
"epoch": 0.6940928270042194,
"grad_norm": 0.5470711588859558,
"learning_rate": 0.0008397156505452524,
"loss": 1.3623,
"step": 6580
},
{
"epoch": 0.6951476793248945,
"grad_norm": 0.6230381727218628,
"learning_rate": 0.0008281414454882051,
"loss": 1.3648,
"step": 6590
},
{
"epoch": 0.6962025316455697,
"grad_norm": 0.5245243906974792,
"learning_rate": 0.0008167267732713704,
"loss": 1.3725,
"step": 6600
},
{
"epoch": 0.6972573839662447,
"grad_norm": 0.505135178565979,
"learning_rate": 0.0008054694349768117,
"loss": 1.3576,
"step": 6610
},
{
"epoch": 0.6983122362869199,
"grad_norm": 0.4960167109966278,
"learning_rate": 0.0007943672619953359,
"loss": 1.3668,
"step": 6620
},
{
"epoch": 0.6993670886075949,
"grad_norm": 0.5264673829078674,
"learning_rate": 0.0007834181156087356,
"loss": 1.3599,
"step": 6630
},
{
"epoch": 0.70042194092827,
"grad_norm": 0.6166502833366394,
"learning_rate": 0.0007726198865777852,
"loss": 1.3636,
"step": 6640
},
{
"epoch": 0.7014767932489452,
"grad_norm": 0.536520779132843,
"learning_rate": 0.0007619704947359191,
"loss": 1.3552,
"step": 6650
},
{
"epoch": 0.7025316455696202,
"grad_norm": 0.495047390460968,
"learning_rate": 0.0007514678885885087,
"loss": 1.3559,
"step": 6660
},
{
"epoch": 0.7035864978902954,
"grad_norm": 0.47605499625205994,
"learning_rate": 0.0007411100449176633,
"loss": 1.3585,
"step": 6670
},
{
"epoch": 0.7046413502109705,
"grad_norm": 0.4973078668117523,
"learning_rate": 0.0007308949683924791,
"loss": 1.3604,
"step": 6680
},
{
"epoch": 0.7056962025316456,
"grad_norm": 0.5651484131813049,
"learning_rate": 0.000720820691184658,
"loss": 1.3511,
"step": 6690
},
{
"epoch": 0.7067510548523207,
"grad_norm": 0.46959301829338074,
"learning_rate": 0.0007108852725894269,
"loss": 1.3531,
"step": 6700
},
{
"epoch": 0.7078059071729957,
"grad_norm": 0.5081770420074463,
"learning_rate": 0.000701086798651681,
"loss": 1.3544,
"step": 6710
},
{
"epoch": 0.7088607594936709,
"grad_norm": 0.5139282941818237,
"learning_rate": 0.0006914233817972798,
"loss": 1.3427,
"step": 6720
},
{
"epoch": 0.709915611814346,
"grad_norm": 0.49106651544570923,
"learning_rate": 0.0006818931604694261,
"loss": 1.3531,
"step": 6730
},
{
"epoch": 0.7109704641350211,
"grad_norm": 0.46146702766418457,
"learning_rate": 0.0006724942987700563,
"loss": 1.3528,
"step": 6740
},
{
"epoch": 0.7120253164556962,
"grad_norm": 0.4885563552379608,
"learning_rate": 0.0006632249861061732,
"loss": 1.3578,
"step": 6750
},
{
"epoch": 0.7130801687763713,
"grad_norm": 0.6136794090270996,
"learning_rate": 0.0006540834368410549,
"loss": 1.3502,
"step": 6760
},
{
"epoch": 0.7141350210970464,
"grad_norm": 0.5482812523841858,
"learning_rate": 0.0006450678899502701,
"loss": 1.3528,
"step": 6770
},
{
"epoch": 0.7151898734177216,
"grad_norm": 0.5637302398681641,
"learning_rate": 0.0006361766086824345,
"loss": 1.3499,
"step": 6780
},
{
"epoch": 0.7162447257383966,
"grad_norm": 0.46481969952583313,
"learning_rate": 0.000627407880224645,
"loss": 1.3556,
"step": 6790
},
{
"epoch": 0.7172995780590717,
"grad_norm": 0.4623727798461914,
"learning_rate": 0.0006187600153725225,
"loss": 1.3395,
"step": 6800
},
{
"epoch": 0.7183544303797469,
"grad_norm": 0.8997368216514587,
"learning_rate": 0.0006102313482048055,
"loss": 1.3446,
"step": 6810
},
{
"epoch": 0.7194092827004219,
"grad_norm": 0.5417001247406006,
"learning_rate": 0.0006018202357624274,
"loss": 1.3472,
"step": 6820
},
{
"epoch": 0.7204641350210971,
"grad_norm": 0.6152294874191284,
"learning_rate": 0.0005935250577320168,
"loss": 1.3418,
"step": 6830
},
{
"epoch": 0.7215189873417721,
"grad_norm": 0.672227144241333,
"learning_rate": 0.0005853442161337618,
"loss": 1.3325,
"step": 6840
},
{
"epoch": 0.7225738396624473,
"grad_norm": 0.4651714265346527,
"learning_rate": 0.0005772761350135759,
"loss": 1.3408,
"step": 6850
},
{
"epoch": 0.7236286919831224,
"grad_norm": 0.5990096926689148,
"learning_rate": 0.0005693192601395058,
"loss": 1.3335,
"step": 6860
},
{
"epoch": 0.7246835443037974,
"grad_norm": 0.4861851632595062,
"learning_rate": 0.000561472058702326,
"loss": 1.331,
"step": 6870
},
{
"epoch": 0.7257383966244726,
"grad_norm": 0.5941367745399475,
"learning_rate": 0.000553733019020258,
"loss": 1.337,
"step": 6880
},
{
"epoch": 0.7267932489451476,
"grad_norm": 0.5600623488426208,
"learning_rate": 0.0005461006502477612,
"loss": 1.3305,
"step": 6890
},
{
"epoch": 0.7278481012658228,
"grad_norm": 0.5032252669334412,
"learning_rate": 0.0005385734820883369,
"loss": 1.3289,
"step": 6900
},
{
"epoch": 0.7289029535864979,
"grad_norm": 0.5026607513427734,
"learning_rate": 0.0005311500645112907,
"loss": 1.355,
"step": 6910
},
{
"epoch": 0.729957805907173,
"grad_norm": 0.5264232158660889,
"learning_rate": 0.0005238289674723993,
"loss": 1.3353,
"step": 6920
},
{
"epoch": 0.7310126582278481,
"grad_norm": 0.47649210691452026,
"learning_rate": 0.0005166087806384274,
"loss": 1.3377,
"step": 6930
},
{
"epoch": 0.7320675105485233,
"grad_norm": 0.4772520363330841,
"learning_rate": 0.0005094881131154418,
"loss": 1.3419,
"step": 6940
},
{
"epoch": 0.7331223628691983,
"grad_norm": 0.5586071610450745,
"learning_rate": 0.0005024655931808696,
"loss": 1.3374,
"step": 6950
},
{
"epoch": 0.7341772151898734,
"grad_norm": 0.49496790766716003,
"learning_rate": 0.0004955398680192508,
"loss": 1.328,
"step": 6960
},
{
"epoch": 0.7352320675105485,
"grad_norm": 0.5294111967086792,
"learning_rate": 0.000488709603461632,
"loss": 1.3248,
"step": 6970
},
{
"epoch": 0.7362869198312236,
"grad_norm": 0.47923532128334045,
"learning_rate": 0.000481973483728553,
"loss": 1.3276,
"step": 6980
},
{
"epoch": 0.7373417721518988,
"grad_norm": 0.4925960600376129,
"learning_rate": 0.0004753302111765748,
"loss": 1.3268,
"step": 6990
},
{
"epoch": 0.7383966244725738,
"grad_norm": 0.4940263032913208,
"learning_rate": 0.0004687785060483032,
"loss": 1.3384,
"step": 7000
},
{
"epoch": 0.739451476793249,
"grad_norm": 0.5015807151794434,
"learning_rate": 0.0004623171062258558,
"loss": 1.3094,
"step": 7010
},
{
"epoch": 0.740506329113924,
"grad_norm": 0.7085549235343933,
"learning_rate": 0.0004559447669877288,
"loss": 1.3253,
"step": 7020
},
{
"epoch": 0.7415611814345991,
"grad_norm": 0.4822874367237091,
"learning_rate": 0.00044966026076901413,
"loss": 1.3294,
"step": 7030
},
{
"epoch": 0.7426160337552743,
"grad_norm": 0.6149103045463562,
"learning_rate": 0.00044346237692492177,
"loss": 1.334,
"step": 7040
},
{
"epoch": 0.7436708860759493,
"grad_norm": 0.5246915221214294,
"learning_rate": 0.0004373499214975615,
"loss": 1.3194,
"step": 7050
},
{
"epoch": 0.7447257383966245,
"grad_norm": 0.5026849508285522,
"learning_rate": 0.0004313217169859396,
"loss": 1.324,
"step": 7060
},
{
"epoch": 0.7457805907172996,
"grad_norm": 0.585382878780365,
"learning_rate": 0.0004253766021191256,
"loss": 1.3323,
"step": 7070
},
{
"epoch": 0.7468354430379747,
"grad_norm": 0.4607876241207123,
"learning_rate": 0.00041951343163254497,
"loss": 1.3266,
"step": 7080
},
{
"epoch": 0.7478902953586498,
"grad_norm": 0.5416311621665955,
"learning_rate": 0.00041373107604735626,
"loss": 1.3255,
"step": 7090
},
{
"epoch": 0.7489451476793249,
"grad_norm": 0.48268038034439087,
"learning_rate": 0.0004080284214528687,
"loss": 1.3225,
"step": 7100
},
{
"epoch": 0.75,
"grad_norm": 0.5381031036376953,
"learning_rate": 0.0004024043692919589,
"loss": 1.3282,
"step": 7110
},
{
"epoch": 0.7510548523206751,
"grad_norm": 0.5726626515388489,
"learning_rate": 0.0003968578361494449,
"loss": 1.325,
"step": 7120
},
{
"epoch": 0.7521097046413502,
"grad_norm": 0.4642830789089203,
"learning_rate": 0.000391387753543378,
"loss": 1.3323,
"step": 7130
},
{
"epoch": 0.7531645569620253,
"grad_norm": 0.4678974747657776,
"learning_rate": 0.00038599306771921023,
"loss": 1.3162,
"step": 7140
},
{
"epoch": 0.7542194092827004,
"grad_norm": 0.5471410751342773,
"learning_rate": 0.0003806727394468004,
"loss": 1.3134,
"step": 7150
},
{
"epoch": 0.7552742616033755,
"grad_norm": 0.46361368894577026,
"learning_rate": 0.0003754257438202162,
"loss": 1.3225,
"step": 7160
},
{
"epoch": 0.7563291139240507,
"grad_norm": 0.6638835668563843,
"learning_rate": 0.0003702510700602974,
"loss": 1.3296,
"step": 7170
},
{
"epoch": 0.7573839662447257,
"grad_norm": 0.5422773957252502,
"learning_rate": 0.0003651477213199393,
"loss": 1.3072,
"step": 7180
},
{
"epoch": 0.7584388185654009,
"grad_norm": 0.49227139353752136,
"learning_rate": 0.000360114714492061,
"loss": 1.3101,
"step": 7190
},
{
"epoch": 0.759493670886076,
"grad_norm": 0.49915429949760437,
"learning_rate": 0.0003551510800202195,
"loss": 1.315,
"step": 7200
},
{
"epoch": 0.760548523206751,
"grad_norm": 0.5297178030014038,
"learning_rate": 0.0003502558617118353,
"loss": 1.319,
"step": 7210
},
{
"epoch": 0.7616033755274262,
"grad_norm": 0.55345618724823,
"learning_rate": 0.0003454281165539914,
"loss": 1.3311,
"step": 7220
},
{
"epoch": 0.7626582278481012,
"grad_norm": 0.5680271983146667,
"learning_rate": 0.00034066691453177176,
"loss": 1.3288,
"step": 7230
},
{
"epoch": 0.7637130801687764,
"grad_norm": 0.47741663455963135,
"learning_rate": 0.0003359713384491037,
"loss": 1.3229,
"step": 7240
},
{
"epoch": 0.7647679324894515,
"grad_norm": 0.5046437978744507,
"learning_rate": 0.00033134048375206944,
"loss": 1.3176,
"step": 7250
},
{
"epoch": 0.7658227848101266,
"grad_norm": 0.457300066947937,
"learning_rate": 0.0003267734583546536,
"loss": 1.3159,
"step": 7260
},
{
"epoch": 0.7668776371308017,
"grad_norm": 0.5015650987625122,
"learning_rate": 0.00032226938246689157,
"loss": 1.3147,
"step": 7270
},
{
"epoch": 0.7679324894514767,
"grad_norm": 0.5395479202270508,
"learning_rate": 0.0003178273884253874,
"loss": 1.3158,
"step": 7280
},
{
"epoch": 0.7689873417721519,
"grad_norm": 0.4769648015499115,
"learning_rate": 0.0003134466205261674,
"loss": 1.3257,
"step": 7290
},
{
"epoch": 0.770042194092827,
"grad_norm": 0.5480552315711975,
"learning_rate": 0.0003091262348598378,
"loss": 1.3288,
"step": 7300
},
{
"epoch": 0.7710970464135021,
"grad_norm": 0.47319650650024414,
"learning_rate": 0.0003048653991490141,
"loss": 1.3094,
"step": 7310
},
{
"epoch": 0.7721518987341772,
"grad_norm": 0.5190073847770691,
"learning_rate": 0.00030066329258799187,
"loss": 1.3068,
"step": 7320
},
{
"epoch": 0.7732067510548524,
"grad_norm": 0.5128898620605469,
"learning_rate": 0.0002965191056846266,
"loss": 1.3107,
"step": 7330
},
{
"epoch": 0.7742616033755274,
"grad_norm": 0.5712609887123108,
"learning_rate": 0.000292432040104394,
"loss": 1.3076,
"step": 7340
},
{
"epoch": 0.7753164556962026,
"grad_norm": 0.4749037027359009,
"learning_rate": 0.00028840130851659853,
"loss": 1.3045,
"step": 7350
},
{
"epoch": 0.7763713080168776,
"grad_norm": 0.48418837785720825,
"learning_rate": 0.0002844261344427028,
"loss": 1.3137,
"step": 7360
},
{
"epoch": 0.7774261603375527,
"grad_norm": 0.47260743379592896,
"learning_rate": 0.0002805057521067471,
"loss": 1.3122,
"step": 7370
},
{
"epoch": 0.7784810126582279,
"grad_norm": 0.47214198112487793,
"learning_rate": 0.00027663940628783017,
"loss": 1.3006,
"step": 7380
},
{
"epoch": 0.7795358649789029,
"grad_norm": 0.5273362398147583,
"learning_rate": 0.00027282635217462393,
"loss": 1.3119,
"step": 7390
},
{
"epoch": 0.7805907172995781,
"grad_norm": 0.5290525555610657,
"learning_rate": 0.0002690658552218937,
"loss": 1.3165,
"step": 7400
},
{
"epoch": 0.7816455696202531,
"grad_norm": 0.48313039541244507,
"learning_rate": 0.00026535719100899516,
"loss": 1.2975,
"step": 7410
},
{
"epoch": 0.7827004219409283,
"grad_norm": 0.49278801679611206,
"learning_rate": 0.00026169964510032245,
"loss": 1.3032,
"step": 7420
},
{
"epoch": 0.7837552742616034,
"grad_norm": 0.4592038691043854,
"learning_rate": 0.00025809251290767984,
"loss": 1.2909,
"step": 7430
},
{
"epoch": 0.7848101265822784,
"grad_norm": 0.47983959317207336,
"learning_rate": 0.00025453509955454957,
"loss": 1.2932,
"step": 7440
},
{
"epoch": 0.7858649789029536,
"grad_norm": 0.5009822845458984,
"learning_rate": 0.00025102671974223175,
"loss": 1.2955,
"step": 7450
},
{
"epoch": 0.7869198312236287,
"grad_norm": 0.47341182827949524,
"learning_rate": 0.00024756669761782815,
"loss": 1.3073,
"step": 7460
},
{
"epoch": 0.7879746835443038,
"grad_norm": 0.4908011555671692,
"learning_rate": 0.0002441543666440464,
"loss": 1.2989,
"step": 7470
},
{
"epoch": 0.7890295358649789,
"grad_norm": 0.5158747434616089,
"learning_rate": 0.00024078906947079878,
"loss": 1.3039,
"step": 7480
},
{
"epoch": 0.790084388185654,
"grad_norm": 0.469517320394516,
"learning_rate": 0.00023747015780857005,
"loss": 1.3101,
"step": 7490
},
{
"epoch": 0.7911392405063291,
"grad_norm": 0.47917693853378296,
"learning_rate": 0.00023419699230353144,
"loss": 1.3061,
"step": 7500
},
{
"epoch": 0.7921940928270043,
"grad_norm": 0.4800731837749481,
"learning_rate": 0.00023096894241437586,
"loss": 1.312,
"step": 7510
},
{
"epoch": 0.7932489451476793,
"grad_norm": 0.48870769143104553,
"learning_rate": 0.00022778538629085056,
"loss": 1.2957,
"step": 7520
},
{
"epoch": 0.7943037974683544,
"grad_norm": 0.48116040229797363,
"learning_rate": 0.00022464571065396427,
"loss": 1.2967,
"step": 7530
},
{
"epoch": 0.7953586497890295,
"grad_norm": 0.5820254683494568,
"learning_rate": 0.00022154931067784521,
"loss": 1.2962,
"step": 7540
},
{
"epoch": 0.7964135021097046,
"grad_norm": 0.4812745451927185,
"learning_rate": 0.00021849558987322782,
"loss": 1.2924,
"step": 7550
},
{ |
|
"epoch": 0.7974683544303798, |
|
"grad_norm": 0.5120149254798889, |
|
"learning_rate": 0.0002154839599725452, |
|
"loss": 1.2939, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.7985232067510548, |
|
"grad_norm": 0.4970279633998871, |
|
"learning_rate": 0.00021251384081660544, |
|
"loss": 1.307, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.79957805907173, |
|
"grad_norm": 0.4871574938297272, |
|
"learning_rate": 0.0002095846602428303, |
|
"loss": 1.3018, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.8006329113924051, |
|
"grad_norm": 0.4676864445209503, |
|
"learning_rate": 0.00020669585397503358, |
|
"loss": 1.293, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.8016877637130801, |
|
"grad_norm": 0.4470655620098114, |
|
"learning_rate": 0.0002038468655147195, |
|
"loss": 1.2965, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.8027426160337553, |
|
"grad_norm": 0.5174440741539001, |
|
"learning_rate": 0.00020103714603387894, |
|
"loss": 1.3112, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.8037974683544303, |
|
"grad_norm": 0.4520186483860016, |
|
"learning_rate": 0.00019826615426926338, |
|
"loss": 1.2835, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.8048523206751055, |
|
"grad_norm": 0.45952776074409485, |
|
"learning_rate": 0.00019553335641811625, |
|
"loss": 1.3055, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.8059071729957806, |
|
"grad_norm": 0.5378235578536987, |
|
"learning_rate": 0.0001928382260353415, |
|
"loss": 1.2962, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.8069620253164557, |
|
"grad_norm": 0.4733293056488037, |
|
"learning_rate": 0.00019018024393208902, |
|
"loss": 1.3121, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.8080168776371308, |
|
"grad_norm": 0.4595848321914673, |
|
"learning_rate": 0.00018755889807573872, |
|
"loss": 1.2913, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.8090717299578059, |
|
"grad_norm": 0.4874734580516815, |
|
"learning_rate": 0.00018497368349126262, |
|
"loss": 1.2967, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.810126582278481, |
|
"grad_norm": 0.5490835309028625, |
|
"learning_rate": 0.00018242410216394648, |
|
"loss": 1.3108, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.8111814345991561, |
|
"grad_norm": 0.44863075017929077, |
|
"learning_rate": 0.0001799096629434529, |
|
"loss": 1.2844, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.8122362869198312, |
|
"grad_norm": 0.4595239758491516, |
|
"learning_rate": 0.00017742988144920578, |
|
"loss": 1.2936, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.8132911392405063, |
|
"grad_norm": 0.6216907501220703, |
|
"learning_rate": 0.00017498427997707976, |
|
"loss": 1.2918, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.8143459915611815, |
|
"grad_norm": 0.4747745394706726, |
|
"learning_rate": 0.00017257238740737548, |
|
"loss": 1.2967, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.8154008438818565, |
|
"grad_norm": 0.49981772899627686, |
|
"learning_rate": 0.00017019373911406307, |
|
"loss": 1.3045, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.8164556962025317, |
|
"grad_norm": 0.46517640352249146, |
|
"learning_rate": 0.000167847876875277, |
|
"loss": 1.2998, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.8175105485232067, |
|
"grad_norm": 0.4871491491794586, |
|
"learning_rate": 0.00016553434878504428, |
|
"loss": 1.2859, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.8185654008438819, |
|
"grad_norm": 0.5478642582893372, |
|
"learning_rate": 0.00016325270916622947, |
|
"loss": 1.2875, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.819620253164557, |
|
"grad_norm": 0.4661363959312439, |
|
"learning_rate": 0.00016100251848467966, |
|
"loss": 1.2952, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.820675105485232, |
|
"grad_norm": 0.5517392158508301, |
|
"learning_rate": 0.0001587833432645528, |
|
"loss": 1.2882, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.8217299578059072, |
|
"grad_norm": 0.5190399289131165, |
|
"learning_rate": 0.00015659475600481292, |
|
"loss": 1.3051, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.8227848101265823, |
|
"grad_norm": 0.46546727418899536, |
|
"learning_rate": 0.00015443633509687688, |
|
"loss": 1.2928, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.8238396624472574, |
|
"grad_norm": 0.5459626913070679, |
|
"learning_rate": 0.00015230766474339536, |
|
"loss": 1.293, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.8248945147679325, |
|
"grad_norm": 0.4505077302455902, |
|
"learning_rate": 0.00015020833487815416, |
|
"loss": 1.2994, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.8259493670886076, |
|
"grad_norm": 0.47471731901168823, |
|
"learning_rate": 0.0001481379410870792, |
|
"loss": 1.2903, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.8270042194092827, |
|
"grad_norm": 0.4919489324092865, |
|
"learning_rate": 0.00014609608453033013, |
|
"loss": 1.2758, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.8280590717299579, |
|
"grad_norm": 0.48681142926216125, |
|
"learning_rate": 0.00014408237186546807, |
|
"loss": 1.2849, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.8291139240506329, |
|
"grad_norm": 0.4562098979949951, |
|
"learning_rate": 0.00014209641517168273, |
|
"loss": 1.2734, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.830168776371308, |
|
"grad_norm": 0.5935469269752502, |
|
"learning_rate": 0.00014013783187506265, |
|
"loss": 1.2873, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.8312236286919831, |
|
"grad_norm": 0.47083306312561035, |
|
"learning_rate": 0.00013820624467489697, |
|
"loss": 1.3074, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.8322784810126582, |
|
"grad_norm": 0.4802079498767853, |
|
"learning_rate": 0.00013630128147099213, |
|
"loss": 1.3035, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.4673043489456177, |
|
"learning_rate": 0.00013442257529199068, |
|
"loss": 1.2809, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.8343881856540084, |
|
"grad_norm": 0.521275520324707, |
|
"learning_rate": 0.00013256976422467803, |
|
"loss": 1.2926, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.8354430379746836, |
|
"grad_norm": 0.5621090531349182, |
|
"learning_rate": 0.00013074249134426366, |
|
"loss": 1.2899, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.8364978902953587, |
|
"grad_norm": 0.46371445059776306, |
|
"learning_rate": 0.0001289404046456233, |
|
"loss": 1.3009, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.8375527426160337, |
|
"grad_norm": 0.4880622625350952, |
|
"learning_rate": 0.0001271631569754887, |
|
"loss": 1.291, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.8386075949367089, |
|
"grad_norm": 0.49742773175239563, |
|
"learning_rate": 0.0001254104059655723, |
|
"loss": 1.2959, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.8396624472573839, |
|
"grad_norm": 0.45721614360809326, |
|
"learning_rate": 0.00012368181396661337, |
|
"loss": 1.2823, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.8407172995780591, |
|
"grad_norm": 0.46568551659584045, |
|
"learning_rate": 0.00012197704798333364, |
|
"loss": 1.2831, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.8417721518987342, |
|
"grad_norm": 0.47091469168663025, |
|
"learning_rate": 0.00012029577961028894, |
|
"loss": 1.282, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.8428270042194093, |
|
"grad_norm": 0.5390154719352722, |
|
"learning_rate": 0.00011863768496860542, |
|
"loss": 1.2927, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.8438818565400844, |
|
"grad_norm": 0.577823281288147, |
|
"learning_rate": 0.00011700244464358777, |
|
"loss": 1.2881, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8449367088607594, |
|
"grad_norm": 0.47146788239479065, |
|
"learning_rate": 0.00011538974362318715, |
|
"loss": 1.2935, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.8459915611814346, |
|
"grad_norm": 0.49528032541275024, |
|
"learning_rate": 0.00011379927123731737, |
|
"loss": 1.2876, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.8470464135021097, |
|
"grad_norm": 0.48834657669067383, |
|
"learning_rate": 0.0001122307210980077, |
|
"loss": 1.2853, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.8481012658227848, |
|
"grad_norm": 0.45491474866867065, |
|
"learning_rate": 0.00011068379104038026, |
|
"loss": 1.2962, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.8491561181434599, |
|
"grad_norm": 0.45627567172050476, |
|
"learning_rate": 0.00010915818306444116, |
|
"loss": 1.2778, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.8502109704641351, |
|
"grad_norm": 0.4637642800807953, |
|
"learning_rate": 0.00010765360327767384, |
|
"loss": 1.2853, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.8512658227848101, |
|
"grad_norm": 0.4641653299331665, |
|
"learning_rate": 0.00010616976183842376, |
|
"loss": 1.291, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.8523206751054853, |
|
"grad_norm": 0.4908168911933899, |
|
"learning_rate": 0.00010470637290006365, |
|
"loss": 1.2926, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.8533755274261603, |
|
"grad_norm": 0.4617968797683716, |
|
"learning_rate": 0.00010326315455592764, |
|
"loss": 1.28, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.8544303797468354, |
|
"grad_norm": 0.47194594144821167, |
|
"learning_rate": 0.0001018398287850053, |
|
"loss": 1.2752, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.8554852320675106, |
|
"grad_norm": 0.5317877531051636, |
|
"learning_rate": 0.00010043612139838357, |
|
"loss": 1.2938, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.8565400843881856, |
|
"grad_norm": 0.46418246626853943, |
|
"learning_rate": 9.905176198642719e-05, |
|
"loss": 1.2834, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.8575949367088608, |
|
"grad_norm": 0.49755966663360596, |
|
"learning_rate": 9.76864838666871e-05, |
|
"loss": 1.2832, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.8586497890295358, |
|
"grad_norm": 0.46786341071128845, |
|
"learning_rate": 9.634002403252676e-05, |
|
"loss": 1.2836, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.859704641350211, |
|
"grad_norm": 0.4702818989753723, |
|
"learning_rate": 9.501212310245681e-05, |
|
"loss": 1.2783, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.8607594936708861, |
|
"grad_norm": 0.44937238097190857, |
|
"learning_rate": 9.370252527016777e-05, |
|
"loss": 1.2899, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.8618143459915611, |
|
"grad_norm": 0.49406906962394714, |
|
"learning_rate": 9.241097825525163e-05, |
|
"loss": 1.2807, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.8628691983122363, |
|
"grad_norm": 0.5151875019073486, |
|
"learning_rate": 9.113723325460276e-05, |
|
"loss": 1.288, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.8639240506329114, |
|
"grad_norm": 0.5420994162559509, |
|
"learning_rate": 8.988104489448849e-05, |
|
"loss": 1.2815, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.8649789029535865, |
|
"grad_norm": 0.4622182250022888, |
|
"learning_rate": 8.864217118328042e-05, |
|
"loss": 1.2922, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.8660337552742616, |
|
"grad_norm": 0.48794785141944885, |
|
"learning_rate": 8.742037346483729e-05, |
|
"loss": 1.2854, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.8670886075949367, |
|
"grad_norm": 0.4837190508842468, |
|
"learning_rate": 8.62154163725303e-05, |
|
"loss": 1.2934, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.8681434599156118, |
|
"grad_norm": 0.47257596254348755, |
|
"learning_rate": 8.502706778390219e-05, |
|
"loss": 1.29, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.869198312236287, |
|
"grad_norm": 0.46022605895996094, |
|
"learning_rate": 8.38550987759513e-05, |
|
"loss": 1.2858, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.870253164556962, |
|
"grad_norm": 0.4648395776748657, |
|
"learning_rate": 8.269928358103191e-05, |
|
"loss": 1.3001, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.8713080168776371, |
|
"grad_norm": 0.4802897870540619, |
|
"learning_rate": 8.155939954336243e-05, |
|
"loss": 1.2892, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.8723628691983122, |
|
"grad_norm": 0.49936237931251526, |
|
"learning_rate": 8.043522707613312e-05, |
|
"loss": 1.2858, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.8734177215189873, |
|
"grad_norm": 0.4365840256214142, |
|
"learning_rate": 7.932654961920486e-05, |
|
"loss": 1.2715, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.8744725738396625, |
|
"grad_norm": 0.4833032488822937, |
|
"learning_rate": 7.823315359739135e-05, |
|
"loss": 1.272, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.8755274261603375, |
|
"grad_norm": 0.48729777336120605, |
|
"learning_rate": 7.715482837931577e-05, |
|
"loss": 1.2958, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.8765822784810127, |
|
"grad_norm": 0.4626319110393524, |
|
"learning_rate": 7.6091366236835e-05, |
|
"loss": 1.2749, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.8776371308016878, |
|
"grad_norm": 0.46448102593421936, |
|
"learning_rate": 7.504256230502289e-05, |
|
"loss": 1.2919, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.8786919831223629, |
|
"grad_norm": 0.4604092538356781, |
|
"learning_rate": 7.400821454270524e-05, |
|
"loss": 1.2859, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.879746835443038, |
|
"grad_norm": 0.45476552844047546, |
|
"learning_rate": 7.29881236935386e-05, |
|
"loss": 1.274, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.880801687763713, |
|
"grad_norm": 0.46123555302619934, |
|
"learning_rate": 7.198209324762562e-05, |
|
"loss": 1.2777, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.8818565400843882, |
|
"grad_norm": 0.4466295838356018, |
|
"learning_rate": 7.098992940365946e-05, |
|
"loss": 1.2739, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.8829113924050633, |
|
"grad_norm": 0.4567776024341583, |
|
"learning_rate": 7.001144103159e-05, |
|
"loss": 1.2824, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.8839662447257384, |
|
"grad_norm": 0.47256141901016235, |
|
"learning_rate": 6.904643963580461e-05, |
|
"loss": 1.2882, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.8850210970464135, |
|
"grad_norm": 0.5056165456771851, |
|
"learning_rate": 6.809473931881644e-05, |
|
"loss": 1.2796, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.8860759493670886, |
|
"grad_norm": 0.4639674127101898, |
|
"learning_rate": 6.71561567454532e-05, |
|
"loss": 1.2782, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.8871308016877637, |
|
"grad_norm": 0.47839340567588806, |
|
"learning_rate": 6.623051110753948e-05, |
|
"loss": 1.2982, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.8881856540084389, |
|
"grad_norm": 0.46292224526405334, |
|
"learning_rate": 6.531762408906607e-05, |
|
"loss": 1.2858, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.8892405063291139, |
|
"grad_norm": 0.4787479639053345, |
|
"learning_rate": 6.441731983183912e-05, |
|
"loss": 1.2826, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.890295358649789, |
|
"grad_norm": 0.46926361322402954, |
|
"learning_rate": 6.352942490160292e-05, |
|
"loss": 1.2751, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.8913502109704642, |
|
"grad_norm": 0.5352609157562256, |
|
"learning_rate": 6.265376825462966e-05, |
|
"loss": 1.2856, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.8924050632911392, |
|
"grad_norm": 0.536734402179718, |
|
"learning_rate": 6.179018120476945e-05, |
|
"loss": 1.2794, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.8934599156118144, |
|
"grad_norm": 0.44269171357154846, |
|
"learning_rate": 6.0938497390954946e-05, |
|
"loss": 1.2805, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.8945147679324894, |
|
"grad_norm": 0.4609244763851166, |
|
"learning_rate": 6.009855274515339e-05, |
|
"loss": 1.2765, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.8955696202531646, |
|
"grad_norm": 0.48474302887916565, |
|
"learning_rate": 5.9270185460760735e-05, |
|
"loss": 1.281, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.8966244725738397, |
|
"grad_norm": 0.45089036226272583, |
|
"learning_rate": 5.8453235961431225e-05, |
|
"loss": 1.2712, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.8976793248945147, |
|
"grad_norm": 0.4711897671222687, |
|
"learning_rate": 5.764754687033678e-05, |
|
"loss": 1.2824, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.8987341772151899, |
|
"grad_norm": 0.47829535603523254, |
|
"learning_rate": 5.6852962979849836e-05, |
|
"loss": 1.2769, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.8997890295358649, |
|
"grad_norm": 0.4775094985961914, |
|
"learning_rate": 5.6069331221644284e-05, |
|
"loss": 1.2932, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.9008438818565401, |
|
"grad_norm": 0.4563244581222534, |
|
"learning_rate": 5.529650063720842e-05, |
|
"loss": 1.2872, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.9018987341772152, |
|
"grad_norm": 0.4800124764442444, |
|
"learning_rate": 5.453432234876445e-05, |
|
"loss": 1.2797, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.9029535864978903, |
|
"grad_norm": 0.46603870391845703, |
|
"learning_rate": 5.37826495305886e-05, |
|
"loss": 1.272, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.9040084388185654, |
|
"grad_norm": 0.5261629223823547, |
|
"learning_rate": 5.304133738072674e-05, |
|
"loss": 1.2907, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.9050632911392406, |
|
"grad_norm": 0.47402966022491455, |
|
"learning_rate": 5.2310243093099814e-05, |
|
"loss": 1.2805, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.9061181434599156, |
|
"grad_norm": 0.5603070259094238, |
|
"learning_rate": 5.158922582999368e-05, |
|
"loss": 1.2857, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.9071729957805907, |
|
"grad_norm": 0.4486725628376007, |
|
"learning_rate": 5.087814669492819e-05, |
|
"loss": 1.2796, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.9082278481012658, |
|
"grad_norm": 0.4944787919521332, |
|
"learning_rate": 5.017686870590028e-05, |
|
"loss": 1.2788, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.9092827004219409, |
|
"grad_norm": 0.4802660048007965, |
|
"learning_rate": 4.948525676899577e-05, |
|
"loss": 1.2749, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.9103375527426161, |
|
"grad_norm": 0.48038867115974426, |
|
"learning_rate": 4.880317765236493e-05, |
|
"loss": 1.2865, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.9113924050632911, |
|
"grad_norm": 0.5557894706726074, |
|
"learning_rate": 4.8130499960556755e-05, |
|
"loss": 1.2679, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.9124472573839663, |
|
"grad_norm": 0.4686918258666992, |
|
"learning_rate": 4.746709410920699e-05, |
|
"loss": 1.2718, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.9135021097046413, |
|
"grad_norm": 0.5099261999130249, |
|
"learning_rate": 4.681283230007507e-05, |
|
"loss": 1.2728, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.9145569620253164, |
|
"grad_norm": 0.508106529712677, |
|
"learning_rate": 4.616758849642509e-05, |
|
"loss": 1.2793, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.9156118143459916, |
|
"grad_norm": 0.4402479827404022, |
|
"learning_rate": 4.553123839874615e-05, |
|
"loss": 1.2907, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 0.48903051018714905, |
|
"learning_rate": 4.490365942080736e-05, |
|
"loss": 1.2817, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.9177215189873418, |
|
"grad_norm": 0.45789381861686707, |
|
"learning_rate": 4.428473066604285e-05, |
|
"loss": 1.2809, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.9187763713080169, |
|
"grad_norm": 0.4754452407360077, |
|
"learning_rate": 4.367433290426233e-05, |
|
"loss": 1.2802, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.919831223628692, |
|
"grad_norm": 0.4568568468093872, |
|
"learning_rate": 4.3072348548682595e-05, |
|
"loss": 1.2731, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.9208860759493671, |
|
"grad_norm": 0.4637247622013092, |
|
"learning_rate": 4.247866163327575e-05, |
|
"loss": 1.2856, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.9219409282700421, |
|
"grad_norm": 0.4735919237136841, |
|
"learning_rate": 4.1893157790429404e-05, |
|
"loss": 1.273, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.9229957805907173, |
|
"grad_norm": 0.4588661789894104, |
|
"learning_rate": 4.1315724228915066e-05, |
|
"loss": 1.2726, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.9240506329113924, |
|
"grad_norm": 0.46978574991226196, |
|
"learning_rate": 4.074624971216005e-05, |
|
"loss": 1.2616, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.9251054852320675, |
|
"grad_norm": 0.4512912333011627, |
|
"learning_rate": 4.018462453681889e-05, |
|
"loss": 1.2716, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.9261603375527426, |
|
"grad_norm": 0.45371580123901367, |
|
"learning_rate": 3.963074051164014e-05, |
|
"loss": 1.2799, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.9272151898734177, |
|
"grad_norm": 0.4601062834262848, |
|
"learning_rate": 3.908449093662446e-05, |
|
"loss": 1.2732, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.9282700421940928, |
|
"grad_norm": 0.461967408657074, |
|
"learning_rate": 3.854577058246998e-05, |
|
"loss": 1.2708, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.929324894514768, |
|
"grad_norm": 0.45294222235679626, |
|
"learning_rate": 3.801447567030094e-05, |
|
"loss": 1.2897, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.930379746835443, |
|
"grad_norm": 0.4439496397972107, |
|
"learning_rate": 3.7490503851675777e-05, |
|
"loss": 1.2757, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.9314345991561181, |
|
"grad_norm": 0.4653451442718506, |
|
"learning_rate": 3.6973754188870806e-05, |
|
"loss": 1.2939, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.9324894514767933, |
|
"grad_norm": 0.4448198676109314, |
|
"learning_rate": 3.6464127135435536e-05, |
|
"loss": 1.2775, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.9335443037974683, |
|
"grad_norm": 0.46878382563591003, |
|
"learning_rate": 3.596152451701616e-05, |
|
"loss": 1.2777, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.9345991561181435, |
|
"grad_norm": 0.5037729144096375, |
|
"learning_rate": 3.5465849512443226e-05, |
|
"loss": 1.2766, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.9356540084388185, |
|
"grad_norm": 0.46913567185401917, |
|
"learning_rate": 3.4977006635080086e-05, |
|
"loss": 1.2801, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.9367088607594937, |
|
"grad_norm": 0.5404587388038635, |
|
"learning_rate": 3.449490171442838e-05, |
|
"loss": 1.282, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.9377637130801688, |
|
"grad_norm": 0.4438877999782562, |
|
"learning_rate": 3.401944187798702e-05, |
|
"loss": 1.283, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.9388185654008439, |
|
"grad_norm": 0.45989537239074707, |
|
"learning_rate": 3.355053553336137e-05, |
|
"loss": 1.2742, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.939873417721519, |
|
"grad_norm": 0.4791684150695801, |
|
"learning_rate": 3.308809235061882e-05, |
|
"loss": 1.272, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.9409282700421941, |
|
"grad_norm": 0.4399260878562927, |
|
"learning_rate": 3.263202324488772e-05, |
|
"loss": 1.2796, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.9419831223628692, |
|
"grad_norm": 0.4575617015361786, |
|
"learning_rate": 3.218224035919609e-05, |
|
"loss": 1.2766, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.9430379746835443, |
|
"grad_norm": 0.4506317377090454, |
|
"learning_rate": 3.173865704754688e-05, |
|
"loss": 1.2765, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.9440928270042194, |
|
"grad_norm": 0.45531153678894043, |
|
"learning_rate": 3.130118785822657e-05, |
|
"loss": 1.2748, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.9451476793248945, |
|
"grad_norm": 0.4633644223213196, |
|
"learning_rate": 3.08697485173437e-05, |
|
"loss": 1.2816, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.9462025316455697, |
|
"grad_norm": 0.5337445735931396, |
|
"learning_rate": 3.0444255912594442e-05, |
|
"loss": 1.2772, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.9472573839662447, |
|
"grad_norm": 0.5188563466072083, |
|
"learning_rate": 3.002462807725185e-05, |
|
"loss": 1.275, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.9483122362869199, |
|
"grad_norm": 0.48296037316322327, |
|
"learning_rate": 2.9610784174375868e-05, |
|
"loss": 1.2797, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.9493670886075949, |
|
"grad_norm": 0.46105465292930603, |
|
"learning_rate": 2.920264448124087e-05, |
|
"loss": 1.2758, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.95042194092827, |
|
"grad_norm": 0.45554500818252563, |
|
"learning_rate": 2.8800130373977936e-05, |
|
"loss": 1.2726, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.9514767932489452, |
|
"grad_norm": 0.4668406844139099, |
|
"learning_rate": 2.84031643124288e-05, |
|
"loss": 1.2734, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.9525316455696202, |
|
"grad_norm": 0.4780922532081604, |
|
"learning_rate": 2.8011669825208517e-05, |
|
"loss": 1.2923, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.9535864978902954, |
|
"grad_norm": 0.4693585932254791, |
|
"learning_rate": 2.762557149497405e-05, |
|
"loss": 1.2676, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.9546413502109705, |
|
"grad_norm": 0.4524311423301697, |
|
"learning_rate": 2.724479494389592e-05, |
|
"loss": 1.2813, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.9556962025316456, |
|
"grad_norm": 0.44265279173851013, |
|
"learning_rate": 2.6869266819330058e-05, |
|
"loss": 1.2797, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.9567510548523207, |
|
"grad_norm": 0.4572458863258362, |
|
"learning_rate": 2.6498914779687228e-05, |
|
"loss": 1.2814, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.9578059071729957, |
|
"grad_norm": 0.4525168836116791, |
|
"learning_rate": 2.6133667480497115e-05, |
|
"loss": 1.2802, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.9588607594936709, |
|
"grad_norm": 0.4815879464149475, |
|
"learning_rate": 2.5773454560664597e-05, |
|
"loss": 1.2822, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.959915611814346, |
|
"grad_norm": 0.44361522793769836, |
|
"learning_rate": 2.541820662891541e-05, |
|
"loss": 1.2632, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.9609704641350211, |
|
"grad_norm": 0.4554971754550934, |
|
"learning_rate": 2.5067855250428616e-05, |
|
"loss": 1.2706, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.9620253164556962, |
|
"grad_norm": 0.520605742931366, |
|
"learning_rate": 2.472233293365335e-05, |
|
"loss": 1.2838, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.9630801687763713, |
|
"grad_norm": 0.4688946604728699, |
|
"learning_rate": 2.4381573117307307e-05, |
|
"loss": 1.268, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.9641350210970464, |
|
"grad_norm": 0.5332078337669373, |
|
"learning_rate": 2.4045510157554362e-05, |
|
"loss": 1.2868, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.9651898734177216, |
|
"grad_norm": 0.47867295145988464, |
|
"learning_rate": 2.3714079315358985e-05, |
|
"loss": 1.2729, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.9662447257383966, |
|
"grad_norm": 0.46423396468162537, |
|
"learning_rate": 2.338721674401494e-05, |
|
"loss": 1.2813, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.9672995780590717, |
|
"grad_norm": 0.46052441000938416, |
|
"learning_rate": 2.30648594768459e-05, |
|
"loss": 1.2796, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.9683544303797469, |
|
"grad_norm": 0.4625004529953003, |
|
"learning_rate": 2.2746945415075523e-05, |
|
"loss": 1.276, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.9694092827004219, |
|
"grad_norm": 0.46688348054885864, |
|
"learning_rate": 2.2433413315864803e-05, |
|
"loss": 1.2731, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.9704641350210971, |
|
"grad_norm": 0.4896974563598633, |
|
"learning_rate": 2.2124202780514277e-05, |
|
"loss": 1.2757, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.9715189873417721, |
|
"grad_norm": 0.4933200180530548, |
|
"learning_rate": 2.1819254242828815e-05, |
|
"loss": 1.2764, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.9725738396624473, |
|
"grad_norm": 0.4900830388069153, |
|
"learning_rate": 2.151850895764285e-05, |
|
"loss": 1.2717, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.9736286919831224, |
|
"grad_norm": 0.45735400915145874, |
|
"learning_rate": 2.12219089895037e-05, |
|
"loss": 1.2646, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.9746835443037974, |
|
"grad_norm": 0.48359930515289307, |
|
"learning_rate": 2.092939720151092e-05, |
|
"loss": 1.2612, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.9757383966244726, |
|
"grad_norm": 0.4569590985774994, |
|
"learning_rate": 2.064091724430947e-05, |
|
"loss": 1.2719, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.9767932489451476, |
|
"grad_norm": 0.47614622116088867, |
|
"learning_rate": 2.0356413545234603e-05, |
|
"loss": 1.2671, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.9778481012658228, |
|
"grad_norm": 0.48315489292144775, |
|
"learning_rate": 2.0075831297606357e-05, |
|
"loss": 1.2791, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.9789029535864979, |
|
"grad_norm": 0.4540780484676361, |
|
"learning_rate": 1.9799116450171627e-05, |
|
"loss": 1.2692, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.979957805907173, |
|
"grad_norm": 0.465520977973938, |
|
"learning_rate": 1.952621569669175e-05, |
|
"loss": 1.2733, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.9810126582278481, |
|
"grad_norm": 0.4556669294834137, |
|
"learning_rate": 1.9257076465673605e-05, |
|
"loss": 1.2719, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.9820675105485233, |
|
"grad_norm": 0.46988987922668457, |
|
"learning_rate": 1.899164691024229e-05, |
|
"loss": 1.2741, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.9831223628691983, |
|
"grad_norm": 0.48423823714256287, |
|
"learning_rate": 1.872987589815331e-05, |
|
"loss": 1.2691, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.9841772151898734, |
|
"grad_norm": 0.4487670063972473, |
|
"learning_rate": 1.8471713001942538e-05, |
|
"loss": 1.2821, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.9852320675105485, |
|
"grad_norm": 0.4768301546573639, |
|
"learning_rate": 1.8217108489211845e-05, |
|
"loss": 1.2784, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.9862869198312236, |
|
"grad_norm": 0.462040513753891, |
|
"learning_rate": 1.7966013313048696e-05, |
|
"loss": 1.2883, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.9873417721518988, |
|
"grad_norm": 0.4433370530605316, |
|
"learning_rate": 1.7718379102577752e-05, |
|
"loss": 1.2808, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.9883966244725738, |
|
"grad_norm": 0.4689382314682007, |
|
"learning_rate": 1.7474158153642745e-05, |
|
"loss": 1.2792, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.989451476793249, |
|
"grad_norm": 0.4566853940486908, |
|
"learning_rate": 1.7233303419616745e-05, |
|
"loss": 1.2648, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.990506329113924, |
|
"grad_norm": 0.48965659737586975, |
|
"learning_rate": 1.699576850233916e-05, |
|
"loss": 1.27, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.9915611814345991, |
|
"grad_norm": 0.45181959867477417, |
|
"learning_rate": 1.6761507643177553e-05, |
|
"loss": 1.2827, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.9926160337552743, |
|
"grad_norm": 0.5077662467956543, |
|
"learning_rate": 1.6530475714212752e-05, |
|
"loss": 1.2588, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.9936708860759493, |
|
"grad_norm": 0.4643486440181732, |
|
"learning_rate": 1.6302628209545423e-05, |
|
"loss": 1.2787, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.9947257383966245, |
|
"grad_norm": 0.4604697525501251, |
|
"learning_rate": 1.6077921236722464e-05, |
|
"loss": 1.2783, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.9957805907172996, |
|
"grad_norm": 0.44910043478012085, |
|
"learning_rate": 1.5856311508281594e-05, |
|
"loss": 1.2722, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.9968354430379747, |
|
"grad_norm": 0.45268765091896057, |
|
"learning_rate": 1.5637756333412454e-05, |
|
"loss": 1.2814, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.9978902953586498, |
|
"grad_norm": 0.4484785199165344, |
|
"learning_rate": 1.542221360973268e-05, |
|
"loss": 1.2664, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.9989451476793249, |
|
"grad_norm": 0.4483862519264221, |
|
"learning_rate": 1.5209641815177312e-05, |
|
"loss": 1.2798, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.4878731966018677, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.2718, |
|
"step": 9480 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.832308198648013e+16, |
|
"train_batch_size": 1024, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|