{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.7250168395527414,
  "eval_steps": 500,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008621851003637344,
      "grad_norm": 1.4002951383590698,
      "learning_rate": 1.4367816091954023e-07,
      "loss": 1.6797,
      "step": 10
    },
    {
      "epoch": 0.017243702007274687,
      "grad_norm": 1.374281644821167,
      "learning_rate": 2.8735632183908047e-07,
      "loss": 1.6722,
      "step": 20
    },
    {
      "epoch": 0.02586555301091203,
      "grad_norm": 1.2334840297698975,
      "learning_rate": 4.3103448275862073e-07,
      "loss": 1.6699,
      "step": 30
    },
    {
      "epoch": 0.034487404014549375,
      "grad_norm": 1.3353266716003418,
      "learning_rate": 5.747126436781609e-07,
      "loss": 1.6697,
      "step": 40
    },
    {
      "epoch": 0.043109255018186715,
      "grad_norm": 1.3006664514541626,
      "learning_rate": 7.183908045977011e-07,
      "loss": 1.6547,
      "step": 50
    },
    {
      "epoch": 0.05173110602182406,
      "grad_norm": 1.2290219068527222,
      "learning_rate": 8.620689655172415e-07,
      "loss": 1.6121,
      "step": 60
    },
    {
      "epoch": 0.0603529570254614,
      "grad_norm": 0.8605530261993408,
      "learning_rate": 1.0057471264367817e-06,
      "loss": 1.5626,
      "step": 70
    },
    {
      "epoch": 0.06897480802909875,
      "grad_norm": 0.5922505259513855,
      "learning_rate": 1.1494252873563219e-06,
      "loss": 1.5188,
      "step": 80
    },
    {
      "epoch": 0.07759665903273609,
      "grad_norm": 0.5861937999725342,
      "learning_rate": 1.2931034482758623e-06,
      "loss": 1.5159,
      "step": 90
    },
    {
      "epoch": 0.08621851003637343,
      "grad_norm": 0.5518978238105774,
      "learning_rate": 1.4367816091954023e-06,
      "loss": 1.4833,
      "step": 100
    },
    {
      "epoch": 0.09484036104001077,
      "grad_norm": 0.5048322677612305,
      "learning_rate": 1.5804597701149427e-06,
      "loss": 1.4932,
      "step": 110
    },
    {
      "epoch": 0.10346221204364812,
      "grad_norm": 0.6119816899299622,
      "learning_rate": 1.724137931034483e-06,
      "loss": 1.4988,
      "step": 120
    },
    {
      "epoch": 0.11208406304728546,
      "grad_norm": 0.5448249578475952,
      "learning_rate": 1.8678160919540231e-06,
      "loss": 1.4513,
      "step": 130
    },
    {
      "epoch": 0.1207059140509228,
      "grad_norm": 0.5504677295684814,
      "learning_rate": 2.0114942528735633e-06,
      "loss": 1.4618,
      "step": 140
    },
    {
      "epoch": 0.12932776505456015,
      "grad_norm": 0.5333659052848816,
      "learning_rate": 2.1551724137931035e-06,
      "loss": 1.4717,
      "step": 150
    },
    {
      "epoch": 0.1379496160581975,
      "grad_norm": 0.7873896360397339,
      "learning_rate": 2.2988505747126437e-06,
      "loss": 1.4464,
      "step": 160
    },
    {
      "epoch": 0.14657146706183483,
      "grad_norm": 0.9079521298408508,
      "learning_rate": 2.4425287356321844e-06,
      "loss": 1.4313,
      "step": 170
    },
    {
      "epoch": 0.15519331806547218,
      "grad_norm": 0.7075573801994324,
      "learning_rate": 2.5862068965517246e-06,
      "loss": 1.4527,
      "step": 180
    },
    {
      "epoch": 0.16381516906910953,
      "grad_norm": 0.5483418107032776,
      "learning_rate": 2.729885057471265e-06,
      "loss": 1.4109,
      "step": 190
    },
    {
      "epoch": 0.17243702007274686,
      "grad_norm": 0.5097762942314148,
      "learning_rate": 2.8735632183908046e-06,
      "loss": 1.3786,
      "step": 200
    },
    {
      "epoch": 0.18105887107638421,
      "grad_norm": 0.6269800066947937,
      "learning_rate": 3.017241379310345e-06,
      "loss": 1.4585,
      "step": 210
    },
    {
      "epoch": 0.18968072208002154,
      "grad_norm": 0.5950655937194824,
      "learning_rate": 3.1609195402298854e-06,
      "loss": 1.4093,
      "step": 220
    },
    {
      "epoch": 0.1983025730836589,
      "grad_norm": 0.597637414932251,
      "learning_rate": 3.3045977011494256e-06,
      "loss": 1.407,
      "step": 230
    },
    {
      "epoch": 0.20692442408729625,
      "grad_norm": 0.5727440118789673,
      "learning_rate": 3.448275862068966e-06,
      "loss": 1.3743,
      "step": 240
    },
    {
      "epoch": 0.21554627509093358,
      "grad_norm": 0.5026169419288635,
      "learning_rate": 3.5919540229885056e-06,
      "loss": 1.3868,
      "step": 250
    },
    {
      "epoch": 0.22416812609457093,
      "grad_norm": 0.5612446069717407,
      "learning_rate": 3.7356321839080462e-06,
      "loss": 1.3997,
      "step": 260
    },
    {
      "epoch": 0.23278997709820828,
      "grad_norm": 0.5654894709587097,
      "learning_rate": 3.8793103448275865e-06,
      "loss": 1.3925,
      "step": 270
    },
    {
      "epoch": 0.2414118281018456,
      "grad_norm": 0.5213720798492432,
      "learning_rate": 4.022988505747127e-06,
      "loss": 1.3982,
      "step": 280
    },
    {
      "epoch": 0.25003367910548296,
      "grad_norm": 0.6513163447380066,
      "learning_rate": 4.166666666666667e-06,
      "loss": 1.4087,
      "step": 290
    },
    {
      "epoch": 0.2586555301091203,
      "grad_norm": 0.5747817158699036,
      "learning_rate": 4.310344827586207e-06,
      "loss": 1.3629,
      "step": 300
    },
    {
      "epoch": 0.2672773811127577,
      "grad_norm": 0.5259600877761841,
      "learning_rate": 4.454022988505747e-06,
      "loss": 1.3932,
      "step": 310
    },
    {
      "epoch": 0.275899232116395,
      "grad_norm": 0.5602086782455444,
      "learning_rate": 4.5977011494252875e-06,
      "loss": 1.3537,
      "step": 320
    },
    {
      "epoch": 0.2845210831200323,
      "grad_norm": 0.6604083180427551,
      "learning_rate": 4.741379310344828e-06,
      "loss": 1.3651,
      "step": 330
    },
    {
      "epoch": 0.29314293412366965,
      "grad_norm": 0.5549290180206299,
      "learning_rate": 4.885057471264369e-06,
      "loss": 1.3452,
      "step": 340
    },
    {
      "epoch": 0.30176478512730703,
      "grad_norm": 0.548821210861206,
      "learning_rate": 4.999994959675734e-06,
      "loss": 1.3647,
      "step": 350
    },
    {
      "epoch": 0.31038663613094436,
      "grad_norm": 0.5312780141830444,
      "learning_rate": 4.9998185504603824e-06,
      "loss": 1.3404,
      "step": 360
    },
    {
      "epoch": 0.3190084871345817,
      "grad_norm": 0.5477195382118225,
      "learning_rate": 4.999390145355199e-06,
      "loss": 1.3841,
      "step": 370
    },
    {
      "epoch": 0.32763033813821907,
      "grad_norm": 0.5766547322273254,
      "learning_rate": 4.998709787545849e-06,
      "loss": 1.3594,
      "step": 380
    },
    {
      "epoch": 0.3362521891418564,
      "grad_norm": 0.5785459280014038,
      "learning_rate": 4.997777545616258e-06,
      "loss": 1.3402,
      "step": 390
    },
    {
      "epoch": 0.3448740401454937,
      "grad_norm": 0.5505363941192627,
      "learning_rate": 4.996593513541701e-06,
      "loss": 1.3355,
      "step": 400
    },
    {
      "epoch": 0.3534958911491311,
      "grad_norm": 0.5421465039253235,
      "learning_rate": 4.995157810679327e-06,
      "loss": 1.359,
      "step": 410
    },
    {
      "epoch": 0.36211774215276843,
      "grad_norm": 0.5955513119697571,
      "learning_rate": 4.993470581756129e-06,
      "loss": 1.3743,
      "step": 420
    },
    {
      "epoch": 0.37073959315640576,
      "grad_norm": 0.6144652366638184,
      "learning_rate": 4.991531996854352e-06,
      "loss": 1.3447,
      "step": 430
    },
    {
      "epoch": 0.3793614441600431,
      "grad_norm": 0.5953258872032166,
      "learning_rate": 4.989342251394352e-06,
      "loss": 1.3208,
      "step": 440
    },
    {
      "epoch": 0.38798329516368046,
      "grad_norm": 0.5650104880332947,
      "learning_rate": 4.986901566114891e-06,
      "loss": 1.3562,
      "step": 450
    },
    {
      "epoch": 0.3966051461673178,
      "grad_norm": 0.8173232078552246,
      "learning_rate": 4.984210187050891e-06,
      "loss": 1.3151,
      "step": 460
    },
    {
      "epoch": 0.4052269971709551,
      "grad_norm": 0.5520560145378113,
      "learning_rate": 4.981268385508627e-06,
      "loss": 1.3591,
      "step": 470
    },
    {
      "epoch": 0.4138488481745925,
      "grad_norm": 0.5939339399337769,
      "learning_rate": 4.978076458038382e-06,
      "loss": 1.3306,
      "step": 480
    },
    {
      "epoch": 0.4224706991782298,
      "grad_norm": 0.5531189441680908,
      "learning_rate": 4.974634726404551e-06,
      "loss": 1.3338,
      "step": 490
    },
    {
      "epoch": 0.43109255018186715,
      "grad_norm": 0.7108302116394043,
      "learning_rate": 4.9709435375532065e-06,
      "loss": 1.3248,
      "step": 500
    },
    {
      "epoch": 0.43109255018186715,
      "eval_loss": 1.210019826889038,
      "eval_runtime": 4375.303,
      "eval_samples_per_second": 15.08,
      "eval_steps_per_second": 7.54,
      "step": 500
    },
    {
      "epoch": 0.43971440118550453,
      "grad_norm": 0.47356271743774414,
      "learning_rate": 4.9670032635771205e-06,
      "loss": 1.3342,
      "step": 510
    },
    {
      "epoch": 0.44833625218914186,
      "grad_norm": 0.4977116286754608,
      "learning_rate": 4.962814301678262e-06,
      "loss": 1.3412,
      "step": 520
    },
    {
      "epoch": 0.4569581031927792,
      "grad_norm": 0.534755289554596,
      "learning_rate": 4.958377074127751e-06,
      "loss": 1.32,
      "step": 530
    },
    {
      "epoch": 0.46557995419641657,
      "grad_norm": 0.5627906918525696,
      "learning_rate": 4.953692028223295e-06,
      "loss": 1.3275,
      "step": 540
    },
    {
      "epoch": 0.4742018052000539,
      "grad_norm": 0.5472209453582764,
      "learning_rate": 4.948759636244096e-06,
      "loss": 1.3352,
      "step": 550
    },
    {
      "epoch": 0.4828236562036912,
      "grad_norm": 0.5113406777381897,
      "learning_rate": 4.943580395403244e-06,
      "loss": 1.31,
      "step": 560
    },
    {
      "epoch": 0.49144550720732855,
      "grad_norm": 0.6487182974815369,
      "learning_rate": 4.938154827797595e-06,
      "loss": 1.2995,
      "step": 570
    },
    {
      "epoch": 0.5000673582109659,
      "grad_norm": 0.6053293347358704,
      "learning_rate": 4.932483480355139e-06,
      "loss": 1.3377,
      "step": 580
    },
    {
      "epoch": 0.5086892092146033,
      "grad_norm": 0.5979019999504089,
      "learning_rate": 4.926566924779869e-06,
      "loss": 1.3169,
      "step": 590
    },
    {
      "epoch": 0.5173110602182406,
      "grad_norm": 0.6338688135147095,
      "learning_rate": 4.920405757494147e-06,
      "loss": 1.2965,
      "step": 600
    },
    {
      "epoch": 0.5259329112218779,
      "grad_norm": 0.5050321221351624,
      "learning_rate": 4.914000599578585e-06,
      "loss": 1.3246,
      "step": 610
    },
    {
      "epoch": 0.5345547622255153,
      "grad_norm": 0.5875179767608643,
      "learning_rate": 4.907352096709432e-06,
      "loss": 1.337,
      "step": 620
    },
    {
      "epoch": 0.5431766132291527,
      "grad_norm": 0.6425178647041321,
      "learning_rate": 4.900460919093492e-06,
      "loss": 1.2946,
      "step": 630
    },
    {
      "epoch": 0.55179846423279,
      "grad_norm": 0.541878342628479,
      "learning_rate": 4.893327761400557e-06,
      "loss": 1.2993,
      "step": 640
    },
    {
      "epoch": 0.5604203152364273,
      "grad_norm": 0.586501955986023,
      "learning_rate": 4.885953342693384e-06,
      "loss": 1.3011,
      "step": 650
    },
    {
      "epoch": 0.5690421662400647,
      "grad_norm": 0.5775993466377258,
      "learning_rate": 4.878338406355211e-06,
      "loss": 1.3213,
      "step": 660
    },
    {
      "epoch": 0.577664017243702,
      "grad_norm": 0.5908535718917847,
      "learning_rate": 4.870483720014814e-06,
      "loss": 1.2963,
      "step": 670
    },
    {
      "epoch": 0.5862858682473393,
      "grad_norm": 0.5903546810150146,
      "learning_rate": 4.862390075469132e-06,
      "loss": 1.2818,
      "step": 680
    },
    {
      "epoch": 0.5949077192509767,
      "grad_norm": 0.6688754558563232,
      "learning_rate": 4.854058288603445e-06,
      "loss": 1.3254,
      "step": 690
    },
    {
      "epoch": 0.6035295702546141,
      "grad_norm": 0.5674655437469482,
      "learning_rate": 4.8454891993091305e-06,
      "loss": 1.2957,
      "step": 700
    },
    {
      "epoch": 0.6121514212582514,
      "grad_norm": 0.6107905507087708,
      "learning_rate": 4.836683671398995e-06,
      "loss": 1.2824,
      "step": 710
    },
    {
      "epoch": 0.6207732722618887,
      "grad_norm": 0.5999839305877686,
      "learning_rate": 4.827642592520203e-06,
      "loss": 1.2977,
      "step": 720
    },
    {
      "epoch": 0.629395123265526,
      "grad_norm": 0.5449870824813843,
      "learning_rate": 4.818366874064789e-06,
      "loss": 1.2949,
      "step": 730
    },
    {
      "epoch": 0.6380169742691634,
      "grad_norm": 0.5735543966293335,
      "learning_rate": 4.808857451077788e-06,
      "loss": 1.3084,
      "step": 740
    },
    {
      "epoch": 0.6466388252728007,
      "grad_norm": 0.5688530802726746,
      "learning_rate": 4.799115282162979e-06,
      "loss": 1.2974,
      "step": 750
    },
    {
      "epoch": 0.6552606762764381,
      "grad_norm": 0.5878692269325256,
      "learning_rate": 4.789141349386249e-06,
      "loss": 1.3138,
      "step": 760
    },
    {
      "epoch": 0.6638825272800755,
      "grad_norm": 0.642494261264801,
      "learning_rate": 4.7789366581765995e-06,
      "loss": 1.285,
      "step": 770
    },
    {
      "epoch": 0.6725043782837128,
      "grad_norm": 0.6337887644767761,
      "learning_rate": 4.768502237224788e-06,
      "loss": 1.295,
      "step": 780
    },
    {
      "epoch": 0.6811262292873501,
      "grad_norm": 0.6511521935462952,
      "learning_rate": 4.757839138379635e-06,
      "loss": 1.3059,
      "step": 790
    },
    {
      "epoch": 0.6897480802909874,
      "grad_norm": 0.6140688061714172,
      "learning_rate": 4.74694843654199e-06,
      "loss": 1.2781,
      "step": 800
    },
    {
      "epoch": 0.6983699312946248,
      "grad_norm": 0.5881298780441284,
      "learning_rate": 4.735831229556374e-06,
      "loss": 1.2944,
      "step": 810
    },
    {
      "epoch": 0.7069917822982622,
      "grad_norm": 0.6124337315559387,
      "learning_rate": 4.7244886381003115e-06,
      "loss": 1.287,
      "step": 820
    },
    {
      "epoch": 0.7156136333018995,
      "grad_norm": 0.5487476587295532,
      "learning_rate": 4.712921805571362e-06,
      "loss": 1.2885,
      "step": 830
    },
    {
      "epoch": 0.7242354843055369,
      "grad_norm": 0.6456742286682129,
      "learning_rate": 4.7011318979718565e-06,
      "loss": 1.2899,
      "step": 840
    },
    {
      "epoch": 0.7328573353091742,
      "grad_norm": 0.5877824425697327,
      "learning_rate": 4.689120103791356e-06,
      "loss": 1.3066,
      "step": 850
    },
    {
      "epoch": 0.7414791863128115,
      "grad_norm": 0.628680408000946,
      "learning_rate": 4.676887633886851e-06,
      "loss": 1.3101,
      "step": 860
    },
    {
      "epoch": 0.7501010373164488,
      "grad_norm": 0.6239911317825317,
      "learning_rate": 4.664435721360695e-06,
      "loss": 1.2782,
      "step": 870
    },
    {
      "epoch": 0.7587228883200862,
      "grad_norm": 0.5513969659805298,
      "learning_rate": 4.651765621436303e-06,
      "loss": 1.2836,
      "step": 880
    },
    {
      "epoch": 0.7673447393237236,
      "grad_norm": 0.5616466403007507,
      "learning_rate": 4.638878611331615e-06,
      "loss": 1.2967,
      "step": 890
    },
    {
      "epoch": 0.7759665903273609,
      "grad_norm": 1.2961684465408325,
      "learning_rate": 4.6257759901303535e-06,
      "loss": 1.3094,
      "step": 900
    },
    {
      "epoch": 0.7845884413309983,
      "grad_norm": 0.6225080490112305,
      "learning_rate": 4.612459078651055e-06,
      "loss": 1.3083,
      "step": 910
    },
    {
      "epoch": 0.7932102923346356,
      "grad_norm": 0.6216508150100708,
      "learning_rate": 4.598929219313938e-06,
      "loss": 1.3286,
      "step": 920
    },
    {
      "epoch": 0.8018321433382729,
      "grad_norm": 0.5944140553474426,
      "learning_rate": 4.585187776005569e-06,
      "loss": 1.263,
      "step": 930
    },
    {
      "epoch": 0.8104539943419102,
      "grad_norm": 0.5992977023124695,
      "learning_rate": 4.571236133941381e-06,
      "loss": 1.2745,
      "step": 940
    },
    {
      "epoch": 0.8190758453455477,
      "grad_norm": 0.5519088506698608,
      "learning_rate": 4.557075699526032e-06,
      "loss": 1.2772,
      "step": 950
    },
    {
      "epoch": 0.827697696349185,
      "grad_norm": 0.5918429493904114,
      "learning_rate": 4.542707900211636e-06,
      "loss": 1.2915,
      "step": 960
    },
    {
      "epoch": 0.8363195473528223,
      "grad_norm": 0.6135639548301697,
      "learning_rate": 4.528134184353863e-06,
      "loss": 1.2918,
      "step": 970
    },
    {
      "epoch": 0.8449413983564596,
      "grad_norm": 0.6600371599197388,
      "learning_rate": 4.5133560210659384e-06,
      "loss": 1.2844,
      "step": 980
    },
    {
      "epoch": 0.853563249360097,
      "grad_norm": 0.6321092844009399,
      "learning_rate": 4.498374900070551e-06,
      "loss": 1.282,
      "step": 990
    },
    {
      "epoch": 0.8621851003637343,
      "grad_norm": 0.5802695155143738,
      "learning_rate": 4.483192331549675e-06,
      "loss": 1.2723,
      "step": 1000
    },
    {
      "epoch": 0.8621851003637343,
      "eval_loss": 1.1568914651870728,
      "eval_runtime": 4375.5203,
      "eval_samples_per_second": 15.08,
      "eval_steps_per_second": 7.54,
      "step": 1000
    },
    {
      "epoch": 0.8708069513673716,
      "grad_norm": 0.5625444650650024,
      "learning_rate": 4.467809845992338e-06,
      "loss": 1.2788,
      "step": 1010
    },
    {
      "epoch": 0.8794288023710091,
      "grad_norm": 0.575935959815979,
      "learning_rate": 4.452228994040341e-06,
      "loss": 1.302,
      "step": 1020
    },
    {
      "epoch": 0.8880506533746464,
      "grad_norm": 0.5979976058006287,
      "learning_rate": 4.4364513463319405e-06,
      "loss": 1.271,
      "step": 1030
    },
    {
      "epoch": 0.8966725043782837,
      "grad_norm": 0.6508215069770813,
      "learning_rate": 4.420478493343523e-06,
      "loss": 1.2838,
      "step": 1040
    },
    {
      "epoch": 0.905294355381921,
      "grad_norm": 0.6415181756019592,
      "learning_rate": 4.404312045229273e-06,
      "loss": 1.2855,
      "step": 1050
    },
    {
      "epoch": 0.9139162063855584,
      "grad_norm": 0.59377521276474,
      "learning_rate": 4.387953631658863e-06,
      "loss": 1.2745,
      "step": 1060
    },
    {
      "epoch": 0.9225380573891957,
      "grad_norm": 0.6269784569740295,
      "learning_rate": 4.371404901653174e-06,
      "loss": 1.2667,
      "step": 1070
    },
    {
      "epoch": 0.9311599083928331,
      "grad_norm": 0.6030882000923157,
      "learning_rate": 4.35466752341806e-06,
      "loss": 1.2433,
      "step": 1080
    },
    {
      "epoch": 0.9397817593964705,
      "grad_norm": 0.6197340488433838,
      "learning_rate": 4.337743184176188e-06,
      "loss": 1.2791,
      "step": 1090
    },
    {
      "epoch": 0.9484036104001078,
      "grad_norm": 0.607699453830719,
      "learning_rate": 4.320633589996956e-06,
      "loss": 1.278,
      "step": 1100
    },
    {
      "epoch": 0.9570254614037451,
      "grad_norm": 0.6275235414505005,
      "learning_rate": 4.303340465624507e-06,
      "loss": 1.2587,
      "step": 1110
    },
    {
      "epoch": 0.9656473124073824,
      "grad_norm": 0.6535059213638306,
      "learning_rate": 4.285865554303874e-06,
      "loss": 1.2895,
      "step": 1120
    },
    {
      "epoch": 0.9742691634110198,
      "grad_norm": 0.6479883790016174,
      "learning_rate": 4.2682106176052405e-06,
      "loss": 1.2651,
      "step": 1130
    },
    {
      "epoch": 0.9828910144146571,
      "grad_norm": 0.7725274562835693,
      "learning_rate": 4.2503774352463735e-06,
      "loss": 1.2384,
      "step": 1140
    },
    {
      "epoch": 0.9915128654182945,
      "grad_norm": 0.6182934641838074,
      "learning_rate": 4.23236780491321e-06,
      "loss": 1.2723,
      "step": 1150
    },
    {
      "epoch": 1.0007813552472047,
      "grad_norm": 1.8191434144973755,
      "learning_rate": 4.214183542078646e-06,
      "loss": 1.3882,
      "step": 1160
    },
    {
      "epoch": 1.009403206250842,
      "grad_norm": 0.7100806832313538,
      "learning_rate": 4.195826479819523e-06,
      "loss": 1.2857,
      "step": 1170
    },
    {
      "epoch": 1.0180250572544793,
      "grad_norm": 0.5903263688087463,
      "learning_rate": 4.177298468631844e-06,
      "loss": 1.2888,
      "step": 1180
    },
    {
      "epoch": 1.0266469082581167,
      "grad_norm": 0.6088208556175232,
      "learning_rate": 4.158601376244237e-06,
      "loss": 1.2355,
      "step": 1190
    },
    {
      "epoch": 1.035268759261754,
      "grad_norm": 0.6548230648040771,
      "learning_rate": 4.139737087429672e-06,
      "loss": 1.2435,
      "step": 1200
    },
    {
      "epoch": 1.0438906102653913,
      "grad_norm": 0.6475362777709961,
      "learning_rate": 4.120707503815464e-06,
      "loss": 1.2462,
      "step": 1210
    },
    {
      "epoch": 1.0525124612690286,
      "grad_norm": 0.7016700506210327,
      "learning_rate": 4.101514543691588e-06,
      "loss": 1.2479,
      "step": 1220
    },
    {
      "epoch": 1.061134312272666,
      "grad_norm": 0.6940033435821533,
      "learning_rate": 4.0821601418172926e-06,
      "loss": 1.2659,
      "step": 1230
    },
    {
      "epoch": 1.0697561632763033,
      "grad_norm": 0.6648741960525513,
      "learning_rate": 4.0626462492260725e-06,
      "loss": 1.2441,
      "step": 1240
    },
    {
      "epoch": 1.0783780142799406,
      "grad_norm": 0.665122389793396,
      "learning_rate": 4.042974833028992e-06,
      "loss": 1.2792,
      "step": 1250
    },
    {
      "epoch": 1.0869998652835782,
      "grad_norm": 0.6138463020324707,
      "learning_rate": 4.0231478762163865e-06,
      "loss": 1.2462,
      "step": 1260
    },
    {
      "epoch": 1.0956217162872155,
      "grad_norm": 0.61916184425354,
      "learning_rate": 4.003167377457972e-06,
      "loss": 1.2858,
      "step": 1270
    },
    {
      "epoch": 1.1042435672908528,
      "grad_norm": 0.6411153674125671,
      "learning_rate": 3.983035350901356e-06,
      "loss": 1.2519,
      "step": 1280
    },
    {
      "epoch": 1.1128654182944901,
      "grad_norm": 0.6579316854476929,
      "learning_rate": 3.962753825969016e-06,
      "loss": 1.2661,
      "step": 1290
    },
    {
      "epoch": 1.1214872692981275,
      "grad_norm": 0.6916026473045349,
      "learning_rate": 3.942324847153706e-06,
      "loss": 1.2812,
      "step": 1300
    },
    {
      "epoch": 1.1301091203017648,
      "grad_norm": 0.6541363596916199,
      "learning_rate": 3.921750473812377e-06,
      "loss": 1.2454,
      "step": 1310
    },
    {
      "epoch": 1.1387309713054021,
      "grad_norm": 0.6301002502441406,
      "learning_rate": 3.901032779958563e-06,
      "loss": 1.2452,
      "step": 1320
    },
    {
      "epoch": 1.1473528223090395,
      "grad_norm": 0.6470747590065002,
      "learning_rate": 3.880173854053325e-06,
      "loss": 1.242,
      "step": 1330
    },
    {
      "epoch": 1.1559746733126768,
      "grad_norm": 0.62432861328125,
      "learning_rate": 3.859175798794715e-06,
      "loss": 1.2578,
      "step": 1340
    },
    {
      "epoch": 1.164596524316314,
      "grad_norm": 0.735650897026062,
      "learning_rate": 3.838040730905811e-06,
      "loss": 1.2323,
      "step": 1350
    },
    {
      "epoch": 1.1732183753199514,
      "grad_norm": 0.6072832345962524,
      "learning_rate": 3.816770780921343e-06,
      "loss": 1.2417,
      "step": 1360
    },
    {
      "epoch": 1.1818402263235888,
      "grad_norm": 0.6269782185554504,
      "learning_rate": 3.7953680929729215e-06,
      "loss": 1.2579,
      "step": 1370
    },
    {
      "epoch": 1.190462077327226,
      "grad_norm": 0.6426697373390198,
      "learning_rate": 3.7738348245728953e-06,
      "loss": 1.2711,
      "step": 1380
    },
    {
      "epoch": 1.1990839283308636,
      "grad_norm": 0.6683219075202942,
      "learning_rate": 3.7521731463968638e-06,
      "loss": 1.2375,
      "step": 1390
    },
    {
      "epoch": 1.207705779334501,
      "grad_norm": 0.7327633500099182,
      "learning_rate": 3.730385242064861e-06,
      "loss": 1.2509,
      "step": 1400
    },
    {
      "epoch": 1.2163276303381383,
      "grad_norm": 0.6698377728462219,
      "learning_rate": 3.708473307921234e-06,
      "loss": 1.2748,
      "step": 1410
    },
    {
      "epoch": 1.2249494813417756,
      "grad_norm": 0.6427878737449646,
      "learning_rate": 3.686439552813236e-06,
      "loss": 1.2753,
      "step": 1420
    },
    {
      "epoch": 1.233571332345413,
      "grad_norm": 0.7282299399375916,
      "learning_rate": 3.6642861978683676e-06,
      "loss": 1.2218,
      "step": 1430
    },
    {
      "epoch": 1.2421931833490503,
      "grad_norm": 0.6039260029792786,
      "learning_rate": 3.6420154762704685e-06,
      "loss": 1.243,
      "step": 1440
    },
    {
      "epoch": 1.2508150343526876,
      "grad_norm": 0.6218879222869873,
      "learning_rate": 3.619629633034604e-06,
      "loss": 1.2225,
      "step": 1450
    },
    {
      "epoch": 1.259436885356325,
      "grad_norm": 0.660929799079895,
      "learning_rate": 3.597130924780754e-06,
      "loss": 1.2641,
      "step": 1460
    },
    {
      "epoch": 1.2680587363599622,
      "grad_norm": 0.6086330413818359,
      "learning_rate": 3.574521619506332e-06,
      "loss": 1.2288,
      "step": 1470
    },
    {
      "epoch": 1.2766805873635996,
      "grad_norm": 0.6594045162200928,
      "learning_rate": 3.5518039963575577e-06,
      "loss": 1.2558,
      "step": 1480
    },
    {
      "epoch": 1.285302438367237,
      "grad_norm": 0.6506398320198059,
      "learning_rate": 3.5289803453997087e-06,
      "loss": 1.2361,
      "step": 1490
    },
    {
      "epoch": 1.2939242893708744,
      "grad_norm": 0.6286528706550598,
      "learning_rate": 3.506052967386265e-06,
      "loss": 1.2344,
      "step": 1500
    },
    {
      "epoch": 1.2939242893708744,
      "eval_loss": 1.1364344358444214,
      "eval_runtime": 4371.3293,
      "eval_samples_per_second": 15.094,
      "eval_steps_per_second": 7.547,
      "step": 1500
    },
    {
      "epoch": 1.3025461403745116,
      "grad_norm": 0.6357390880584717,
      "learning_rate": 3.4830241735269852e-06,
      "loss": 1.2597,
      "step": 1510
    },
    {
      "epoch": 1.311167991378149,
      "grad_norm": 0.5889900326728821,
      "learning_rate": 3.459896285254917e-06,
      "loss": 1.2535,
      "step": 1520
    },
    {
      "epoch": 1.3197898423817862,
      "grad_norm": 0.7132574319839478,
      "learning_rate": 3.436671633992389e-06,
      "loss": 1.2496,
      "step": 1530
    },
    {
      "epoch": 1.3284116933854238,
      "grad_norm": 0.604434072971344,
      "learning_rate": 3.4133525609159883e-06,
      "loss": 1.2578,
      "step": 1540
    },
    {
      "epoch": 1.337033544389061,
      "grad_norm": 0.6603388786315918,
      "learning_rate": 3.3899414167205547e-06,
      "loss": 1.2462,
      "step": 1550
    },
    {
      "epoch": 1.3456553953926984,
      "grad_norm": 0.5738435983657837,
      "learning_rate": 3.3664405613822216e-06,
      "loss": 1.2309,
      "step": 1560
    },
    {
      "epoch": 1.3542772463963357,
      "grad_norm": 0.6693400740623474,
      "learning_rate": 3.3428523639205125e-06,
      "loss": 1.2656,
      "step": 1570
    },
    {
      "epoch": 1.362899097399973,
      "grad_norm": 0.6772233843803406,
      "learning_rate": 3.319179202159532e-06,
      "loss": 1.2326,
      "step": 1580
    },
    {
      "epoch": 1.3715209484036104,
      "grad_norm": 0.6765257716178894,
      "learning_rate": 3.295423462488271e-06,
      "loss": 1.2666,
      "step": 1590
    },
    {
      "epoch": 1.3801427994072477,
      "grad_norm": 0.61844402551651,
      "learning_rate": 3.271587539620039e-06,
      "loss": 1.2188,
      "step": 1600
    },
    {
      "epoch": 1.388764650410885,
      "grad_norm": 0.6714752912521362,
      "learning_rate": 3.247673836351068e-06,
      "loss": 1.2276,
      "step": 1610
    },
    {
      "epoch": 1.3973865014145224,
      "grad_norm": 0.5900276899337769,
      "learning_rate": 3.2236847633182955e-06,
      "loss": 1.2452,
      "step": 1620
    },
    {
      "epoch": 1.40600835241816,
      "grad_norm": 0.6843028664588928,
      "learning_rate": 3.199622738756357e-06,
      "loss": 1.2317,
      "step": 1630
    },
    {
      "epoch": 1.414630203421797,
      "grad_norm": 0.7222546935081482,
      "learning_rate": 3.17549018825382e-06,
      "loss": 1.2445,
      "step": 1640
    },
    {
      "epoch": 1.4232520544254346,
      "grad_norm": 0.6822832226753235,
      "learning_rate": 3.151289544508664e-06,
      "loss": 1.2442,
      "step": 1650
    },
    {
      "epoch": 1.4318739054290717,
      "grad_norm": 0.7010654211044312,
      "learning_rate": 3.1270232470830525e-06,
      "loss": 1.2517,
      "step": 1660
    },
    {
      "epoch": 1.4404957564327092,
      "grad_norm": 0.6761536598205566,
      "learning_rate": 3.102693742157415e-06,
      "loss": 1.2424,
      "step": 1670
    },
    {
      "epoch": 1.4491176074363465,
      "grad_norm": 0.730097234249115,
      "learning_rate": 3.078303482283854e-06,
      "loss": 1.2167,
      "step": 1680
    },
    {
      "epoch": 1.4577394584399839,
      "grad_norm": 0.7009713053703308,
      "learning_rate": 3.0538549261389154e-06,
      "loss": 1.2492,
      "step": 1690
    },
    {
      "epoch": 1.4663613094436212,
      "grad_norm": 0.5926857590675354,
      "learning_rate": 3.029350538275742e-06,
      "loss": 1.1965,
      "step": 1700
    },
    {
      "epoch": 1.4749831604472585,
      "grad_norm": 0.6391776204109192,
      "learning_rate": 3.0047927888756268e-06,
      "loss": 1.2326,
      "step": 1710
    },
    {
      "epoch": 1.4836050114508958,
      "grad_norm": 0.7003401517868042,
      "learning_rate": 2.9801841534990115e-06,
      "loss": 1.2248,
      "step": 1720
    },
    {
      "epoch": 1.4922268624545332,
      "grad_norm": 0.682777464389801,
      "learning_rate": 2.9555271128359326e-06,
      "loss": 1.2305,
      "step": 1730
    },
    {
      "epoch": 1.5008487134581705,
      "grad_norm": 0.5897073745727539,
      "learning_rate": 2.9308241524559522e-06,
      "loss": 1.2269,
      "step": 1740
    },
    {
      "epoch": 1.5094705644618078,
      "grad_norm": 0.7111027240753174,
      "learning_rate": 2.9060777625576014e-06,
      "loss": 1.2338,
      "step": 1750
    },
    {
      "epoch": 1.5180924154654454,
      "grad_norm": 0.6545217037200928,
      "learning_rate": 2.8812904377173532e-06,
      "loss": 1.2222,
      "step": 1760
    },
    {
      "epoch": 1.5267142664690825,
      "grad_norm": 0.6440667510032654,
      "learning_rate": 2.856464676638156e-06,
      "loss": 1.2033,
      "step": 1770
    },
    {
      "epoch": 1.53533611747272,
      "grad_norm": 0.7168214321136475,
      "learning_rate": 2.831602981897546e-06,
      "loss": 1.2479,
      "step": 1780
    },
    {
      "epoch": 1.5439579684763571,
      "grad_norm": 0.6428610682487488,
      "learning_rate": 2.8067078596953793e-06,
      "loss": 1.2302,
      "step": 1790
    },
    {
      "epoch": 1.5525798194799947,
      "grad_norm": 0.6651865839958191,
      "learning_rate": 2.7817818196011897e-06,
      "loss": 1.263,
      "step": 1800
    },
    {
      "epoch": 1.561201670483632,
      "grad_norm": 0.6888891458511353,
      "learning_rate": 2.756827374301207e-06,
      "loss": 1.2001,
      "step": 1810
    },
    {
      "epoch": 1.5698235214872693,
      "grad_norm": 0.6644035577774048,
      "learning_rate": 2.73184703934507e-06,
      "loss": 1.216,
      "step": 1820
    },
    {
      "epoch": 1.5784453724909067,
      "grad_norm": 0.6795063614845276,
      "learning_rate": 2.7068433328922405e-06,
      "loss": 1.245,
      "step": 1830
    },
    {
      "epoch": 1.587067223494544,
      "grad_norm": 0.7901127338409424,
      "learning_rate": 2.68181877545816e-06,
      "loss": 1.2168,
      "step": 1840
    },
    {
      "epoch": 1.5956890744981813,
      "grad_norm": 0.6792474389076233,
      "learning_rate": 2.6567758896601654e-06,
      "loss": 1.2406,
      "step": 1850
    },
    {
      "epoch": 1.6043109255018186,
      "grad_norm": 0.638313353061676,
      "learning_rate": 2.6317171999631992e-06,
      "loss": 1.253,
      "step": 1860
    },
    {
      "epoch": 1.612932776505456,
      "grad_norm": 0.7407149076461792,
      "learning_rate": 2.6066452324253257e-06,
      "loss": 1.2279,
      "step": 1870
    },
    {
      "epoch": 1.6215546275090933,
      "grad_norm": 0.6624804139137268,
      "learning_rate": 2.58156251444309e-06,
      "loss": 1.2433,
      "step": 1880
    },
    {
      "epoch": 1.6301764785127308,
      "grad_norm": 0.6785764694213867,
      "learning_rate": 2.5564715744967446e-06,
      "loss": 1.2267,
      "step": 1890
    },
    {
      "epoch": 1.638798329516368,
      "grad_norm": 0.7038357853889465,
      "learning_rate": 2.531374941895361e-06,
      "loss": 1.2371,
      "step": 1900
    },
    {
      "epoch": 1.6474201805200055,
      "grad_norm": 0.7683678269386292,
      "learning_rate": 2.506275146521863e-06,
      "loss": 1.2039,
      "step": 1910
    },
    {
      "epoch": 1.6560420315236426,
      "grad_norm": 0.6339368224143982,
      "learning_rate": 2.4811747185780005e-06,
      "loss": 1.201,
      "step": 1920
    },
    {
      "epoch": 1.6646638825272801,
      "grad_norm": 0.8253235220909119,
      "learning_rate": 2.45607618832929e-06,
      "loss": 1.2585,
      "step": 1930
    },
    {
      "epoch": 1.6732857335309175,
      "grad_norm": 0.7511754631996155,
      "learning_rate": 2.4309820858499487e-06,
      "loss": 1.2043,
      "step": 1940
    },
    {
      "epoch": 1.6819075845345548,
      "grad_norm": 0.709459662437439,
      "learning_rate": 2.405894940767851e-06,
      "loss": 1.2493,
      "step": 1950
    },
    {
      "epoch": 1.6905294355381921,
      "grad_norm": 0.6520094871520996,
      "learning_rate": 2.380817282009523e-06,
      "loss": 1.2514,
      "step": 1960
    },
    {
      "epoch": 1.6991512865418295,
      "grad_norm": 0.6714244484901428,
      "learning_rate": 2.35575163754522e-06,
      "loss": 1.2204,
      "step": 1970
    },
    {
      "epoch": 1.7077731375454668,
      "grad_norm": 0.6813965439796448,
      "learning_rate": 2.330700534134086e-06,
      "loss": 1.2042,
      "step": 1980
    },
    {
      "epoch": 1.716394988549104,
      "grad_norm": 0.6882847547531128,
      "learning_rate": 2.3056664970694433e-06,
      "loss": 1.2139,
      "step": 1990
    },
    {
      "epoch": 1.7250168395527414,
      "grad_norm": 0.7284813523292542,
      "learning_rate": 2.280652049924232e-06,
      "loss": 1.2124,
      "step": 2000
    },
    {
      "epoch": 1.7250168395527414,
      "eval_loss": 1.1232779026031494,
      "eval_runtime": 4379.8083,
      "eval_samples_per_second": 15.065,
      "eval_steps_per_second": 7.533,
      "step": 2000
    }
  ],
  "logging_steps": 10,
  "max_steps": 3477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.949667944935509e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}