{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 283, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 385.89532871246337, "epoch": 0.0176678445229682, "grad_norm": 0.5257675647735596, "kl": 0.0012482523918151856, "learning_rate": 3.448275862068966e-06, "loss": 0.0001, "reward": 0.6444196742027998, "reward_std": 0.4479222524911165, "rewards/accuracy_reward": 0.1539062581025064, "rewards/format_reward": 0.49051341526210307, "step": 5 }, { "completion_length": 229.56596965789794, "epoch": 0.0353356890459364, "grad_norm": 0.5334790945053101, "kl": 0.3332817077636719, "learning_rate": 6.896551724137932e-06, "loss": 0.0133, "reward": 0.9689732536673545, "reward_std": 0.26624082447960973, "rewards/accuracy_reward": 0.08214286140864716, "rewards/format_reward": 0.8868303976953029, "step": 10 }, { "completion_length": 157.9798059463501, "epoch": 0.053003533568904596, "grad_norm": 0.287689745426178, "kl": 0.04854583740234375, "learning_rate": 1.0344827586206898e-05, "loss": 0.0019, "reward": 1.062388438731432, "reward_std": 0.20184952337294818, "rewards/accuracy_reward": 0.10022321890573949, "rewards/format_reward": 0.962165217846632, "step": 15 }, { "completion_length": 171.60927028656005, "epoch": 0.0706713780918728, "grad_norm": 0.2567259669303894, "kl": 0.078216552734375, "learning_rate": 1.3793103448275863e-05, "loss": 0.0031, "reward": 1.1186384424567222, "reward_std": 0.24078293070197104, "rewards/accuracy_reward": 0.15345982832368463, "rewards/format_reward": 0.9651786126196384, "step": 20 }, { "completion_length": 210.81998748779296, "epoch": 0.08833922261484099, "grad_norm": 0.5349090695381165, "kl": 0.112786865234375, "learning_rate": 1.7241379310344828e-05, "loss": 0.0045, "reward": 1.1583705946803093, "reward_std": 0.3293624483048916, "rewards/accuracy_reward": 0.21551340306177735, "rewards/format_reward": 0.9428571835160255, "step": 25 }, { "completion_length": 293.2053703308105, "epoch": 0.10600706713780919, "grad_norm": 0.19419077038764954, "kl": 0.09522705078125, "learning_rate": 1.999923511388017e-05, "loss": 0.0038, "reward": 1.2208705872297287, "reward_std": 0.36669483222067356, "rewards/accuracy_reward": 0.2871651930734515, "rewards/format_reward": 0.9337054029107094, "step": 30 }, { "completion_length": 323.5448799133301, "epoch": 0.12367491166077739, "grad_norm": 0.22766603529453278, "kl": 0.39053955078125, "learning_rate": 1.9972476383747748e-05, "loss": 0.0157, "reward": 1.2860491678118706, "reward_std": 0.30488016121089456, "rewards/accuracy_reward": 0.3202009078115225, "rewards/format_reward": 0.9658482551574707, "step": 35 }, { "completion_length": 285.7590543746948, "epoch": 0.1413427561837456, "grad_norm": 0.3550393283367157, "kl": 0.118524169921875, "learning_rate": 1.9907590277344582e-05, "loss": 0.0047, "reward": 1.2512277334928512, "reward_std": 0.34426755234599116, "rewards/accuracy_reward": 0.30758929941803215, "rewards/format_reward": 0.9436384335160255, "step": 40 }, { "completion_length": 269.9738945007324, "epoch": 0.15901060070671377, "grad_norm": 2.2805335521698, "kl": 1.9856201171875, "learning_rate": 1.9804824871166254e-05, "loss": 0.0796, "reward": 1.1523438021540642, "reward_std": 0.46314122155308723, "rewards/accuracy_reward": 0.316294657997787, "rewards/format_reward": 0.8360491469502449, "step": 45 }, { "completion_length": 249.81585788726807, "epoch": 0.17667844522968199, "grad_norm": 6.213081359863281, "kl": 3.11435546875, "learning_rate": 1.9664573064143604e-05, "loss": 0.1247, "reward": 0.8168527133762836, "reward_std": 0.42027441747486594, "rewards/accuracy_reward": 0.22991072395816445, "rewards/format_reward": 0.5869419902563096, "step": 50 }, { "completion_length": 837.4478942871094, "epoch": 0.19434628975265017, "grad_norm": 0.1708061844110489, "kl": 0.7300537109375, "learning_rate": 1.948737107548771e-05, "loss": 0.0292, "reward": 0.0875000040512532, "reward_std": 0.16735761975869537, "rewards/accuracy_reward": 0.03895089457510039, "rewards/format_reward": 0.048549109499435875, "step": 55 }, { "completion_length": 1024.0, "epoch": 0.21201413427561838, "grad_norm": 0.16706973314285278, "kl": 0.364111328125, "learning_rate": 1.9273896394584103e-05, "loss": 0.0146, "reward": 0.09107143309665844, "reward_std": 0.14619714424479752, "rewards/accuracy_reward": 0.09084821873111651, "rewards/format_reward": 0.00022321429569274187, "step": 60 }, { "completion_length": 1024.0, "epoch": 0.22968197879858657, "grad_norm": 0.10041823238134384, "kl": 0.2697998046875, "learning_rate": 1.9024965190774262e-05, "loss": 0.0108, "reward": 0.17421875786967575, "reward_std": 0.23079481534659863, "rewards/accuracy_reward": 0.17421875786967575, "rewards/format_reward": 0.0, "step": 65 }, { "completion_length": 1024.0, "epoch": 0.24734982332155478, "grad_norm": 0.3122642934322357, "kl": 0.28310546875, "learning_rate": 1.8741529192927528e-05, "loss": 0.0113, "reward": 0.19218751024454833, "reward_std": 0.23374766409397124, "rewards/accuracy_reward": 0.19218751024454833, "rewards/format_reward": 0.0, "step": 70 }, { "completion_length": 1024.0, "epoch": 0.26501766784452296, "grad_norm": 0.3012256622314453, "kl": 0.3281494140625, "learning_rate": 1.8424672050733577e-05, "loss": 0.0131, "reward": 0.20256697395816445, "reward_std": 0.23207057397812605, "rewards/accuracy_reward": 0.20122768832370638, "rewards/format_reward": 0.0013392857741564511, "step": 75 }, { "completion_length": 1024.0, "epoch": 0.2826855123674912, "grad_norm": 0.2279616743326187, "kl": 0.569970703125, "learning_rate": 1.8075605191627242e-05, "loss": 0.0228, "reward": 0.1684151873923838, "reward_std": 0.1943290094844997, "rewards/accuracy_reward": 0.16808036593720316, "rewards/format_reward": 0.0003348214435391128, "step": 80 }, { "completion_length": 1023.7799125671387, "epoch": 0.3003533568904594, "grad_norm": 0.1516251116991043, "kl": 0.51630859375, "learning_rate": 1.7695663189185703e-05, "loss": 0.0207, "reward": 0.21607143906876444, "reward_std": 0.20886625591665506, "rewards/accuracy_reward": 0.21607143906876444, "rewards/format_reward": 0.0, "step": 85 }, { "completion_length": 1023.1735549926758, "epoch": 0.31802120141342755, "grad_norm": 0.09773323684930801, "kl": 0.39691162109375, "learning_rate": 1.7286298660705877e-05, "loss": 0.0159, "reward": 0.2613839410245419, "reward_std": 0.2178319870494306, "rewards/accuracy_reward": 0.2613839410245419, "rewards/format_reward": 0.0, "step": 90 }, { "completion_length": 1022.9333808898925, "epoch": 0.33568904593639576, "grad_norm": 0.2660332918167114, "kl": 0.3833740234375, "learning_rate": 1.6849076713469914e-05, "loss": 0.0153, "reward": 0.2964285839349031, "reward_std": 0.23299281364306806, "rewards/accuracy_reward": 0.2964285839349031, "rewards/format_reward": 0.0, "step": 95 }, { "completion_length": 1022.3206634521484, "epoch": 0.35335689045936397, "grad_norm": 1.3615121841430664, "kl": 0.835400390625, "learning_rate": 1.6385668960932143e-05, "loss": 0.0334, "reward": 0.1241071482654661, "reward_std": 0.16077298847958446, "rewards/accuracy_reward": 0.1241071482654661, "rewards/format_reward": 0.0, "step": 100 }, { "epoch": 0.35335689045936397, "eval_completion_length": 1021.8020172119141, "eval_kl": 0.794921875, "eval_loss": 0.03294466808438301, "eval_reward": 0.1272321492433548, "eval_reward_std": 0.20546478778123856, "eval_rewards/accuracy_reward": 0.1272321492433548, "eval_rewards/format_reward": 0.0, "eval_runtime": 83.4283, "eval_samples_per_second": 1.187, "eval_steps_per_second": 0.012, "step": 100 }, { "completion_length": 1023.4450950622559, "epoch": 0.3710247349823322, "grad_norm": 1.5146944522857666, "kl": 1.085205078125, "learning_rate": 1.5897847131705194e-05, "loss": 0.0434, "reward": 0.12544643497094513, "reward_std": 0.1771216381341219, "rewards/accuracy_reward": 0.12544643497094513, "rewards/format_reward": 0.0, "step": 105 }, { "completion_length": 1023.4677505493164, "epoch": 0.38869257950530034, "grad_norm": 0.6001514196395874, "kl": 1.1712890625, "learning_rate": 1.5387476295779737e-05, "loss": 0.0469, "reward": 0.16138393647270277, "reward_std": 0.20901122770737857, "rewards/accuracy_reward": 0.15982143628643825, "rewards/format_reward": 0.0015625000698491931, "step": 110 }, { "completion_length": 1023.904689025879, "epoch": 0.40636042402826855, "grad_norm": 0.357604444026947, "kl": 0.9640380859375, "learning_rate": 1.4856507733875837e-05, "loss": 0.0386, "reward": 0.2774553706869483, "reward_std": 0.22977914968505503, "rewards/accuracy_reward": 0.27712054904550315, "rewards/format_reward": 0.0003348214435391128, "step": 115 }, { "completion_length": 1023.7766761779785, "epoch": 0.42402826855123676, "grad_norm": 2.9171788692474365, "kl": 1.54794921875, "learning_rate": 1.4306971477188223e-05, "loss": 0.0619, "reward": 0.29587054867297413, "reward_std": 0.22431441079825162, "rewards/accuracy_reward": 0.2954241203144193, "rewards/format_reward": 0.00044642859138548373, "step": 120 }, { "completion_length": 1023.5507850646973, "epoch": 0.4416961130742049, "grad_norm": 1.7767579555511475, "kl": 1.08740234375, "learning_rate": 1.3740968546047935e-05, "loss": 0.0435, "reward": 0.26272322572767737, "reward_std": 0.2170619947835803, "rewards/accuracy_reward": 0.2623884044587612, "rewards/format_reward": 0.0003348214435391128, "step": 125 }, { "completion_length": 1023.353914642334, "epoch": 0.45936395759717313, "grad_norm": 0.626575231552124, "kl": 1.624072265625, "learning_rate": 1.3160662917174045e-05, "loss": 0.065, "reward": 0.2738839427009225, "reward_std": 0.21952001163735985, "rewards/accuracy_reward": 0.27332590725272893, "rewards/format_reward": 0.0005580357392318547, "step": 130 }, { "completion_length": 1023.6706512451171, "epoch": 0.47703180212014135, "grad_norm": 2.2120277881622314, "kl": 1.8474609375, "learning_rate": 1.2568273250226681e-05, "loss": 0.0739, "reward": 0.27935269083827735, "reward_std": 0.2362092829309404, "rewards/accuracy_reward": 0.2791294766589999, "rewards/format_reward": 0.00022321429569274187, "step": 135 }, { "completion_length": 1023.9494422912597, "epoch": 0.49469964664310956, "grad_norm": 3.246752977371216, "kl": 2.728662109375, "learning_rate": 1.1966064405292887e-05, "loss": 0.1091, "reward": 0.2922991219907999, "reward_std": 0.23428730978630483, "rewards/accuracy_reward": 0.2919643005356193, "rewards/format_reward": 0.0003348214435391128, "step": 140 }, { "completion_length": 1023.9381698608398, "epoch": 0.5123674911660777, "grad_norm": 11.377167701721191, "kl": 2.4001953125, "learning_rate": 1.1356338783736256e-05, "loss": 0.096, "reward": 0.31104912236332893, "reward_std": 0.2248768277466297, "rewards/accuracy_reward": 0.31093751527369023, "rewards/format_reward": 0.00011160714784637093, "step": 145 }, { "completion_length": 1023.8702018737793, "epoch": 0.5300353356890459, "grad_norm": 4.7356181144714355, "kl": 0.99365234375, "learning_rate": 1.0741427525516463e-05, "loss": 0.0398, "reward": 0.2786830499768257, "reward_std": 0.22981408620253205, "rewards/accuracy_reward": 0.2775669790804386, "rewards/format_reward": 0.0011160714784637094, "step": 150 }, { "completion_length": 1023.8892868041992, "epoch": 0.5477031802120141, "grad_norm": 0.48026323318481445, "kl": 1.191552734375, "learning_rate": 1.012368159663363e-05, "loss": 0.0477, "reward": 0.2672991200350225, "reward_std": 0.2149678454734385, "rewards/accuracy_reward": 0.2670759057626128, "rewards/format_reward": 0.00022321429569274187, "step": 155 }, { "completion_length": 1023.9833709716797, "epoch": 0.5653710247349824, "grad_norm": 0.2914314866065979, "kl": 0.31871337890625, "learning_rate": 9.505462800772612e-06, "loss": 0.0127, "reward": 0.2546875115483999, "reward_std": 0.21248297598212956, "rewards/accuracy_reward": 0.2534598330967128, "rewards/format_reward": 0.0012276786263100802, "step": 160 }, { "completion_length": 1023.8994430541992, "epoch": 0.5830388692579506, "grad_norm": 0.403013676404953, "kl": 0.38603515625, "learning_rate": 8.889134749511956e-06, "loss": 0.0154, "reward": 0.23883929578587412, "reward_std": 0.21736905500292777, "rewards/accuracy_reward": 0.23627233160659672, "rewards/format_reward": 0.0025669644004665316, "step": 165 }, { "completion_length": 1023.8919662475586, "epoch": 0.6007067137809188, "grad_norm": 2.4790608882904053, "kl": 0.843505859375, "learning_rate": 8.277053825620836e-06, "loss": 0.0337, "reward": 0.22020090287551283, "reward_std": 0.225645115878433, "rewards/accuracy_reward": 0.21886161714792252, "rewards/format_reward": 0.0013392857741564511, "step": 170 }, { "completion_length": 1023.5271240234375, "epoch": 0.6183745583038869, "grad_norm": 1.517407774925232, "kl": 1.15859375, "learning_rate": 7.671560173993588e-06, "loss": 0.0463, "reward": 0.2196428684517741, "reward_std": 0.22536969408392907, "rewards/accuracy_reward": 0.21886161863803863, "rewards/format_reward": 0.0007812500349245966, "step": 175 }, { "completion_length": 1023.7799140930176, "epoch": 0.6360424028268551, "grad_norm": 3.477236747741699, "kl": 1.853125, "learning_rate": 7.07496875466589e-06, "loss": 0.0742, "reward": 0.18906251017469913, "reward_std": 0.21059290650300683, "rewards/accuracy_reward": 0.18861608181614428, "rewards/format_reward": 0.00044642859138548373, "step": 180 }, { "completion_length": 1023.8558059692383, "epoch": 0.6537102473498233, "grad_norm": 4.594732284545898, "kl": 1.274169921875, "learning_rate": 6.489560492119225e-06, "loss": 0.051, "reward": 0.22857143869623542, "reward_std": 0.23066243380308152, "rewards/accuracy_reward": 0.22745536724105478, "rewards/format_reward": 0.0011160714784637094, "step": 185 }, { "completion_length": 1023.9906257629394, "epoch": 0.6713780918727915, "grad_norm": 5.503405570983887, "kl": 1.36650390625, "learning_rate": 5.9175735547120975e-06, "loss": 0.0547, "reward": 0.2881696566008031, "reward_std": 0.24815111914649607, "rewards/accuracy_reward": 0.28437501201406123, "rewards/format_reward": 0.0037946430500596763, "step": 190 }, { "completion_length": 1023.9542419433594, "epoch": 0.6890459363957597, "grad_norm": 1.3981853723526, "kl": 2.093359375, "learning_rate": 5.361194797579108e-06, "loss": 0.0838, "reward": 0.29575894251465795, "reward_std": 0.2531373543664813, "rewards/accuracy_reward": 0.28939733523875477, "rewards/format_reward": 0.0063616074505262075, "step": 195 }, { "completion_length": 1023.9152908325195, "epoch": 0.7067137809187279, "grad_norm": 7.645321369171143, "kl": 1.69521484375, "learning_rate": 4.8225514017138205e-06, "loss": 0.0678, "reward": 0.26662947684526445, "reward_std": 0.2479257956147194, "rewards/accuracy_reward": 0.25881697619333865, "rewards/format_reward": 0.007812500360887497, "step": 200 }, { "epoch": 0.7067137809187279, "eval_completion_length": 1024.0, "eval_kl": 2.50390625, "eval_loss": 0.10209327191114426, "eval_reward": 0.310267873108387, "eval_reward_std": 0.22309495136141777, "eval_rewards/accuracy_reward": 0.305803582072258, "eval_rewards/format_reward": 0.004464285913854837, "eval_runtime": 91.3405, "eval_samples_per_second": 1.084, "eval_steps_per_second": 0.011, "step": 200 }, { "completion_length": 1024.0, "epoch": 0.7243816254416962, "grad_norm": 4.966090679168701, "kl": 2.449951171875, "learning_rate": 4.303702741201431e-06, "loss": 0.098, "reward": 0.24832590399309992, "reward_std": 0.24168844958767294, "rewards/accuracy_reward": 0.24118304727599024, "rewards/format_reward": 0.007142857485450804, "step": 205 }, { "completion_length": 1023.8795768737793, "epoch": 0.7420494699646644, "grad_norm": 7.903662204742432, "kl": 1.901416015625, "learning_rate": 3.8066325096949153e-06, "loss": 0.076, "reward": 0.25558037031441927, "reward_std": 0.2529288594610989, "rewards/accuracy_reward": 0.2431919751688838, "rewards/format_reward": 0.012388393504079432, "step": 210 }, { "completion_length": 1023.8687515258789, "epoch": 0.7597173144876325, "grad_norm": 2.2350668907165527, "kl": 1.812060546875, "learning_rate": 3.3332411362372063e-06, "loss": 0.0725, "reward": 0.258482154738158, "reward_std": 0.26162059921771286, "rewards/accuracy_reward": 0.24118304559960962, "rewards/format_reward": 0.01729910804424435, "step": 215 }, { "completion_length": 1023.8536842346191, "epoch": 0.7773851590106007, "grad_norm": 2.8165674209594727, "kl": 1.880078125, "learning_rate": 2.8853385194256677e-06, "loss": 0.0752, "reward": 0.24375001061707735, "reward_std": 0.25496505089104177, "rewards/accuracy_reward": 0.23002233263105154, "rewards/format_reward": 0.013727679383009672, "step": 220 }, { "completion_length": 1024.0, "epoch": 0.7950530035335689, "grad_norm": 3.0542807579040527, "kl": 1.645703125, "learning_rate": 2.464637107698046e-06, "loss": 0.0659, "reward": 0.26216519055888055, "reward_std": 0.27050148248672484, "rewards/accuracy_reward": 0.23950894009321927, "rewards/format_reward": 0.022656251245643945, "step": 225 }, { "completion_length": 1023.835604095459, "epoch": 0.8127208480565371, "grad_norm": 1.1989268064498901, "kl": 1.440283203125, "learning_rate": 2.072745352195794e-06, "loss": 0.0576, "reward": 0.2896205481141806, "reward_std": 0.28690752685070037, "rewards/accuracy_reward": 0.2671875134110451, "rewards/format_reward": 0.0224330369848758, "step": 230 }, { "completion_length": 1023.9050231933594, "epoch": 0.8303886925795053, "grad_norm": 0.9027553200721741, "kl": 1.21279296875, "learning_rate": 1.7111615572361628e-06, "loss": 0.0485, "reward": 0.2984375137835741, "reward_std": 0.29320847503840924, "rewards/accuracy_reward": 0.27187501136213543, "rewards/format_reward": 0.026562501350417732, "step": 235 }, { "completion_length": 1023.9849334716797, "epoch": 0.8480565371024735, "grad_norm": 1.0079853534698486, "kl": 0.9890380859375, "learning_rate": 1.381268151904298e-06, "loss": 0.0396, "reward": 0.3024553697556257, "reward_std": 0.3057010589167476, "rewards/accuracy_reward": 0.2669642990455031, "rewards/format_reward": 0.03549107307335362, "step": 240 }, { "completion_length": 1023.9882820129394, "epoch": 0.8657243816254417, "grad_norm": 0.6375559568405151, "kl": 0.80595703125, "learning_rate": 1.0843264046665558e-06, "loss": 0.0322, "reward": 0.28783483654260633, "reward_std": 0.2901507246308029, "rewards/accuracy_reward": 0.256250012293458, "rewards/format_reward": 0.0315848228870891, "step": 245 }, { "completion_length": 1023.8103805541992, "epoch": 0.8833922261484098, "grad_norm": 0.3038390576839447, "kl": 0.7016357421875, "learning_rate": 8.214716012124491e-07, "loss": 0.0281, "reward": 0.28537947684526443, "reward_std": 0.2940663579851389, "rewards/accuracy_reward": 0.2517857262864709, "rewards/format_reward": 0.033593751536682245, "step": 250 }, { "completion_length": 1023.9936386108399, "epoch": 0.901060070671378, "grad_norm": 0.7605869174003601, "kl": 0.6392333984375, "learning_rate": 5.937087039615619e-07, "loss": 0.0256, "reward": 0.3013393010944128, "reward_std": 0.2973380209878087, "rewards/accuracy_reward": 0.26551340594887735, "rewards/format_reward": 0.035825894423760475, "step": 255 }, { "completion_length": 1024.0, "epoch": 0.9187279151943463, "grad_norm": 0.4661541283130646, "kl": 0.6113037109375, "learning_rate": 4.019085098303077e-07, "loss": 0.0245, "reward": 0.30870537031441925, "reward_std": 0.301556083932519, "rewards/accuracy_reward": 0.27533483430743216, "rewards/format_reward": 0.03337053736904636, "step": 260 }, { "completion_length": 1023.9744422912597, "epoch": 0.9363957597173145, "grad_norm": 0.5394344925880432, "kl": 0.59677734375, "learning_rate": 2.4680432094837394e-07, "loss": 0.0239, "reward": 0.2986607299186289, "reward_std": 0.3167744716629386, "rewards/accuracy_reward": 0.2603794766589999, "rewards/format_reward": 0.03828125168802217, "step": 265 }, { "completion_length": 1023.7898460388184, "epoch": 0.9540636042402827, "grad_norm": 0.35241708159446716, "kl": 0.6021240234375, "learning_rate": 1.289891410535593e-07, "loss": 0.0241, "reward": 0.30212054755538703, "reward_std": 0.30877320375293493, "rewards/accuracy_reward": 0.2652901913970709, "rewards/format_reward": 0.036830358975566926, "step": 270 }, { "completion_length": 1023.873885345459, "epoch": 0.9717314487632509, "grad_norm": 0.3670821189880371, "kl": 0.5589111328125, "learning_rate": 4.8913408283934874e-08, "loss": 0.0224, "reward": 0.30368304885923864, "reward_std": 0.30286577958613636, "rewards/accuracy_reward": 0.2606026901863515, "rewards/format_reward": 0.04308035911526531, "step": 275 }, { "completion_length": 1023.9140632629394, "epoch": 0.9893992932862191, "grad_norm": 0.4401331841945648, "kl": 0.5879638671875, "learning_rate": 6.883273035447335e-09, "loss": 0.0235, "reward": 0.31294644335284827, "reward_std": 0.3160593772307038, "rewards/accuracy_reward": 0.2747767997905612, "rewards/format_reward": 0.038169644912704824, "step": 280 }, { "completion_length": 1023.8789800008138, "epoch": 1.0, "kl": 0.8118896484375, "reward": 0.30822173940638703, "reward_std": 0.3040016482894619, "rewards/accuracy_reward": 0.27101935756703216, "rewards/format_reward": 0.03720238265426209, "step": 283, "total_flos": 0.0, "train_loss": 0.040519028099076065, "train_runtime": 54178.7039, "train_samples_per_second": 1.337, "train_steps_per_second": 0.005 } ], "logging_steps": 5, "max_steps": 283, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }