{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.6595744680851065e-08, "logits/chosen": -1.7968215942382812, "logits/rejected": -2.159090995788574, "logps/chosen": -88.33059692382812, "logps/rejected": -242.96200561523438, "loss": 0.4322, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.6595744680851066e-07, "logits/chosen": -2.003159999847412, "logits/rejected": -1.3869916200637817, "logps/chosen": -240.9772186279297, "logps/rejected": -195.60606384277344, "loss": 0.3319, "rewards/accuracies": 0.3333333432674408, "rewards/chosen": -3.270954766776413e-05, "rewards/margins": -8.25071256258525e-05, "rewards/rejected": 4.979758523404598e-05, "step": 10 }, { "epoch": 0.01, "learning_rate": 5.319148936170213e-07, "logits/chosen": -2.0388007164001465, "logits/rejected": -1.5615094900131226, "logps/chosen": -291.083740234375, "logps/rejected": -277.5216369628906, "loss": 0.3514, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 1.1951732631132472e-05, "rewards/margins": 0.00027519199647940695, "rewards/rejected": -0.0002632402756717056, "step": 20 }, { "epoch": 0.02, "learning_rate": 7.97872340425532e-07, "logits/chosen": -1.860889196395874, "logits/rejected": -1.5862194299697876, "logps/chosen": -248.38510131835938, "logps/rejected": -261.7816467285156, "loss": 0.324, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0007015246083028615, "rewards/margins": 0.004821115639060736, "rewards/rejected": -0.005522639956325293, "step": 30 }, { "epoch": 0.02, "learning_rate": 1.0638297872340427e-06, "logits/chosen": -1.8764064311981201, "logits/rejected": -1.2899483442306519, "logps/chosen": -355.25958251953125, "logps/rejected": -389.2695007324219, "loss": 0.3286, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.009422576054930687, "rewards/margins": 0.022184943780303, "rewards/rejected": -0.03160751983523369, "step": 40 }, { "epoch": 0.03, "learning_rate": 1.3297872340425533e-06, "logits/chosen": -1.990142583847046, "logits/rejected": -1.2961665391921997, "logps/chosen": -316.00860595703125, "logps/rejected": -277.88421630859375, "loss": 0.2629, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.05700983479619026, "rewards/margins": 0.059757936745882034, "rewards/rejected": -0.1167677640914917, "step": 50 }, { "epoch": 0.03, "learning_rate": 1.595744680851064e-06, "logits/chosen": -1.748492956161499, "logits/rejected": -0.8994135856628418, "logps/chosen": -389.3627624511719, "logps/rejected": -579.7057495117188, "loss": 0.1989, "rewards/accuracies": 0.875, "rewards/chosen": -0.09011684358119965, "rewards/margins": 0.22812744975090027, "rewards/rejected": -0.3182442784309387, "step": 60 }, { "epoch": 0.04, "learning_rate": 1.8617021276595745e-06, "logits/chosen": -1.6900399923324585, "logits/rejected": -1.4010140895843506, "logps/chosen": -420.5406799316406, "logps/rejected": -859.8084716796875, "loss": 0.1253, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20870384573936462, "rewards/margins": 0.3385527431964874, "rewards/rejected": -0.547256588935852, "step": 70 }, { "epoch": 0.04, "learning_rate": 2.1276595744680853e-06, "logits/chosen": -1.7609751224517822, "logits/rejected": -1.0384010076522827, "logps/chosen": -474.48187255859375, "logps/rejected": -747.34716796875, "loss": 0.1309, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19330377876758575, "rewards/margins": 0.34078216552734375, "rewards/rejected": -0.5340859293937683, "step": 80 }, { "epoch": 0.05, "learning_rate": 2.393617021276596e-06, "logits/chosen": -1.7291476726531982, "logits/rejected": -1.2021540403366089, "logps/chosen": -454.2134704589844, "logps/rejected": -764.934326171875, "loss": 0.16, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1365814059972763, "rewards/margins": 0.36457785964012146, "rewards/rejected": -0.5011593103408813, "step": 90 }, { "epoch": 0.05, "learning_rate": 2.6595744680851065e-06, "logits/chosen": -1.5737159252166748, "logits/rejected": -0.9248941540718079, "logps/chosen": -482.3492126464844, "logps/rejected": -792.2481689453125, "loss": 0.1239, "rewards/accuracies": 0.875, "rewards/chosen": -0.19203761219978333, "rewards/margins": 0.3564862310886383, "rewards/rejected": -0.5485238432884216, "step": 100 }, { "epoch": 0.06, "learning_rate": 2.9255319148936174e-06, "logits/chosen": -1.7435375452041626, "logits/rejected": -1.356065034866333, "logps/chosen": -416.564208984375, "logps/rejected": -796.4661254882812, "loss": 0.1253, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1526903361082077, "rewards/margins": 0.3349696397781372, "rewards/rejected": -0.4876599907875061, "step": 110 }, { "epoch": 0.06, "learning_rate": 3.191489361702128e-06, "logits/chosen": -1.6976553201675415, "logits/rejected": -1.0894078016281128, "logps/chosen": -409.96258544921875, "logps/rejected": -617.7588500976562, "loss": 0.1948, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13733306527137756, "rewards/margins": 0.293459415435791, "rewards/rejected": -0.4307924807071686, "step": 120 }, { "epoch": 0.07, "learning_rate": 3.457446808510639e-06, "logits/chosen": -1.7993590831756592, "logits/rejected": -1.400632619857788, "logps/chosen": -370.1565856933594, "logps/rejected": -709.3056640625, "loss": 0.2055, "rewards/accuracies": 0.875, "rewards/chosen": -0.11207763850688934, "rewards/margins": 0.340470552444458, "rewards/rejected": -0.45254817605018616, "step": 130 }, { "epoch": 0.07, "learning_rate": 3.723404255319149e-06, "logits/chosen": -1.495011806488037, "logits/rejected": -0.9245948791503906, "logps/chosen": -388.5771789550781, "logps/rejected": -792.4680786132812, "loss": 0.1088, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1575288623571396, "rewards/margins": 0.3997672200202942, "rewards/rejected": -0.557296097278595, "step": 140 }, { "epoch": 0.08, "learning_rate": 3.98936170212766e-06, "logits/chosen": -1.6491578817367554, "logits/rejected": -1.2172632217407227, "logps/chosen": -407.8502502441406, "logps/rejected": -738.5733642578125, "loss": 0.1397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12080486118793488, "rewards/margins": 0.32797589898109436, "rewards/rejected": -0.44878071546554565, "step": 150 }, { "epoch": 0.09, "learning_rate": 4.255319148936171e-06, "logits/chosen": -1.6612653732299805, "logits/rejected": -1.1705405712127686, "logps/chosen": -353.0194396972656, "logps/rejected": -689.8749389648438, "loss": 0.1454, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09746531397104263, "rewards/margins": 0.3515530228614807, "rewards/rejected": -0.44901829957962036, "step": 160 }, { "epoch": 0.09, "learning_rate": 4.521276595744681e-06, "logits/chosen": -1.54987370967865, "logits/rejected": -1.1912695169448853, "logps/chosen": -544.5787963867188, "logps/rejected": -835.3132934570312, "loss": 0.1048, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23248295485973358, "rewards/margins": 0.3334501087665558, "rewards/rejected": -0.565933108329773, "step": 170 }, { "epoch": 0.1, "learning_rate": 4.787234042553192e-06, "logits/chosen": -1.7397425174713135, "logits/rejected": -0.8725941777229309, "logps/chosen": -510.69842529296875, "logps/rejected": -840.5343017578125, "loss": 0.1531, "rewards/accuracies": 0.875, "rewards/chosen": -0.2171137034893036, "rewards/margins": 0.39573976397514343, "rewards/rejected": -0.6128535270690918, "step": 180 }, { "epoch": 0.1, "learning_rate": 4.999982660399688e-06, "logits/chosen": -1.6966726779937744, "logits/rejected": -1.09552800655365, "logps/chosen": -514.5984497070312, "logps/rejected": -911.4729614257812, "loss": 0.1503, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2481248676776886, "rewards/margins": 0.3679044842720032, "rewards/rejected": -0.6160293221473694, "step": 190 }, { "epoch": 0.11, "learning_rate": 4.99937579964398e-06, "logits/chosen": -1.4942667484283447, "logits/rejected": -1.1419141292572021, "logps/chosen": -432.5450134277344, "logps/rejected": -730.1014404296875, "loss": 0.1267, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21937580406665802, "rewards/margins": 0.3185574412345886, "rewards/rejected": -0.5379332304000854, "step": 200 }, { "epoch": 0.11, "learning_rate": 4.9979021993870645e-06, "logits/chosen": -1.571395993232727, "logits/rejected": -0.9183829426765442, "logps/chosen": -480.79644775390625, "logps/rejected": -813.7987060546875, "loss": 0.1624, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18962515890598297, "rewards/margins": 0.3737575113773346, "rewards/rejected": -0.563382625579834, "step": 210 }, { "epoch": 0.12, "learning_rate": 4.995562370647553e-06, "logits/chosen": -1.668015480041504, "logits/rejected": -1.1087052822113037, "logps/chosen": -517.7100219726562, "logps/rejected": -838.1522216796875, "loss": 0.1372, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22473697364330292, "rewards/margins": 0.35942238569259644, "rewards/rejected": -0.5841594338417053, "step": 220 }, { "epoch": 0.12, "learning_rate": 4.992357124836838e-06, "logits/chosen": -1.3532911539077759, "logits/rejected": -0.6337820291519165, "logps/chosen": -458.74462890625, "logps/rejected": -736.6771240234375, "loss": 0.1419, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21437951922416687, "rewards/margins": 0.35455334186553955, "rewards/rejected": -0.5689328908920288, "step": 230 }, { "epoch": 0.13, "learning_rate": 4.9882875734777044e-06, "logits/chosen": -1.6833770275115967, "logits/rejected": -1.0865981578826904, "logps/chosen": -476.49578857421875, "logps/rejected": -742.6441650390625, "loss": 0.162, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1685108244419098, "rewards/margins": 0.33409184217453003, "rewards/rejected": -0.5026026368141174, "step": 240 }, { "epoch": 0.13, "learning_rate": 4.983355127818882e-06, "logits/chosen": -1.4850168228149414, "logits/rejected": -0.9603360295295715, "logps/chosen": -400.22967529296875, "logps/rejected": -569.9345703125, "loss": 0.1919, "rewards/accuracies": 0.625, "rewards/chosen": -0.20730257034301758, "rewards/margins": 0.22820453345775604, "rewards/rejected": -0.4355071187019348, "step": 250 }, { "epoch": 0.14, "learning_rate": 4.977561498345639e-06, "logits/chosen": -1.544639229774475, "logits/rejected": -1.1013596057891846, "logps/chosen": -407.4434509277344, "logps/rejected": -822.0099487304688, "loss": 0.0966, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.19301848113536835, "rewards/margins": 0.41144537925720215, "rewards/rejected": -0.6044638752937317, "step": 260 }, { "epoch": 0.14, "learning_rate": 4.970908694186624e-06, "logits/chosen": -1.5448771715164185, "logits/rejected": -0.8540661931037903, "logps/chosen": -542.1297607421875, "logps/rejected": -889.5344848632812, "loss": 0.1371, "rewards/accuracies": 0.875, "rewards/chosen": -0.25042372941970825, "rewards/margins": 0.39966678619384766, "rewards/rejected": -0.6500904560089111, "step": 270 }, { "epoch": 0.15, "learning_rate": 4.9633990224171305e-06, "logits/chosen": -1.4944156408309937, "logits/rejected": -0.8036524057388306, "logps/chosen": -671.7164306640625, "logps/rejected": -922.0513916015625, "loss": 0.1319, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3747330605983734, "rewards/margins": 0.3100079596042633, "rewards/rejected": -0.6847410202026367, "step": 280 }, { "epoch": 0.15, "learning_rate": 4.955035087259046e-06, "logits/chosen": -1.4477952718734741, "logits/rejected": -0.8218593597412109, "logps/chosen": -612.0665893554688, "logps/rejected": -863.3322143554688, "loss": 0.1494, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3176848888397217, "rewards/margins": 0.30745354294776917, "rewards/rejected": -0.6251384019851685, "step": 290 }, { "epoch": 0.16, "learning_rate": 4.945819789177756e-06, "logits/chosen": -1.611026406288147, "logits/rejected": -1.1793110370635986, "logps/chosen": -518.3214111328125, "logps/rejected": -892.6036987304688, "loss": 0.1228, "rewards/accuracies": 0.875, "rewards/chosen": -0.2295423001050949, "rewards/margins": 0.37014490365982056, "rewards/rejected": -0.5996872186660767, "step": 300 }, { "epoch": 0.17, "learning_rate": 4.935756323876306e-06, "logits/chosen": -1.508418083190918, "logits/rejected": -1.277306318283081, "logps/chosen": -459.0326232910156, "logps/rejected": -823.93017578125, "loss": 0.1518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27009934186935425, "rewards/margins": 0.32507914304733276, "rewards/rejected": -0.5951785445213318, "step": 310 }, { "epoch": 0.17, "learning_rate": 4.924848181187199e-06, "logits/chosen": -1.6442441940307617, "logits/rejected": -1.1329659223556519, "logps/chosen": -494.06097412109375, "logps/rejected": -854.1019287109375, "loss": 0.1422, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20919394493103027, "rewards/margins": 0.39555859565734863, "rewards/rejected": -0.6047526001930237, "step": 320 }, { "epoch": 0.18, "learning_rate": 4.913099143862173e-06, "logits/chosen": -1.3633651733398438, "logits/rejected": -0.9123932123184204, "logps/chosen": -474.5048828125, "logps/rejected": -829.7545776367188, "loss": 0.1498, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.28733277320861816, "rewards/margins": 0.3500373959541321, "rewards/rejected": -0.6373701095581055, "step": 330 }, { "epoch": 0.18, "learning_rate": 4.900513286260416e-06, "logits/chosen": -1.5975598096847534, "logits/rejected": -1.2887117862701416, "logps/chosen": -400.32781982421875, "logps/rejected": -744.0382080078125, "loss": 0.1053, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20992426574230194, "rewards/margins": 0.3338248133659363, "rewards/rejected": -0.5437491536140442, "step": 340 }, { "epoch": 0.19, "learning_rate": 4.887094972935645e-06, "logits/chosen": -1.764219045639038, "logits/rejected": -0.9871004819869995, "logps/chosen": -573.0086059570312, "logps/rejected": -937.3956909179688, "loss": 0.1504, "rewards/accuracies": 0.875, "rewards/chosen": -0.2734777331352234, "rewards/margins": 0.37265342473983765, "rewards/rejected": -0.646131157875061, "step": 350 }, { "epoch": 0.19, "learning_rate": 4.87284885712256e-06, "logits/chosen": -1.5166432857513428, "logits/rejected": -0.8717886209487915, "logps/chosen": -572.190673828125, "logps/rejected": -876.5632934570312, "loss": 0.1876, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.32470396161079407, "rewards/margins": 0.34118732810020447, "rewards/rejected": -0.6658912897109985, "step": 360 }, { "epoch": 0.2, "learning_rate": 4.857779879123181e-06, "logits/chosen": -1.7403156757354736, "logits/rejected": -0.9518265724182129, "logps/chosen": -505.53387451171875, "logps/rejected": -778.5391845703125, "loss": 0.1207, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20143766701221466, "rewards/margins": 0.37924817204475403, "rewards/rejected": -0.5806857943534851, "step": 370 }, { "epoch": 0.2, "learning_rate": 4.841893264593643e-06, "logits/chosen": -1.7110675573349, "logits/rejected": -1.026960849761963, "logps/chosen": -472.236328125, "logps/rejected": -761.3172607421875, "loss": 0.095, "rewards/accuracies": 0.875, "rewards/chosen": -0.2130139172077179, "rewards/margins": 0.36098140478134155, "rewards/rejected": -0.5739952325820923, "step": 380 }, { "epoch": 0.21, "learning_rate": 4.825194522732023e-06, "logits/chosen": -1.6140925884246826, "logits/rejected": -1.1293842792510986, "logps/chosen": -507.7935485839844, "logps/rejected": -913.9110107421875, "loss": 0.1277, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.26502326130867004, "rewards/margins": 0.3756178915500641, "rewards/rejected": -0.6406410932540894, "step": 390 }, { "epoch": 0.21, "learning_rate": 4.807689444367853e-06, "logits/chosen": -1.7682578563690186, "logits/rejected": -1.3489004373550415, "logps/chosen": -495.90869140625, "logps/rejected": -782.658447265625, "loss": 0.1251, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.260581910610199, "rewards/margins": 0.3089093565940857, "rewards/rejected": -0.5694912075996399, "step": 400 }, { "epoch": 0.22, "learning_rate": 4.78938409995396e-06, "logits/chosen": -1.5254316329956055, "logits/rejected": -1.0945005416870117, "logps/chosen": -463.646484375, "logps/rejected": -902.4519653320312, "loss": 0.1313, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24164071679115295, "rewards/margins": 0.3996545672416687, "rewards/rejected": -0.6412952542304993, "step": 410 }, { "epoch": 0.22, "learning_rate": 4.770284837461342e-06, "logits/chosen": -1.5965580940246582, "logits/rejected": -0.836743175983429, "logps/chosen": -586.2058715820312, "logps/rejected": -917.9168090820312, "loss": 0.1124, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.26902061700820923, "rewards/margins": 0.3916351795196533, "rewards/rejected": -0.6606558561325073, "step": 420 }, { "epoch": 0.23, "learning_rate": 4.7503982801778015e-06, "logits/chosen": -1.557950735092163, "logits/rejected": -1.0112215280532837, "logps/chosen": -501.4098205566406, "logps/rejected": -789.2760009765625, "loss": 0.154, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24787664413452148, "rewards/margins": 0.3141789734363556, "rewards/rejected": -0.5620556473731995, "step": 430 }, { "epoch": 0.23, "learning_rate": 4.729731324411104e-06, "logits/chosen": -1.7676448822021484, "logits/rejected": -1.1603769063949585, "logps/chosen": -429.96734619140625, "logps/rejected": -750.7506103515625, "loss": 0.1237, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1744639277458191, "rewards/margins": 0.35027581453323364, "rewards/rejected": -0.5247397422790527, "step": 440 }, { "epoch": 0.24, "learning_rate": 4.7082911370974645e-06, "logits/chosen": -1.8625621795654297, "logits/rejected": -1.36086905002594, "logps/chosen": -548.0135498046875, "logps/rejected": -759.1170654296875, "loss": 0.1707, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2390761822462082, "rewards/margins": 0.2892398238182068, "rewards/rejected": -0.5283160209655762, "step": 450 }, { "epoch": 0.25, "learning_rate": 4.68608515331618e-06, "logits/chosen": -1.7188348770141602, "logits/rejected": -1.187195062637329, "logps/chosen": -492.1756286621094, "logps/rejected": -859.0760498046875, "loss": 0.1414, "rewards/accuracies": 0.875, "rewards/chosen": -0.23255252838134766, "rewards/margins": 0.3831843137741089, "rewards/rejected": -0.6157368421554565, "step": 460 }, { "epoch": 0.25, "learning_rate": 4.663121073711269e-06, "logits/chosen": -1.5974490642547607, "logits/rejected": -1.2564659118652344, "logps/chosen": -336.80487060546875, "logps/rejected": -661.6661376953125, "loss": 0.1196, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14996656775474548, "rewards/margins": 0.32413381338119507, "rewards/rejected": -0.47410035133361816, "step": 470 }, { "epoch": 0.26, "learning_rate": 4.63940686182103e-06, "logits/chosen": -1.6767423152923584, "logits/rejected": -1.1938632726669312, "logps/chosen": -505.0990295410156, "logps/rejected": -846.8779296875, "loss": 0.159, "rewards/accuracies": 0.875, "rewards/chosen": -0.2421807050704956, "rewards/margins": 0.3599635660648346, "rewards/rejected": -0.6021442413330078, "step": 480 }, { "epoch": 0.26, "learning_rate": 4.614950741316425e-06, "logits/chosen": -1.529900312423706, "logits/rejected": -1.0826785564422607, "logps/chosen": -421.31707763671875, "logps/rejected": -685.7420654296875, "loss": 0.1721, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22861020267009735, "rewards/margins": 0.28916865587234497, "rewards/rejected": -0.5177788734436035, "step": 490 }, { "epoch": 0.27, "learning_rate": 4.589761193149254e-06, "logits/chosen": -1.6966304779052734, "logits/rejected": -0.9312071800231934, "logps/chosen": -535.8978271484375, "logps/rejected": -940.1627197265625, "loss": 0.1144, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24013669788837433, "rewards/margins": 0.4429057240486145, "rewards/rejected": -0.6830424070358276, "step": 500 }, { "epoch": 0.27, "learning_rate": 4.563846952611112e-06, "logits/chosen": -1.6221929788589478, "logits/rejected": -0.9574362635612488, "logps/chosen": -428.6238708496094, "logps/rejected": -704.8244018554688, "loss": 0.0844, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.195401132106781, "rewards/margins": 0.3248489797115326, "rewards/rejected": -0.520250141620636, "step": 510 }, { "epoch": 0.28, "learning_rate": 4.537217006304141e-06, "logits/chosen": -1.4427409172058105, "logits/rejected": -1.003901481628418, "logps/chosen": -429.660400390625, "logps/rejected": -772.5963134765625, "loss": 0.1457, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15723460912704468, "rewards/margins": 0.3122255206108093, "rewards/rejected": -0.4694600999355316, "step": 520 }, { "epoch": 0.28, "learning_rate": 4.50988058902464e-06, "logits/chosen": -1.2717740535736084, "logits/rejected": -0.8480876684188843, "logps/chosen": -367.8202209472656, "logps/rejected": -776.1926879882812, "loss": 0.1152, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14414677023887634, "rewards/margins": 0.38212689757347107, "rewards/rejected": -0.5262737274169922, "step": 530 }, { "epoch": 0.29, "learning_rate": 4.481847180560593e-06, "logits/chosen": -1.5822323560714722, "logits/rejected": -0.9035153388977051, "logps/chosen": -439.30816650390625, "logps/rejected": -708.1422729492188, "loss": 0.2078, "rewards/accuracies": 0.75, "rewards/chosen": -0.18015776574611664, "rewards/margins": 0.32809919118881226, "rewards/rejected": -0.5082569122314453, "step": 540 }, { "epoch": 0.29, "learning_rate": 4.453126502404253e-06, "logits/chosen": -1.6248279809951782, "logits/rejected": -0.9642871022224426, "logps/chosen": -561.463623046875, "logps/rejected": -740.08935546875, "loss": 0.1773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27120378613471985, "rewards/margins": 0.28620854020118713, "rewards/rejected": -0.557412326335907, "step": 550 }, { "epoch": 0.3, "learning_rate": 4.423728514380892e-06, "logits/chosen": -1.4605586528778076, "logits/rejected": -0.8407928347587585, "logps/chosen": -514.7965087890625, "logps/rejected": -831.8440551757812, "loss": 0.12, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24941739439964294, "rewards/margins": 0.36968275904655457, "rewards/rejected": -0.6191002130508423, "step": 560 }, { "epoch": 0.3, "learning_rate": 4.393663411194918e-06, "logits/chosen": -1.4048388004302979, "logits/rejected": -1.0212897062301636, "logps/chosen": -490.42431640625, "logps/rejected": -855.1259765625, "loss": 0.1416, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24018950760364532, "rewards/margins": 0.3597009778022766, "rewards/rejected": -0.5998905301094055, "step": 570 }, { "epoch": 0.31, "learning_rate": 4.362941618894523e-06, "logits/chosen": -1.3778400421142578, "logits/rejected": -0.983964741230011, "logps/chosen": -586.1995849609375, "logps/rejected": -981.2742309570312, "loss": 0.1205, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.30800628662109375, "rewards/margins": 0.3807603120803833, "rewards/rejected": -0.6887666583061218, "step": 580 }, { "epoch": 0.31, "learning_rate": 4.331573791256116e-06, "logits/chosen": -1.4645698070526123, "logits/rejected": -0.9271195530891418, "logps/chosen": -621.4105224609375, "logps/rejected": -899.0559692382812, "loss": 0.1273, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2714827358722687, "rewards/margins": 0.362936794757843, "rewards/rejected": -0.6344195604324341, "step": 590 }, { "epoch": 0.32, "learning_rate": 4.299570806089786e-06, "logits/chosen": -1.6326652765274048, "logits/rejected": -0.9927080273628235, "logps/chosen": -490.701904296875, "logps/rejected": -842.052734375, "loss": 0.1023, "rewards/accuracies": 0.875, "rewards/chosen": -0.18793320655822754, "rewards/margins": 0.408639132976532, "rewards/rejected": -0.5965723395347595, "step": 600 }, { "epoch": 0.33, "learning_rate": 4.266943761467057e-06, "logits/chosen": -1.2816569805145264, "logits/rejected": -0.8941723704338074, "logps/chosen": -367.02191162109375, "logps/rejected": -760.0553588867188, "loss": 0.1443, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11693791300058365, "rewards/margins": 0.39976662397384644, "rewards/rejected": -0.5167044997215271, "step": 610 }, { "epoch": 0.33, "learning_rate": 4.233703971872287e-06, "logits/chosen": -1.8729069232940674, "logits/rejected": -1.0977063179016113, "logps/chosen": -393.6733093261719, "logps/rejected": -763.2752075195312, "loss": 0.1335, "rewards/accuracies": 0.875, "rewards/chosen": -0.08343084156513214, "rewards/margins": 0.4291655123233795, "rewards/rejected": -0.5125963687896729, "step": 620 }, { "epoch": 0.34, "learning_rate": 4.1998629642789925e-06, "logits/chosen": -1.5668641328811646, "logits/rejected": -1.1349601745605469, "logps/chosen": -426.9754333496094, "logps/rejected": -820.5556640625, "loss": 0.1742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1329190582036972, "rewards/margins": 0.36928990483283997, "rewards/rejected": -0.5022088885307312, "step": 630 }, { "epoch": 0.34, "learning_rate": 4.165432474152505e-06, "logits/chosen": -1.5012271404266357, "logits/rejected": -1.278693675994873, "logps/chosen": -365.3034973144531, "logps/rejected": -678.4292602539062, "loss": 0.1737, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.13991737365722656, "rewards/margins": 0.3017304837703705, "rewards/rejected": -0.44164785742759705, "step": 640 }, { "epoch": 0.35, "learning_rate": 4.130424441380308e-06, "logits/chosen": -1.42804753780365, "logits/rejected": -0.967817485332489, "logps/chosen": -411.77801513671875, "logps/rejected": -691.2272338867188, "loss": 0.1527, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1383361965417862, "rewards/margins": 0.33920183777809143, "rewards/rejected": -0.47753801941871643, "step": 650 }, { "epoch": 0.35, "learning_rate": 4.09485100613151e-06, "logits/chosen": -1.5555391311645508, "logits/rejected": -1.1440869569778442, "logps/chosen": -434.00335693359375, "logps/rejected": -744.3508911132812, "loss": 0.168, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19056299328804016, "rewards/margins": 0.30447274446487427, "rewards/rejected": -0.49503573775291443, "step": 660 }, { "epoch": 0.36, "learning_rate": 4.058724504646834e-06, "logits/chosen": -1.8099536895751953, "logits/rejected": -1.0837316513061523, "logps/chosen": -427.7793884277344, "logps/rejected": -729.915283203125, "loss": 0.1229, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1386745572090149, "rewards/margins": 0.3526052236557007, "rewards/rejected": -0.4912797808647156, "step": 670 }, { "epoch": 0.36, "learning_rate": 4.022057464960632e-06, "logits/chosen": -1.6691503524780273, "logits/rejected": -1.33521568775177, "logps/chosen": -428.7286071777344, "logps/rejected": -789.0191040039062, "loss": 0.1607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1496300995349884, "rewards/margins": 0.35229435563087463, "rewards/rejected": -0.5019243955612183, "step": 680 }, { "epoch": 0.37, "learning_rate": 3.984862602556383e-06, "logits/chosen": -1.6232519149780273, "logits/rejected": -1.197933554649353, "logps/chosen": -460.2228088378906, "logps/rejected": -696.0914306640625, "loss": 0.1346, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18895591795444489, "rewards/margins": 0.28954973816871643, "rewards/rejected": -0.4785057008266449, "step": 690 }, { "epoch": 0.37, "learning_rate": 3.947152815957187e-06, "logits/chosen": -1.5458933115005493, "logits/rejected": -1.116236925125122, "logps/chosen": -433.87322998046875, "logps/rejected": -756.8858642578125, "loss": 0.1492, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22537223994731903, "rewards/margins": 0.34959647059440613, "rewards/rejected": -0.5749687552452087, "step": 700 }, { "epoch": 0.38, "learning_rate": 3.908941182252785e-06, "logits/chosen": -1.5793603658676147, "logits/rejected": -0.9729734659194946, "logps/chosen": -458.96368408203125, "logps/rejected": -781.1962890625, "loss": 0.1615, "rewards/accuracies": 0.875, "rewards/chosen": -0.1985333412885666, "rewards/margins": 0.3719526529312134, "rewards/rejected": -0.5704860091209412, "step": 710 }, { "epoch": 0.38, "learning_rate": 3.8702409525646535e-06, "logits/chosen": -1.6880747079849243, "logits/rejected": -1.0946999788284302, "logps/chosen": -550.5426635742188, "logps/rejected": -861.6978759765625, "loss": 0.1362, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1908409297466278, "rewards/margins": 0.3778737485408783, "rewards/rejected": -0.5687146782875061, "step": 720 }, { "epoch": 0.39, "learning_rate": 3.8310655474507495e-06, "logits/chosen": -1.7694594860076904, "logits/rejected": -1.1918199062347412, "logps/chosen": -443.54736328125, "logps/rejected": -717.8020629882812, "loss": 0.1418, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16424255073070526, "rewards/margins": 0.30146175622940063, "rewards/rejected": -0.4657043516635895, "step": 730 }, { "epoch": 0.39, "learning_rate": 3.7914285522515002e-06, "logits/chosen": -1.539620280265808, "logits/rejected": -1.3648602962493896, "logps/chosen": -468.6385192871094, "logps/rejected": -892.7566528320312, "loss": 0.1552, "rewards/accuracies": 0.875, "rewards/chosen": -0.22199459373950958, "rewards/margins": 0.36069172620773315, "rewards/rejected": -0.5826863050460815, "step": 740 }, { "epoch": 0.4, "learning_rate": 3.751343712378639e-06, "logits/chosen": -1.68185555934906, "logits/rejected": -1.0438605546951294, "logps/chosen": -377.9205627441406, "logps/rejected": -688.3480834960938, "loss": 0.1417, "rewards/accuracies": 0.75, "rewards/chosen": -0.15213271975517273, "rewards/margins": 0.3400834798812866, "rewards/rejected": -0.49221619963645935, "step": 750 }, { "epoch": 0.41, "learning_rate": 3.710824928548546e-06, "logits/chosen": -1.7241179943084717, "logits/rejected": -1.1749062538146973, "logps/chosen": -398.90521240234375, "logps/rejected": -792.80078125, "loss": 0.1218, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14098913967609406, "rewards/margins": 0.4070391058921814, "rewards/rejected": -0.5480281710624695, "step": 760 }, { "epoch": 0.41, "learning_rate": 3.6698862519617225e-06, "logits/chosen": -1.862091064453125, "logits/rejected": -1.0774017572402954, "logps/chosen": -380.6012878417969, "logps/rejected": -803.9888916015625, "loss": 0.1009, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12483291327953339, "rewards/margins": 0.46553611755371094, "rewards/rejected": -0.5903691053390503, "step": 770 }, { "epoch": 0.42, "learning_rate": 3.6285418794300793e-06, "logits/chosen": -1.416322946548462, "logits/rejected": -0.8399195671081543, "logps/chosen": -444.05523681640625, "logps/rejected": -761.3108520507812, "loss": 0.1571, "rewards/accuracies": 0.75, "rewards/chosen": -0.20516355335712433, "rewards/margins": 0.3872140049934387, "rewards/rejected": -0.5923775434494019, "step": 780 }, { "epoch": 0.42, "learning_rate": 3.5868061484537365e-06, "logits/chosen": -1.39794921875, "logits/rejected": -0.8267971873283386, "logps/chosen": -507.1766052246094, "logps/rejected": -875.86962890625, "loss": 0.1497, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23690223693847656, "rewards/margins": 0.40496787428855896, "rewards/rejected": -0.6418701410293579, "step": 790 }, { "epoch": 0.43, "learning_rate": 3.5446935322490285e-06, "logits/chosen": -1.7719318866729736, "logits/rejected": -0.9355955123901367, "logps/chosen": -544.9541015625, "logps/rejected": -865.5302734375, "loss": 0.1963, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2105661928653717, "rewards/margins": 0.3866081237792969, "rewards/rejected": -0.5971742868423462, "step": 800 }, { "epoch": 0.43, "learning_rate": 3.502218634729447e-06, "logits/chosen": -1.6639026403427124, "logits/rejected": -1.067781925201416, "logps/chosen": -575.9091796875, "logps/rejected": -838.0983276367188, "loss": 0.1233, "rewards/accuracies": 0.875, "rewards/chosen": -0.23653562366962433, "rewards/margins": 0.3239360749721527, "rewards/rejected": -0.5604716539382935, "step": 810 }, { "epoch": 0.44, "learning_rate": 3.459396185441265e-06, "logits/chosen": -1.7067358493804932, "logits/rejected": -1.0498546361923218, "logps/chosen": -398.35516357421875, "logps/rejected": -626.8757934570312, "loss": 0.1608, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1356094628572464, "rewards/margins": 0.3172938823699951, "rewards/rejected": -0.4529033601284027, "step": 820 }, { "epoch": 0.44, "learning_rate": 3.4162410344555834e-06, "logits/chosen": -1.9210001230239868, "logits/rejected": -1.1206413507461548, "logps/chosen": -405.9615783691406, "logps/rejected": -725.4310913085938, "loss": 0.125, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10851490497589111, "rewards/margins": 0.38589829206466675, "rewards/rejected": -0.49441319704055786, "step": 830 }, { "epoch": 0.45, "learning_rate": 3.3727681472185937e-06, "logits/chosen": -1.6562303304672241, "logits/rejected": -1.19851553440094, "logps/chosen": -486.35107421875, "logps/rejected": -963.0572509765625, "loss": 0.1105, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.22348380088806152, "rewards/margins": 0.4227561056613922, "rewards/rejected": -0.6462398767471313, "step": 840 }, { "epoch": 0.45, "learning_rate": 3.3289925993618217e-06, "logits/chosen": -1.5856201648712158, "logits/rejected": -1.0767395496368408, "logps/chosen": -526.1747436523438, "logps/rejected": -797.7916870117188, "loss": 0.137, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2687681019306183, "rewards/margins": 0.2989320755004883, "rewards/rejected": -0.567700207233429, "step": 850 }, { "epoch": 0.46, "learning_rate": 3.2849295714741643e-06, "logits/chosen": -1.7678568363189697, "logits/rejected": -1.2151532173156738, "logps/chosen": -597.7952880859375, "logps/rejected": -848.87841796875, "loss": 0.1308, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27867773175239563, "rewards/margins": 0.30206385254859924, "rewards/rejected": -0.5807415843009949, "step": 860 }, { "epoch": 0.46, "learning_rate": 3.2405943438375287e-06, "logits/chosen": -1.7643588781356812, "logits/rejected": -1.099827527999878, "logps/chosen": -423.9742126464844, "logps/rejected": -774.4637451171875, "loss": 0.0974, "rewards/accuracies": 0.875, "rewards/chosen": -0.1482265591621399, "rewards/margins": 0.410900741815567, "rewards/rejected": -0.5591272711753845, "step": 870 }, { "epoch": 0.47, "learning_rate": 3.1960022911279036e-06, "logits/chosen": -1.5414252281188965, "logits/rejected": -1.1484423875808716, "logps/chosen": -493.69464111328125, "logps/rejected": -835.8029174804688, "loss": 0.1526, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19744431972503662, "rewards/margins": 0.36680763959884644, "rewards/rejected": -0.5642520189285278, "step": 880 }, { "epoch": 0.47, "learning_rate": 3.1511688770836844e-06, "logits/chosen": -1.511249303817749, "logits/rejected": -1.3401678800582886, "logps/chosen": -404.75933837890625, "logps/rejected": -805.8262939453125, "loss": 0.1035, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18373355269432068, "rewards/margins": 0.3318132758140564, "rewards/rejected": -0.5155468583106995, "step": 890 }, { "epoch": 0.48, "learning_rate": 3.1061096491431307e-06, "logits/chosen": -1.8831459283828735, "logits/rejected": -1.2160544395446777, "logps/chosen": -447.65032958984375, "logps/rejected": -843.8660278320312, "loss": 0.1345, "rewards/accuracies": 0.875, "rewards/chosen": -0.13529552519321442, "rewards/margins": 0.40966707468032837, "rewards/rejected": -0.5449625849723816, "step": 900 }, { "epoch": 0.49, "learning_rate": 3.0608402330527796e-06, "logits/chosen": -1.6770378351211548, "logits/rejected": -0.9972168803215027, "logps/chosen": -379.8583984375, "logps/rejected": -719.3693237304688, "loss": 0.1765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15062110126018524, "rewards/margins": 0.3817873001098633, "rewards/rejected": -0.5324083566665649, "step": 910 }, { "epoch": 0.49, "learning_rate": 3.0153763274487176e-06, "logits/chosen": -1.4134846925735474, "logits/rejected": -0.966874897480011, "logps/chosen": -441.3450622558594, "logps/rejected": -707.3884887695312, "loss": 0.1235, "rewards/accuracies": 0.75, "rewards/chosen": -0.21538302302360535, "rewards/margins": 0.3049529790878296, "rewards/rejected": -0.5203360319137573, "step": 920 }, { "epoch": 0.5, "learning_rate": 2.9697336984125683e-06, "logits/chosen": -1.6667283773422241, "logits/rejected": -1.0133411884307861, "logps/chosen": -401.2959899902344, "logps/rejected": -851.93701171875, "loss": 0.1206, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15221676230430603, "rewards/margins": 0.45876413583755493, "rewards/rejected": -0.6109809279441833, "step": 930 }, { "epoch": 0.5, "learning_rate": 2.923928174004094e-06, "logits/chosen": -1.8437814712524414, "logits/rejected": -1.0747764110565186, "logps/chosen": -470.7169494628906, "logps/rejected": -732.7559814453125, "loss": 0.1247, "rewards/accuracies": 0.875, "rewards/chosen": -0.13192041218280792, "rewards/margins": 0.3781585991382599, "rewards/rejected": -0.5100789666175842, "step": 940 }, { "epoch": 0.51, "learning_rate": 2.8779756387723036e-06, "logits/chosen": -1.7663402557373047, "logits/rejected": -1.3018739223480225, "logps/chosen": -446.77490234375, "logps/rejected": -766.7832641601562, "loss": 0.1146, "rewards/accuracies": 0.875, "rewards/chosen": -0.1646779477596283, "rewards/margins": 0.36457663774490356, "rewards/rejected": -0.5292545557022095, "step": 950 }, { "epoch": 0.51, "learning_rate": 2.831892028246968e-06, "logits/chosen": -1.848724603652954, "logits/rejected": -1.216956377029419, "logps/chosen": -418.67645263671875, "logps/rejected": -703.2694702148438, "loss": 0.1209, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14982689917087555, "rewards/margins": 0.36974358558654785, "rewards/rejected": -0.5195704698562622, "step": 960 }, { "epoch": 0.52, "learning_rate": 2.7856933234124617e-06, "logits/chosen": -1.7911808490753174, "logits/rejected": -1.0922878980636597, "logps/chosen": -448.37603759765625, "logps/rejected": -834.2364501953125, "loss": 0.1538, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17878659069538116, "rewards/margins": 0.3954610228538513, "rewards/rejected": -0.5742476582527161, "step": 970 }, { "epoch": 0.52, "learning_rate": 2.7393955451658387e-06, "logits/chosen": -1.7210479974746704, "logits/rejected": -1.2294584512710571, "logps/chosen": -514.4754028320312, "logps/rejected": -868.5929565429688, "loss": 0.1626, "rewards/accuracies": 0.875, "rewards/chosen": -0.21787652373313904, "rewards/margins": 0.3894199728965759, "rewards/rejected": -0.6072965264320374, "step": 980 }, { "epoch": 0.53, "learning_rate": 2.6930147487610667e-06, "logits/chosen": -1.5907623767852783, "logits/rejected": -0.78331458568573, "logps/chosen": -462.7984313964844, "logps/rejected": -805.7174072265625, "loss": 0.1373, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17765957117080688, "rewards/margins": 0.40347957611083984, "rewards/rejected": -0.581139087677002, "step": 990 }, { "epoch": 0.53, "learning_rate": 2.6465670182413487e-06, "logits/chosen": -1.6310056447982788, "logits/rejected": -1.0298982858657837, "logps/chosen": -411.04937744140625, "logps/rejected": -758.7462158203125, "loss": 0.1237, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1506483405828476, "rewards/margins": 0.36691543459892273, "rewards/rejected": -0.5175637602806091, "step": 1000 }, { "epoch": 0.54, "learning_rate": 2.6000684608614594e-06, "logits/chosen": -1.6570842266082764, "logits/rejected": -0.8277125358581543, "logps/chosen": -506.580810546875, "logps/rejected": -801.989990234375, "loss": 0.1436, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1983393281698227, "rewards/margins": 0.38503485918045044, "rewards/rejected": -0.5833742022514343, "step": 1010 }, { "epoch": 0.54, "learning_rate": 2.5535352015020338e-06, "logits/chosen": -1.528637170791626, "logits/rejected": -0.8484199643135071, "logps/chosen": -470.8020935058594, "logps/rejected": -820.8448486328125, "loss": 0.1363, "rewards/accuracies": 0.875, "rewards/chosen": -0.20915472507476807, "rewards/margins": 0.38422003388404846, "rewards/rejected": -0.5933747887611389, "step": 1020 }, { "epoch": 0.55, "learning_rate": 2.506983377077741e-06, "logits/chosen": -1.3463196754455566, "logits/rejected": -1.018822193145752, "logps/chosen": -464.81524658203125, "logps/rejected": -807.076171875, "loss": 0.1584, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21939115226268768, "rewards/margins": 0.3329920172691345, "rewards/rejected": -0.5523831844329834, "step": 1030 }, { "epoch": 0.55, "learning_rate": 2.460429130941289e-06, "logits/chosen": -1.4068031311035156, "logits/rejected": -0.9966346621513367, "logps/chosen": -443.41583251953125, "logps/rejected": -826.1185302734375, "loss": 0.1182, "rewards/accuracies": 0.875, "rewards/chosen": -0.1990918219089508, "rewards/margins": 0.39130455255508423, "rewards/rejected": -0.5903963446617126, "step": 1040 }, { "epoch": 0.56, "learning_rate": 2.413888607285192e-06, "logits/chosen": -1.2919907569885254, "logits/rejected": -0.9193531274795532, "logps/chosen": -496.358642578125, "logps/rejected": -845.7939453125, "loss": 0.173, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24995890259742737, "rewards/margins": 0.3503498435020447, "rewards/rejected": -0.6003087162971497, "step": 1050 }, { "epoch": 0.57, "learning_rate": 2.367377945543249e-06, "logits/chosen": -1.6841480731964111, "logits/rejected": -0.907370924949646, "logps/chosen": -446.6328125, "logps/rejected": -884.2018432617188, "loss": 0.1068, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15798960626125336, "rewards/margins": 0.4673282206058502, "rewards/rejected": -0.6253177523612976, "step": 1060 }, { "epoch": 0.57, "learning_rate": 2.320913274793676e-06, "logits/chosen": -1.7113037109375, "logits/rejected": -1.1816798448562622, "logps/chosen": -410.67645263671875, "logps/rejected": -770.4984741210938, "loss": 0.1423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1650415062904358, "rewards/margins": 0.37022119760513306, "rewards/rejected": -0.5352627038955688, "step": 1070 }, { "epoch": 0.58, "learning_rate": 2.27451070816582e-06, "logits/chosen": -1.6226348876953125, "logits/rejected": -0.9200002551078796, "logps/chosen": -518.1405029296875, "logps/rejected": -891.6884765625, "loss": 0.1105, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16074618697166443, "rewards/margins": 0.47465044260025024, "rewards/rejected": -0.6353966593742371, "step": 1080 }, { "epoch": 0.58, "learning_rate": 2.228186337252414e-06, "logits/chosen": -1.7294307947158813, "logits/rejected": -0.8779215812683105, "logps/chosen": -516.14013671875, "logps/rejected": -824.5764770507812, "loss": 0.1407, "rewards/accuracies": 0.875, "rewards/chosen": -0.17551277577877045, "rewards/margins": 0.40844354033470154, "rewards/rejected": -0.583956241607666, "step": 1090 }, { "epoch": 0.59, "learning_rate": 2.1819562265292946e-06, "logits/chosen": -1.5813789367675781, "logits/rejected": -1.001509666442871, "logps/chosen": -429.26593017578125, "logps/rejected": -789.7249755859375, "loss": 0.1574, "rewards/accuracies": 0.875, "rewards/chosen": -0.21274442970752716, "rewards/margins": 0.3669392764568329, "rewards/rejected": -0.5796837210655212, "step": 1100 }, { "epoch": 0.59, "learning_rate": 2.1358364077845236e-06, "logits/chosen": -1.533307671546936, "logits/rejected": -0.9590204954147339, "logps/chosen": -387.72381591796875, "logps/rejected": -828.5607299804688, "loss": 0.1014, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.14084286987781525, "rewards/margins": 0.4476155638694763, "rewards/rejected": -0.5884584188461304, "step": 1110 }, { "epoch": 0.6, "learning_rate": 2.089842874558849e-06, "logits/chosen": -1.3846327066421509, "logits/rejected": -1.0313770771026611, "logps/chosen": -479.6465759277344, "logps/rejected": -906.1482543945312, "loss": 0.1133, "rewards/accuracies": 0.875, "rewards/chosen": -0.22880907356739044, "rewards/margins": 0.4139330983161926, "rewards/rejected": -0.6427421569824219, "step": 1120 }, { "epoch": 0.6, "learning_rate": 2.0439915765994242e-06, "logits/chosen": -1.5441999435424805, "logits/rejected": -0.8765427470207214, "logps/chosen": -374.98504638671875, "logps/rejected": -726.3242797851562, "loss": 0.1078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1631808578968048, "rewards/margins": 0.38433948159217834, "rewards/rejected": -0.5475203394889832, "step": 1130 }, { "epoch": 0.61, "learning_rate": 1.9982984143287186e-06, "logits/chosen": -1.7160451412200928, "logits/rejected": -0.9389771223068237, "logps/chosen": -419.886962890625, "logps/rejected": -727.5339965820312, "loss": 0.1209, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1307600438594818, "rewards/margins": 0.4173372685909271, "rewards/rejected": -0.5480973720550537, "step": 1140 }, { "epoch": 0.61, "learning_rate": 1.95277923333053e-06, "logits/chosen": -1.5577538013458252, "logits/rejected": -0.9766386151313782, "logps/chosen": -432.010498046875, "logps/rejected": -772.4149780273438, "loss": 0.1015, "rewards/accuracies": 0.75, "rewards/chosen": -0.1316412091255188, "rewards/margins": 0.40984097123146057, "rewards/rejected": -0.541482150554657, "step": 1150 }, { "epoch": 0.62, "learning_rate": 1.9074498188550156e-06, "logits/chosen": -1.6150667667388916, "logits/rejected": -1.0481829643249512, "logps/chosen": -460.06781005859375, "logps/rejected": -748.8250122070312, "loss": 0.1577, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18149954080581665, "rewards/margins": 0.34171923995018005, "rewards/rejected": -0.5232187509536743, "step": 1160 }, { "epoch": 0.62, "learning_rate": 1.862325890344643e-06, "logits/chosen": -1.3022327423095703, "logits/rejected": -0.9266065359115601, "logps/chosen": -367.62823486328125, "logps/rejected": -806.5985107421875, "loss": 0.1656, "rewards/accuracies": 0.875, "rewards/chosen": -0.17992374300956726, "rewards/margins": 0.42552104592323303, "rewards/rejected": -0.6054448485374451, "step": 1170 }, { "epoch": 0.63, "learning_rate": 1.817423095982972e-06, "logits/chosen": -1.3970632553100586, "logits/rejected": -0.9412476420402527, "logps/chosen": -451.11883544921875, "logps/rejected": -783.0731201171875, "loss": 0.102, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20501062273979187, "rewards/margins": 0.37199467420578003, "rewards/rejected": -0.5770053267478943, "step": 1180 }, { "epoch": 0.63, "learning_rate": 1.7727570072681293e-06, "logits/chosen": -1.4293451309204102, "logits/rejected": -0.8616847991943359, "logps/chosen": -406.25042724609375, "logps/rejected": -737.0385131835938, "loss": 0.1378, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16194511950016022, "rewards/margins": 0.3638822138309479, "rewards/rejected": -0.5258272886276245, "step": 1190 }, { "epoch": 0.64, "learning_rate": 1.7283431136128961e-06, "logits/chosen": -1.6002616882324219, "logits/rejected": -1.116288423538208, "logps/chosen": -449.384521484375, "logps/rejected": -805.2763061523438, "loss": 0.1454, "rewards/accuracies": 0.875, "rewards/chosen": -0.20747177302837372, "rewards/margins": 0.3553561270236969, "rewards/rejected": -0.5628278851509094, "step": 1200 }, { "epoch": 0.65, "learning_rate": 1.6841968169732478e-06, "logits/chosen": -1.5592294931411743, "logits/rejected": -1.054216742515564, "logps/chosen": -448.9071350097656, "logps/rejected": -851.6107177734375, "loss": 0.1204, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18522223830223083, "rewards/margins": 0.4141850471496582, "rewards/rejected": -0.5994073152542114, "step": 1210 }, { "epoch": 0.65, "learning_rate": 1.6403334265072284e-06, "logits/chosen": -1.6474437713623047, "logits/rejected": -0.8614113926887512, "logps/chosen": -453.735107421875, "logps/rejected": -801.1546630859375, "loss": 0.1081, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1977526694536209, "rewards/margins": 0.38285189867019653, "rewards/rejected": -0.5806045532226562, "step": 1220 }, { "epoch": 0.66, "learning_rate": 1.5967681532660066e-06, "logits/chosen": -1.2708427906036377, "logits/rejected": -0.9732850790023804, "logps/chosen": -437.337890625, "logps/rejected": -822.8092041015625, "loss": 0.1336, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20598828792572021, "rewards/margins": 0.38752201199531555, "rewards/rejected": -0.5935102701187134, "step": 1230 }, { "epoch": 0.66, "learning_rate": 1.5535161049189463e-06, "logits/chosen": -1.5570838451385498, "logits/rejected": -1.1252386569976807, "logps/chosen": -500.2212829589844, "logps/rejected": -786.1821899414062, "loss": 0.1145, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1828458607196808, "rewards/margins": 0.3151377737522125, "rewards/rejected": -0.4979836046695709, "step": 1240 }, { "epoch": 0.67, "learning_rate": 1.5105922805145356e-06, "logits/chosen": -1.8010812997817993, "logits/rejected": -1.2702046632766724, "logps/chosen": -434.25421142578125, "logps/rejected": -807.052001953125, "loss": 0.1215, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15841101109981537, "rewards/margins": 0.35736268758773804, "rewards/rejected": -0.5157736539840698, "step": 1250 }, { "epoch": 0.67, "learning_rate": 1.4680115652789823e-06, "logits/chosen": -1.856612205505371, "logits/rejected": -1.147216558456421, "logps/chosen": -523.8411865234375, "logps/rejected": -821.1082763671875, "loss": 0.1727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2023150473833084, "rewards/margins": 0.35308974981307983, "rewards/rejected": -0.555404782295227, "step": 1260 }, { "epoch": 0.68, "learning_rate": 1.4257887254542767e-06, "logits/chosen": -1.5119379758834839, "logits/rejected": -1.0702050924301147, "logps/chosen": -511.7137756347656, "logps/rejected": -906.3107299804688, "loss": 0.1025, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21919742226600647, "rewards/margins": 0.3765312731266022, "rewards/rejected": -0.5957286953926086, "step": 1270 }, { "epoch": 0.68, "learning_rate": 1.3839384031775227e-06, "logits/chosen": -1.6945511102676392, "logits/rejected": -0.8750427961349487, "logps/chosen": -440.59552001953125, "logps/rejected": -766.9216918945312, "loss": 0.1519, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15435707569122314, "rewards/margins": 0.4113141894340515, "rewards/rejected": -0.5656712651252747, "step": 1280 }, { "epoch": 0.69, "learning_rate": 1.342475111403298e-06, "logits/chosen": -1.4833415746688843, "logits/rejected": -1.0713919401168823, "logps/chosen": -438.8766174316406, "logps/rejected": -720.0028076171875, "loss": 0.1574, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20507605373859406, "rewards/margins": 0.28728824853897095, "rewards/rejected": -0.4923642575740814, "step": 1290 }, { "epoch": 0.69, "learning_rate": 1.3014132288708209e-06, "logits/chosen": -1.5766406059265137, "logits/rejected": -1.0825704336166382, "logps/chosen": -438.3309020996094, "logps/rejected": -823.6751708984375, "loss": 0.166, "rewards/accuracies": 0.875, "rewards/chosen": -0.19768479466438293, "rewards/margins": 0.3687485158443451, "rewards/rejected": -0.566433310508728, "step": 1300 }, { "epoch": 0.7, "learning_rate": 1.2607669951176549e-06, "logits/chosen": -1.4940482378005981, "logits/rejected": -1.2070845365524292, "logps/chosen": -389.45343017578125, "logps/rejected": -773.9241333007812, "loss": 0.1574, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15667062997817993, "rewards/margins": 0.3334207832813263, "rewards/rejected": -0.49009138345718384, "step": 1310 }, { "epoch": 0.7, "learning_rate": 1.2205505055416891e-06, "logits/chosen": -1.5122394561767578, "logits/rejected": -1.3955858945846558, "logps/chosen": -338.9855651855469, "logps/rejected": -748.5198364257812, "loss": 0.1404, "rewards/accuracies": 0.875, "rewards/chosen": -0.16774006187915802, "rewards/margins": 0.34086841344833374, "rewards/rejected": -0.5086084604263306, "step": 1320 }, { "epoch": 0.71, "learning_rate": 1.1807777065131002e-06, "logits/chosen": -1.5142749547958374, "logits/rejected": -1.0132977962493896, "logps/chosen": -410.44879150390625, "logps/rejected": -810.9103393554688, "loss": 0.1108, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14515772461891174, "rewards/margins": 0.36618533730506897, "rewards/rejected": -0.5113429427146912, "step": 1330 }, { "epoch": 0.71, "learning_rate": 1.1414623905380012e-06, "logits/chosen": -1.756066083908081, "logits/rejected": -1.1571279764175415, "logps/chosen": -441.978515625, "logps/rejected": -786.6061401367188, "loss": 0.1217, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1463043987751007, "rewards/margins": 0.36096060276031494, "rewards/rejected": -0.5072649717330933, "step": 1340 }, { "epoch": 0.72, "learning_rate": 1.1026181914754388e-06, "logits/chosen": -1.784054160118103, "logits/rejected": -1.0276035070419312, "logps/chosen": -506.1011657714844, "logps/rejected": -819.1619873046875, "loss": 0.1352, "rewards/accuracies": 0.875, "rewards/chosen": -0.1743244081735611, "rewards/margins": 0.38723859190940857, "rewards/rejected": -0.5615630149841309, "step": 1350 }, { "epoch": 0.73, "learning_rate": 1.0642585798094136e-06, "logits/chosen": -1.5410611629486084, "logits/rejected": -1.0178577899932861, "logps/chosen": -377.84197998046875, "logps/rejected": -720.7569580078125, "loss": 0.1264, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12278805673122406, "rewards/margins": 0.38729211688041687, "rewards/rejected": -0.5100802183151245, "step": 1360 }, { "epoch": 0.73, "learning_rate": 1.0263968579775522e-06, "logits/chosen": -1.5256543159484863, "logits/rejected": -0.9656683802604675, "logps/chosen": -458.48089599609375, "logps/rejected": -791.9251708984375, "loss": 0.1401, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16513575613498688, "rewards/margins": 0.38074809312820435, "rewards/rejected": -0.54588383436203, "step": 1370 }, { "epoch": 0.74, "learning_rate": 9.89046155758058e-07, "logits/chosen": -1.6825745105743408, "logits/rejected": -0.8826824426651001, "logps/chosen": -455.65594482421875, "logps/rejected": -802.0789794921875, "loss": 0.1228, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1435212790966034, "rewards/margins": 0.41453132033348083, "rewards/rejected": -0.558052659034729, "step": 1380 }, { "epoch": 0.74, "learning_rate": 9.52219425716534e-07, "logits/chosen": -1.4951298236846924, "logits/rejected": -0.8258262872695923, "logps/chosen": -515.0365600585938, "logps/rejected": -771.9305419921875, "loss": 0.1429, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22707219421863556, "rewards/margins": 0.3406526446342468, "rewards/rejected": -0.5677248239517212, "step": 1390 }, { "epoch": 0.75, "learning_rate": 9.15929438714262e-07, "logits/chosen": -1.6602566242218018, "logits/rejected": -0.9937980771064758, "logps/chosen": -368.70684814453125, "logps/rejected": -689.556884765625, "loss": 0.1528, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12013135105371475, "rewards/margins": 0.37646666169166565, "rewards/rejected": -0.4965980052947998, "step": 1400 }, { "epoch": 0.75, "learning_rate": 8.801887794794911e-07, "logits/chosen": -1.4943420886993408, "logits/rejected": -0.9112469553947449, "logps/chosen": -379.4705810546875, "logps/rejected": -716.5015258789062, "loss": 0.1407, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13394254446029663, "rewards/margins": 0.3894422650337219, "rewards/rejected": -0.5233848690986633, "step": 1410 }, { "epoch": 0.76, "learning_rate": 8.450098422432787e-07, "logits/chosen": -1.7622817754745483, "logits/rejected": -0.7207467555999756, "logps/chosen": -537.2728271484375, "logps/rejected": -854.8095703125, "loss": 0.1203, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1893191635608673, "rewards/margins": 0.4274328649044037, "rewards/rejected": -0.616752028465271, "step": 1420 }, { "epoch": 0.76, "learning_rate": 8.104048264413858e-07, "logits/chosen": -1.5849692821502686, "logits/rejected": -0.9879060983657837, "logps/chosen": -451.66802978515625, "logps/rejected": -812.3735961914062, "loss": 0.1162, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1757660210132599, "rewards/margins": 0.4036192297935486, "rewards/rejected": -0.5793852806091309, "step": 1430 }, { "epoch": 0.77, "learning_rate": 7.763857324837321e-07, "logits/chosen": -1.7880465984344482, "logits/rejected": -1.1138683557510376, "logps/chosen": -470.4102478027344, "logps/rejected": -782.1883544921875, "loss": 0.1273, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18462924659252167, "rewards/margins": 0.37033870816230774, "rewards/rejected": -0.5549679398536682, "step": 1440 }, { "epoch": 0.77, "learning_rate": 7.429643575928605e-07, "logits/chosen": -1.688932180404663, "logits/rejected": -1.1515108346939087, "logps/chosen": -416.93896484375, "logps/rejected": -748.1307373046875, "loss": 0.1284, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14920882880687714, "rewards/margins": 0.345571368932724, "rewards/rejected": -0.4947802424430847, "step": 1450 }, { "epoch": 0.78, "learning_rate": 7.101522917128709e-07, "logits/chosen": -1.3505184650421143, "logits/rejected": -0.8502361178398132, "logps/chosen": -453.3301696777344, "logps/rejected": -852.1624755859375, "loss": 0.149, "rewards/accuracies": 0.875, "rewards/chosen": -0.18861651420593262, "rewards/margins": 0.39376121759414673, "rewards/rejected": -0.5823776721954346, "step": 1460 }, { "epoch": 0.78, "learning_rate": 6.779609134902312e-07, "logits/chosen": -1.4756485223770142, "logits/rejected": -0.8883223533630371, "logps/chosen": -409.55029296875, "logps/rejected": -707.3751831054688, "loss": 0.1383, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17147330939769745, "rewards/margins": 0.3226475715637207, "rewards/rejected": -0.49412089586257935, "step": 1470 }, { "epoch": 0.79, "learning_rate": 6.464013863278629e-07, "logits/chosen": -1.593145728111267, "logits/rejected": -0.8717827796936035, "logps/chosen": -429.87725830078125, "logps/rejected": -856.5046997070312, "loss": 0.1131, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12572301924228668, "rewards/margins": 0.46143823862075806, "rewards/rejected": -0.5871611833572388, "step": 1480 }, { "epoch": 0.79, "learning_rate": 6.154846545138696e-07, "logits/chosen": -1.556706190109253, "logits/rejected": -1.1209014654159546, "logps/chosen": -434.39813232421875, "logps/rejected": -873.4528198242188, "loss": 0.1184, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.151644766330719, "rewards/margins": 0.44497567415237427, "rewards/rejected": -0.5966204404830933, "step": 1490 }, { "epoch": 0.8, "learning_rate": 5.852214394262515e-07, "logits/chosen": -1.5190951824188232, "logits/rejected": -1.1570379734039307, "logps/chosen": -394.3932189941406, "logps/rejected": -781.7257690429688, "loss": 0.1364, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16030506789684296, "rewards/margins": 0.3787681758403778, "rewards/rejected": -0.5390732884407043, "step": 1500 }, { "epoch": 0.81, "learning_rate": 5.556222358149191e-07, "logits/chosen": -1.5962765216827393, "logits/rejected": -0.9932464361190796, "logps/chosen": -390.4809875488281, "logps/rejected": -714.4888916015625, "loss": 0.1652, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17372211813926697, "rewards/margins": 0.361335813999176, "rewards/rejected": -0.5350579023361206, "step": 1510 }, { "epoch": 0.81, "learning_rate": 5.266973081622992e-07, "logits/chosen": -1.4811457395553589, "logits/rejected": -1.0426948070526123, "logps/chosen": -451.40069580078125, "logps/rejected": -772.6881713867188, "loss": 0.1611, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18898162245750427, "rewards/margins": 0.3498608469963074, "rewards/rejected": -0.5388425588607788, "step": 1520 }, { "epoch": 0.82, "learning_rate": 4.984566871237942e-07, "logits/chosen": -1.4943921566009521, "logits/rejected": -1.000528335571289, "logps/chosen": -393.3673095703125, "logps/rejected": -763.3333129882812, "loss": 0.1387, "rewards/accuracies": 0.875, "rewards/chosen": -0.15240536630153656, "rewards/margins": 0.3935711681842804, "rewards/rejected": -0.5459765195846558, "step": 1530 }, { "epoch": 0.82, "learning_rate": 4.709101660493251e-07, "logits/chosen": -1.4344061613082886, "logits/rejected": -0.8900424838066101, "logps/chosen": -454.6851501464844, "logps/rejected": -862.0211181640625, "loss": 0.1148, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2144501656293869, "rewards/margins": 0.39736613631248474, "rewards/rejected": -0.6118162870407104, "step": 1540 }, { "epoch": 0.83, "learning_rate": 4.440672975871743e-07, "logits/chosen": -1.6005455255508423, "logits/rejected": -1.2345631122589111, "logps/chosen": -454.6659240722656, "logps/rejected": -910.2664794921875, "loss": 0.0851, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.1945376694202423, "rewards/margins": 0.4257555603981018, "rewards/rejected": -0.6202932000160217, "step": 1550 }, { "epoch": 0.83, "learning_rate": 4.1793739037129134e-07, "logits/chosen": -1.755613923072815, "logits/rejected": -0.9976798892021179, "logps/chosen": -426.450927734375, "logps/rejected": -827.8946533203125, "loss": 0.1088, "rewards/accuracies": 0.875, "rewards/chosen": -0.1098506897687912, "rewards/margins": 0.4619103968143463, "rewards/rejected": -0.5717611908912659, "step": 1560 }, { "epoch": 0.84, "learning_rate": 3.9252950579322405e-07, "logits/chosen": -1.7585302591323853, "logits/rejected": -0.9437012672424316, "logps/chosen": -617.3839111328125, "logps/rejected": -886.2396240234375, "loss": 0.1537, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2242734432220459, "rewards/margins": 0.37233808636665344, "rewards/rejected": -0.5966114401817322, "step": 1570 }, { "epoch": 0.84, "learning_rate": 3.6785245485978864e-07, "logits/chosen": -1.5823638439178467, "logits/rejected": -1.016841173171997, "logps/chosen": -453.40643310546875, "logps/rejected": -826.7568359375, "loss": 0.1052, "rewards/accuracies": 0.875, "rewards/chosen": -0.14566640555858612, "rewards/margins": 0.41016706824302673, "rewards/rejected": -0.5558334589004517, "step": 1580 }, { "epoch": 0.85, "learning_rate": 3.43914795137566e-07, "logits/chosen": -1.3402397632598877, "logits/rejected": -0.6611793041229248, "logps/chosen": -491.9454040527344, "logps/rejected": -827.7058715820312, "loss": 0.1243, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1830025464296341, "rewards/margins": 0.3839171230792999, "rewards/rejected": -0.5669196844100952, "step": 1590 }, { "epoch": 0.85, "learning_rate": 3.207248277852901e-07, "logits/chosen": -1.3319523334503174, "logits/rejected": -1.2467930316925049, "logps/chosen": -415.2613220214844, "logps/rejected": -794.6478271484375, "loss": 0.175, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18765749037265778, "rewards/margins": 0.33323392271995544, "rewards/rejected": -0.5208913683891296, "step": 1600 }, { "epoch": 0.86, "learning_rate": 2.9829059467515074e-07, "logits/chosen": -1.6862188577651978, "logits/rejected": -1.0607928037643433, "logps/chosen": -466.0138244628906, "logps/rejected": -823.7083129882812, "loss": 0.1264, "rewards/accuracies": 0.875, "rewards/chosen": -0.1697189062833786, "rewards/margins": 0.39212626218795776, "rewards/rejected": -0.5618451833724976, "step": 1610 }, { "epoch": 0.86, "learning_rate": 2.766198756040153e-07, "logits/chosen": -1.5529918670654297, "logits/rejected": -1.1102968454360962, "logps/chosen": -514.1727294921875, "logps/rejected": -934.7576293945312, "loss": 0.0946, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2095489799976349, "rewards/margins": 0.39688506722450256, "rewards/rejected": -0.6064340472221375, "step": 1620 }, { "epoch": 0.87, "learning_rate": 2.5572018559553155e-07, "logits/chosen": -1.4525808095932007, "logits/rejected": -1.114332675933838, "logps/chosen": -429.51336669921875, "logps/rejected": -814.693115234375, "loss": 0.1319, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20347242057323456, "rewards/margins": 0.3701416254043579, "rewards/rejected": -0.5736140012741089, "step": 1630 }, { "epoch": 0.87, "learning_rate": 2.3559877229404864e-07, "logits/chosen": -1.5984094142913818, "logits/rejected": -1.1003965139389038, "logps/chosen": -458.2529296875, "logps/rejected": -795.9619140625, "loss": 0.1294, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17800959944725037, "rewards/margins": 0.363762229681015, "rewards/rejected": -0.5417717695236206, "step": 1640 }, { "epoch": 0.88, "learning_rate": 2.1626261345126576e-07, "logits/chosen": -1.4350886344909668, "logits/rejected": -1.0259506702423096, "logps/chosen": -415.7510681152344, "logps/rejected": -919.9736328125, "loss": 0.076, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.15845449268817902, "rewards/margins": 0.4928809702396393, "rewards/rejected": -0.6513354182243347, "step": 1650 }, { "epoch": 0.89, "learning_rate": 1.9771841450646505e-07, "logits/chosen": -1.6430625915527344, "logits/rejected": -0.9447630643844604, "logps/chosen": -506.1864318847656, "logps/rejected": -795.38134765625, "loss": 0.1602, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23687663674354553, "rewards/margins": 0.33186858892440796, "rewards/rejected": -0.5687452554702759, "step": 1660 }, { "epoch": 0.89, "learning_rate": 1.7997260626118758e-07, "logits/chosen": -1.898046851158142, "logits/rejected": -1.3102858066558838, "logps/chosen": -514.0572509765625, "logps/rejected": -825.0703125, "loss": 0.1456, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17340177297592163, "rewards/margins": 0.38971638679504395, "rewards/rejected": -0.5631181597709656, "step": 1670 }, { "epoch": 0.9, "learning_rate": 1.6303134264914365e-07, "logits/chosen": -1.6851441860198975, "logits/rejected": -1.0963430404663086, "logps/chosen": -480.8072204589844, "logps/rejected": -728.0396728515625, "loss": 0.1273, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1836322844028473, "rewards/margins": 0.3386848270893097, "rewards/rejected": -0.5223170518875122, "step": 1680 }, { "epoch": 0.9, "learning_rate": 1.469004986021355e-07, "logits/chosen": -1.414111852645874, "logits/rejected": -0.8712374567985535, "logps/chosen": -443.46728515625, "logps/rejected": -897.1246948242188, "loss": 0.0899, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17597445845603943, "rewards/margins": 0.4499644339084625, "rewards/rejected": -0.6259388327598572, "step": 1690 }, { "epoch": 0.91, "learning_rate": 1.315856680127367e-07, "logits/chosen": -1.4355229139328003, "logits/rejected": -0.8268268704414368, "logps/chosen": -411.8287658691406, "logps/rejected": -796.5527954101562, "loss": 0.1013, "rewards/accuracies": 0.875, "rewards/chosen": -0.16030281782150269, "rewards/margins": 0.4271472990512848, "rewards/rejected": -0.5874501466751099, "step": 1700 }, { "epoch": 0.91, "learning_rate": 1.1709216179442817e-07, "logits/chosen": -1.5936983823776245, "logits/rejected": -0.9012172818183899, "logps/chosen": -452.53155517578125, "logps/rejected": -878.1297607421875, "loss": 0.1108, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.17777523398399353, "rewards/margins": 0.4133872389793396, "rewards/rejected": -0.5911625623703003, "step": 1710 }, { "epoch": 0.92, "learning_rate": 1.0342500603986421e-07, "logits/chosen": -1.441282033920288, "logits/rejected": -0.9638457298278809, "logps/chosen": -417.11895751953125, "logps/rejected": -741.5521850585938, "loss": 0.1466, "rewards/accuracies": 0.75, "rewards/chosen": -0.16872674226760864, "rewards/margins": 0.3362739682197571, "rewards/rejected": -0.5050007104873657, "step": 1720 }, { "epoch": 0.92, "learning_rate": 9.058894027791643e-08, "logits/chosen": -1.4651381969451904, "logits/rejected": -0.9410767555236816, "logps/chosen": -497.6310119628906, "logps/rejected": -866.1295776367188, "loss": 0.1057, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23562383651733398, "rewards/margins": 0.3830471634864807, "rewards/rejected": -0.6186710596084595, "step": 1730 }, { "epoch": 0.93, "learning_rate": 7.858841583008592e-08, "logits/chosen": -1.6138349771499634, "logits/rejected": -1.0234501361846924, "logps/chosen": -425.06610107421875, "logps/rejected": -700.060791015625, "loss": 0.1241, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16232439875602722, "rewards/margins": 0.3415161669254303, "rewards/rejected": -0.5038405656814575, "step": 1740 }, { "epoch": 0.93, "learning_rate": 6.742759426686313e-08, "logits/chosen": -1.5296719074249268, "logits/rejected": -1.15841543674469, "logps/chosen": -541.86083984375, "logps/rejected": -857.0759887695312, "loss": 0.1324, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.22835755348205566, "rewards/margins": 0.37031129002571106, "rewards/rejected": -0.5986688733100891, "step": 1750 }, { "epoch": 0.94, "learning_rate": 5.7110345964571104e-08, "logits/chosen": -1.6711105108261108, "logits/rejected": -1.0233453512191772, "logps/chosen": -445.75762939453125, "logps/rejected": -772.8753662109375, "loss": 0.1172, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18701913952827454, "rewards/margins": 0.3782210052013397, "rewards/rejected": -0.565240204334259, "step": 1760 }, { "epoch": 0.94, "learning_rate": 4.764024876318357e-08, "logits/chosen": -1.5489776134490967, "logits/rejected": -0.8348779678344727, "logps/chosen": -509.6427307128906, "logps/rejected": -782.4371337890625, "loss": 0.1146, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.20378637313842773, "rewards/margins": 0.37221604585647583, "rewards/rejected": -0.5760024189949036, "step": 1770 }, { "epoch": 0.95, "learning_rate": 3.902058672559633e-08, "logits/chosen": -1.8395429849624634, "logits/rejected": -1.2655082941055298, "logps/chosen": -375.2162780761719, "logps/rejected": -805.35302734375, "loss": 0.1244, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11881232261657715, "rewards/margins": 0.4330004155635834, "rewards/rejected": -0.5518127679824829, "step": 1780 }, { "epoch": 0.95, "learning_rate": 3.125434899876933e-08, "logits/chosen": -1.5633362531661987, "logits/rejected": -1.1406381130218506, "logps/chosen": -356.45098876953125, "logps/rejected": -783.3472900390625, "loss": 0.1001, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1347019374370575, "rewards/margins": 0.42905181646347046, "rewards/rejected": -0.5637537837028503, "step": 1790 }, { "epoch": 0.96, "learning_rate": 2.4344228777145873e-08, "logits/chosen": -1.6571776866912842, "logits/rejected": -0.7649690508842468, "logps/chosen": -587.1907348632812, "logps/rejected": -933.9886474609375, "loss": 0.132, "rewards/accuracies": 0.875, "rewards/chosen": -0.25137990713119507, "rewards/margins": 0.42299261689186096, "rewards/rejected": -0.6743724942207336, "step": 1800 }, { "epoch": 0.97, "learning_rate": 1.829262236869772e-08, "logits/chosen": -1.541998267173767, "logits/rejected": -0.8689600229263306, "logps/chosen": -483.3575134277344, "logps/rejected": -698.212158203125, "loss": 0.1768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22255222499370575, "rewards/margins": 0.27523303031921387, "rewards/rejected": -0.4977852404117584, "step": 1810 }, { "epoch": 0.97, "learning_rate": 1.3101628363929586e-08, "logits/chosen": -1.5238444805145264, "logits/rejected": -0.7508775591850281, "logps/chosen": -520.79296875, "logps/rejected": -767.9632568359375, "loss": 0.1203, "rewards/accuracies": 0.875, "rewards/chosen": -0.1896631270647049, "rewards/margins": 0.36830946803092957, "rewards/rejected": -0.5579725503921509, "step": 1820 }, { "epoch": 0.98, "learning_rate": 8.773046908123195e-09, "logits/chosen": -1.6025253534317017, "logits/rejected": -1.304527997970581, "logps/chosen": -375.14874267578125, "logps/rejected": -767.8821411132812, "loss": 0.1284, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1781584918498993, "rewards/margins": 0.33938026428222656, "rewards/rejected": -0.5175387263298035, "step": 1830 }, { "epoch": 0.98, "learning_rate": 5.308379077080817e-09, "logits/chosen": -1.6030333042144775, "logits/rejected": -1.3066356182098389, "logps/chosen": -397.94873046875, "logps/rejected": -825.3441162109375, "loss": 0.1111, "rewards/accuracies": 0.875, "rewards/chosen": -0.20664629340171814, "rewards/margins": 0.3704259991645813, "rewards/rejected": -0.5770723819732666, "step": 1840 }, { "epoch": 0.99, "learning_rate": 2.7088263565760996e-09, "logits/chosen": -1.6151325702667236, "logits/rejected": -0.9792189598083496, "logps/chosen": -399.3708801269531, "logps/rejected": -748.7066650390625, "loss": 0.1181, "rewards/accuracies": 0.875, "rewards/chosen": -0.1316554844379425, "rewards/margins": 0.42211928963661194, "rewards/rejected": -0.5537747740745544, "step": 1850 }, { "epoch": 0.99, "learning_rate": 9.752902257023633e-10, "logits/chosen": -1.6095302104949951, "logits/rejected": -1.1830781698226929, "logps/chosen": -393.78350830078125, "logps/rejected": -796.3955078125, "loss": 0.0928, "rewards/accuracies": 0.875, "rewards/chosen": -0.15096323192119598, "rewards/margins": 0.4236125349998474, "rewards/rejected": -0.5745757818222046, "step": 1860 }, { "epoch": 1.0, "learning_rate": 1.083718442532189e-10, "logits/chosen": -1.4612399339675903, "logits/rejected": -0.8474820256233215, "logps/chosen": -456.6351623535156, "logps/rejected": -785.6075439453125, "loss": 0.1354, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18163737654685974, "rewards/margins": 0.3639640808105469, "rewards/rejected": -0.545601487159729, "step": 1870 }, { "epoch": 1.0, "step": 1875, "total_flos": 0.0, "train_loss": 0.13990657812754312, "train_runtime": 16010.7596, "train_samples_per_second": 0.937, "train_steps_per_second": 0.117 } ], "logging_steps": 10, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }