{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9992254066615027, "eval_steps": 100, "global_step": 2904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.7182130584192438e-09, "logits/chosen": -2.293531894683838, "logits/rejected": -2.2362442016601562, "logps/chosen": -280.74072265625, "logps/rejected": -204.830322265625, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 1.718213058419244e-08, "logits/chosen": -2.411555290222168, "logits/rejected": -2.3393168449401855, "logps/chosen": -294.2322998046875, "logps/rejected": -213.8911895751953, "loss": 0.6946, "rewards/accuracies": 0.4375, "rewards/chosen": 0.005316631868481636, "rewards/margins": 0.0028615635819733143, "rewards/rejected": 0.002455067355185747, "step": 10 }, { "epoch": 0.02, "learning_rate": 3.436426116838488e-08, "logits/chosen": -2.4150137901306152, "logits/rejected": -2.3802390098571777, "logps/chosen": -279.42938232421875, "logps/rejected": -237.62747192382812, "loss": 0.6943, "rewards/accuracies": 0.53125, "rewards/chosen": 0.0023494327906519175, "rewards/margins": 0.0011181762674823403, "rewards/rejected": 0.0012312561739236116, "step": 20 }, { "epoch": 0.03, "learning_rate": 5.154639175257731e-08, "logits/chosen": -2.461092472076416, "logits/rejected": -2.39383602142334, "logps/chosen": -301.07952880859375, "logps/rejected": -215.763427734375, "loss": 0.6943, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -0.0003540778416208923, "rewards/margins": -0.001285408972762525, "rewards/rejected": 0.0009313317714259028, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.872852233676976e-08, "logits/chosen": -2.3856747150421143, "logits/rejected": -2.3453280925750732, "logps/chosen": -291.4425354003906, "logps/rejected": -231.8385772705078, "loss": 0.6934, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.0012902533635497093, "rewards/margins": -0.00038409550325013697, "rewards/rejected": 0.001674349419772625, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.59106529209622e-08, "logits/chosen": -2.443054437637329, "logits/rejected": -2.383383274078369, "logps/chosen": -299.1965026855469, "logps/rejected": -220.2180938720703, "loss": 0.6948, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005029269959777594, "rewards/margins": 0.004818198271095753, "rewards/rejected": 0.0002110706700477749, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0309278350515462e-07, "logits/chosen": -2.435997247695923, "logits/rejected": -2.4249629974365234, "logps/chosen": -272.54656982421875, "logps/rejected": -227.5023193359375, "loss": 0.6908, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.0016036666929721832, "rewards/margins": 0.0018250759458169341, "rewards/rejected": -0.00022140909277368337, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.202749140893471e-07, "logits/chosen": -2.4656193256378174, "logits/rejected": -2.420733690261841, "logps/chosen": -292.0702209472656, "logps/rejected": -206.99124145507812, "loss": 0.6908, "rewards/accuracies": 0.5625, "rewards/chosen": 0.00455916253849864, "rewards/margins": 0.008069148287177086, "rewards/rejected": -0.003509984817355871, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.3745704467353952e-07, "logits/chosen": -2.381108283996582, "logits/rejected": -2.3896584510803223, "logps/chosen": -250.19076538085938, "logps/rejected": -212.47366333007812, "loss": 0.6914, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": 0.006951277144253254, "rewards/margins": 0.008491529151797295, "rewards/rejected": -0.0015402527060359716, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.5463917525773197e-07, "logits/chosen": -2.3331446647644043, "logits/rejected": -2.254476547241211, "logps/chosen": -241.5772247314453, "logps/rejected": -185.46815490722656, "loss": 0.6898, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.008401724509894848, "rewards/margins": 0.014180210418999195, "rewards/rejected": -0.005778484977781773, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.718213058419244e-07, "logits/chosen": -2.4016473293304443, "logits/rejected": -2.3915467262268066, "logps/chosen": -259.73956298828125, "logps/rejected": -221.9446258544922, "loss": 0.6896, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0046272119507193565, "rewards/margins": 0.00822476390749216, "rewards/rejected": -0.0035975512582808733, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.8900343642611682e-07, "logits/chosen": -2.441370725631714, "logits/rejected": -2.3111448287963867, "logps/chosen": -252.11367797851562, "logps/rejected": -210.3745574951172, "loss": 0.6912, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.0009224863606505096, "rewards/margins": 0.001503048581071198, "rewards/rejected": -0.0005805626278743148, "step": 110 }, { "epoch": 0.12, "learning_rate": 2.0618556701030925e-07, "logits/chosen": -2.3808603286743164, "logits/rejected": -2.437734842300415, "logps/chosen": -258.5278015136719, "logps/rejected": -210.9561309814453, "loss": 0.6901, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.001118434825912118, "rewards/margins": 0.00486636720597744, "rewards/rejected": -0.0037479314487427473, "step": 120 }, { "epoch": 0.13, "learning_rate": 2.2336769759450173e-07, "logits/chosen": -2.447282075881958, "logits/rejected": -2.431652784347534, "logps/chosen": -279.3333435058594, "logps/rejected": -213.950439453125, "loss": 0.688, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.005058329086750746, "rewards/margins": 0.012270588427782059, "rewards/rejected": -0.0072122602723538876, "step": 130 }, { "epoch": 0.14, "learning_rate": 2.405498281786942e-07, "logits/chosen": -2.574840545654297, "logits/rejected": -2.400458812713623, "logps/chosen": -267.6883544921875, "logps/rejected": -203.22642517089844, "loss": 0.6891, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.002968291286379099, "rewards/margins": 0.005260258913040161, "rewards/rejected": -0.0022919676266610622, "step": 140 }, { "epoch": 0.15, "learning_rate": 2.5773195876288655e-07, "logits/chosen": -2.357297420501709, "logits/rejected": -2.391117811203003, "logps/chosen": -280.30828857421875, "logps/rejected": -214.0823974609375, "loss": 0.6878, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.013498497195541859, "rewards/margins": 0.025946879759430885, "rewards/rejected": -0.012448383495211601, "step": 150 }, { "epoch": 0.17, "learning_rate": 2.7491408934707903e-07, "logits/chosen": -2.460391044616699, "logits/rejected": -2.435685873031616, "logps/chosen": -307.55450439453125, "logps/rejected": -234.9291534423828, "loss": 0.6834, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.007904368452727795, "rewards/margins": 0.025973070412874222, "rewards/rejected": -0.01806870475411415, "step": 160 }, { "epoch": 0.18, "learning_rate": 2.9209621993127146e-07, "logits/chosen": -2.417241334915161, "logits/rejected": -2.4194204807281494, "logps/chosen": -284.2513732910156, "logps/rejected": -220.6437530517578, "loss": 0.6872, "rewards/accuracies": 0.612500011920929, "rewards/chosen": 0.006491424981504679, "rewards/margins": 0.01496223546564579, "rewards/rejected": -0.008470811881124973, "step": 170 }, { "epoch": 0.19, "learning_rate": 3.0927835051546394e-07, "logits/chosen": -2.426492214202881, "logits/rejected": -2.427013635635376, "logps/chosen": -261.0791320800781, "logps/rejected": -236.6595916748047, "loss": 0.6821, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.007498173974454403, "rewards/margins": 0.020344991236925125, "rewards/rejected": -0.012846815399825573, "step": 180 }, { "epoch": 0.2, "learning_rate": 3.2646048109965636e-07, "logits/chosen": -2.479682445526123, "logits/rejected": -2.3931996822357178, "logps/chosen": -261.3951721191406, "logps/rejected": -213.754150390625, "loss": 0.6816, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.011733494699001312, "rewards/margins": 0.023404525592923164, "rewards/rejected": -0.011671033687889576, "step": 190 }, { "epoch": 0.21, "learning_rate": 3.436426116838488e-07, "logits/chosen": -2.420584201812744, "logits/rejected": -2.3466110229492188, "logps/chosen": -262.15338134765625, "logps/rejected": -223.8980255126953, "loss": 0.6793, "rewards/accuracies": 0.6875, "rewards/chosen": 0.01480065006762743, "rewards/margins": 0.030916428193449974, "rewards/rejected": -0.01611577905714512, "step": 200 }, { "epoch": 0.22, "learning_rate": 3.608247422680412e-07, "logits/chosen": -2.4468910694122314, "logits/rejected": -2.367849826812744, "logps/chosen": -276.70526123046875, "logps/rejected": -203.1634979248047, "loss": 0.6773, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.0047667198814451694, "rewards/margins": 0.023584634065628052, "rewards/rejected": -0.01881791278719902, "step": 210 }, { "epoch": 0.23, "learning_rate": 3.7800687285223364e-07, "logits/chosen": -2.443112850189209, "logits/rejected": -2.4011592864990234, "logps/chosen": -248.66348266601562, "logps/rejected": -211.2028350830078, "loss": 0.6725, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01790793612599373, "rewards/margins": 0.04252880811691284, "rewards/rejected": -0.024620870128273964, "step": 220 }, { "epoch": 0.24, "learning_rate": 3.9518900343642607e-07, "logits/chosen": -2.451524257659912, "logits/rejected": -2.444117546081543, "logps/chosen": -261.34912109375, "logps/rejected": -210.658447265625, "loss": 0.6754, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.027729609981179237, "rewards/margins": 0.043738484382629395, "rewards/rejected": -0.016008879989385605, "step": 230 }, { "epoch": 0.25, "learning_rate": 4.123711340206185e-07, "logits/chosen": -2.3075928688049316, "logits/rejected": -2.343151092529297, "logps/chosen": -280.16119384765625, "logps/rejected": -234.6321563720703, "loss": 0.6685, "rewards/accuracies": 0.65625, "rewards/chosen": 0.024107476696372032, "rewards/margins": 0.05809453874826431, "rewards/rejected": -0.03398705646395683, "step": 240 }, { "epoch": 0.26, "learning_rate": 4.2955326460481097e-07, "logits/chosen": -2.4088199138641357, "logits/rejected": -2.380805492401123, "logps/chosen": -267.1762390136719, "logps/rejected": -210.53866577148438, "loss": 0.6682, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.02717725932598114, "rewards/margins": 0.06459168344736099, "rewards/rejected": -0.03741442412137985, "step": 250 }, { "epoch": 0.27, "learning_rate": 4.4673539518900345e-07, "logits/chosen": -2.3903017044067383, "logits/rejected": -2.4178988933563232, "logps/chosen": -261.8699951171875, "logps/rejected": -215.27633666992188, "loss": 0.6653, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.01536791305989027, "rewards/margins": 0.03926190733909607, "rewards/rejected": -0.023893997073173523, "step": 260 }, { "epoch": 0.28, "learning_rate": 4.639175257731959e-07, "logits/chosen": -2.3510518074035645, "logits/rejected": -2.374760389328003, "logps/chosen": -221.19140625, "logps/rejected": -214.09078979492188, "loss": 0.6619, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.025187481194734573, "rewards/margins": 0.06599839776754379, "rewards/rejected": -0.04081092029809952, "step": 270 }, { "epoch": 0.29, "learning_rate": 4.810996563573884e-07, "logits/chosen": -2.4117298126220703, "logits/rejected": -2.4171319007873535, "logps/chosen": -265.48126220703125, "logps/rejected": -219.87637329101562, "loss": 0.6574, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.02810204029083252, "rewards/margins": 0.07569292932748795, "rewards/rejected": -0.047590889036655426, "step": 280 }, { "epoch": 0.3, "learning_rate": 4.982817869415807e-07, "logits/chosen": -2.4645633697509766, "logits/rejected": -2.3363564014434814, "logps/chosen": -299.02349853515625, "logps/rejected": -233.5424346923828, "loss": 0.6555, "rewards/accuracies": 0.731249988079071, "rewards/chosen": 0.042131923139095306, "rewards/margins": 0.09228460490703583, "rewards/rejected": -0.05015267804265022, "step": 290 }, { "epoch": 0.31, "learning_rate": 4.982778415614236e-07, "logits/chosen": -2.388867139816284, "logits/rejected": -2.3494858741760254, "logps/chosen": -236.2740020751953, "logps/rejected": -220.2272186279297, "loss": 0.6511, "rewards/accuracies": 0.668749988079071, "rewards/chosen": 0.020576827228069305, "rewards/margins": 0.09741847962141037, "rewards/rejected": -0.07684165239334106, "step": 300 }, { "epoch": 0.32, "learning_rate": 4.963643321852277e-07, "logits/chosen": -2.4252231121063232, "logits/rejected": -2.3302061557769775, "logps/chosen": -285.35650634765625, "logps/rejected": -231.33602905273438, "loss": 0.6499, "rewards/accuracies": 0.643750011920929, "rewards/chosen": 0.015449454076588154, "rewards/margins": 0.10541415214538574, "rewards/rejected": -0.08996469527482986, "step": 310 }, { "epoch": 0.33, "learning_rate": 4.944508228090318e-07, "logits/chosen": -2.437065839767456, "logits/rejected": -2.4959487915039062, "logps/chosen": -238.969482421875, "logps/rejected": -192.4582977294922, "loss": 0.6367, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.03995648771524429, "rewards/margins": 0.13508270680904388, "rewards/rejected": -0.09512621909379959, "step": 320 }, { "epoch": 0.34, "learning_rate": 4.925373134328357e-07, "logits/chosen": -2.461618423461914, "logits/rejected": -2.4392247200012207, "logps/chosen": -252.608642578125, "logps/rejected": -222.25125122070312, "loss": 0.6428, "rewards/accuracies": 0.6875, "rewards/chosen": 0.0467003658413887, "rewards/margins": 0.106337770819664, "rewards/rejected": -0.05963738635182381, "step": 330 }, { "epoch": 0.35, "learning_rate": 4.906238040566398e-07, "logits/chosen": -2.4741828441619873, "logits/rejected": -2.355389356613159, "logps/chosen": -271.66387939453125, "logps/rejected": -231.6305694580078, "loss": 0.6431, "rewards/accuracies": 0.71875, "rewards/chosen": 0.04233894124627113, "rewards/margins": 0.15629062056541443, "rewards/rejected": -0.11395169794559479, "step": 340 }, { "epoch": 0.36, "learning_rate": 4.887102946804438e-07, "logits/chosen": -2.5339910984039307, "logits/rejected": -2.424262523651123, "logps/chosen": -289.12408447265625, "logps/rejected": -223.707275390625, "loss": 0.6293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.05516533926129341, "rewards/margins": 0.20021691918373108, "rewards/rejected": -0.14505159854888916, "step": 350 }, { "epoch": 0.37, "learning_rate": 4.867967853042479e-07, "logits/chosen": -2.440347194671631, "logits/rejected": -2.469924211502075, "logps/chosen": -272.43304443359375, "logps/rejected": -223.846435546875, "loss": 0.6225, "rewards/accuracies": 0.71875, "rewards/chosen": 0.05506114289164543, "rewards/margins": 0.1845804899930954, "rewards/rejected": -0.12951937317848206, "step": 360 }, { "epoch": 0.38, "learning_rate": 4.84883275928052e-07, "logits/chosen": -2.4580020904541016, "logits/rejected": -2.422905206680298, "logps/chosen": -274.3728332519531, "logps/rejected": -228.1702117919922, "loss": 0.622, "rewards/accuracies": 0.65625, "rewards/chosen": 0.04404681175947189, "rewards/margins": 0.1968606859445572, "rewards/rejected": -0.1528138816356659, "step": 370 }, { "epoch": 0.39, "learning_rate": 4.82969766551856e-07, "logits/chosen": -2.3649065494537354, "logits/rejected": -2.3759725093841553, "logps/chosen": -258.05328369140625, "logps/rejected": -228.05404663085938, "loss": 0.6335, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.04060830920934677, "rewards/margins": 0.17422744631767273, "rewards/rejected": -0.13361915946006775, "step": 380 }, { "epoch": 0.4, "learning_rate": 4.810562571756601e-07, "logits/chosen": -2.4177675247192383, "logits/rejected": -2.406047821044922, "logps/chosen": -255.5844268798828, "logps/rejected": -219.80984497070312, "loss": 0.6251, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.02088143676519394, "rewards/margins": 0.18797752261161804, "rewards/rejected": -0.1670960783958435, "step": 390 }, { "epoch": 0.41, "learning_rate": 4.791427477994642e-07, "logits/chosen": -2.418471574783325, "logits/rejected": -2.4022955894470215, "logps/chosen": -306.84869384765625, "logps/rejected": -239.74612426757812, "loss": 0.62, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.06454652547836304, "rewards/margins": 0.1981131136417389, "rewards/rejected": -0.13356655836105347, "step": 400 }, { "epoch": 0.42, "learning_rate": 4.772292384232682e-07, "logits/chosen": -2.4177165031433105, "logits/rejected": -2.379561185836792, "logps/chosen": -246.2447967529297, "logps/rejected": -245.50753784179688, "loss": 0.6214, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.025952398777008057, "rewards/margins": 0.16164085268974304, "rewards/rejected": -0.1875932663679123, "step": 410 }, { "epoch": 0.43, "learning_rate": 4.753157290470723e-07, "logits/chosen": -2.488081216812134, "logits/rejected": -2.4657857418060303, "logps/chosen": -256.111083984375, "logps/rejected": -225.01748657226562, "loss": 0.6292, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": 0.001765048480592668, "rewards/margins": 0.16318608820438385, "rewards/rejected": -0.1614210307598114, "step": 420 }, { "epoch": 0.44, "learning_rate": 4.7340221967087635e-07, "logits/chosen": -2.411403179168701, "logits/rejected": -2.3677725791931152, "logps/chosen": -251.43051147460938, "logps/rejected": -224.96240234375, "loss": 0.6157, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.036291979253292084, "rewards/margins": 0.2256316840648651, "rewards/rejected": -0.18933971226215363, "step": 430 }, { "epoch": 0.45, "learning_rate": 4.714887102946804e-07, "logits/chosen": -2.4702084064483643, "logits/rejected": -2.3358662128448486, "logps/chosen": -257.2681884765625, "logps/rejected": -200.87564086914062, "loss": 0.6008, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.021172259002923965, "rewards/margins": 0.22813072800636292, "rewards/rejected": -0.20695844292640686, "step": 440 }, { "epoch": 0.46, "learning_rate": 4.6957520091848447e-07, "logits/chosen": -2.426776885986328, "logits/rejected": -2.3923580646514893, "logps/chosen": -228.310791015625, "logps/rejected": -204.06149291992188, "loss": 0.6346, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.00926109217107296, "rewards/margins": 0.17081685364246368, "rewards/rejected": -0.16155575215816498, "step": 450 }, { "epoch": 0.48, "learning_rate": 4.6766169154228853e-07, "logits/chosen": -2.3557937145233154, "logits/rejected": -2.3138818740844727, "logps/chosen": -268.3694152832031, "logps/rejected": -239.6737518310547, "loss": 0.5994, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07315589487552643, "rewards/margins": 0.2832576632499695, "rewards/rejected": -0.21010179817676544, "step": 460 }, { "epoch": 0.49, "learning_rate": 4.657481821660926e-07, "logits/chosen": -2.4533188343048096, "logits/rejected": -2.4328866004943848, "logps/chosen": -283.4711608886719, "logps/rejected": -211.56640625, "loss": 0.5987, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.0002489805337972939, "rewards/margins": 0.27414873242378235, "rewards/rejected": -0.27439773082733154, "step": 470 }, { "epoch": 0.5, "learning_rate": 4.6383467278989666e-07, "logits/chosen": -2.3915047645568848, "logits/rejected": -2.4537439346313477, "logps/chosen": -248.0155029296875, "logps/rejected": -230.64767456054688, "loss": 0.6144, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.018264885991811752, "rewards/margins": 0.20035696029663086, "rewards/rejected": -0.21862182021141052, "step": 480 }, { "epoch": 0.51, "learning_rate": 4.6192116341370067e-07, "logits/chosen": -2.5227386951446533, "logits/rejected": -2.4403090476989746, "logps/chosen": -277.166748046875, "logps/rejected": -230.45849609375, "loss": 0.5927, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.04206278175115585, "rewards/margins": 0.3326976001262665, "rewards/rejected": -0.29063481092453003, "step": 490 }, { "epoch": 0.52, "learning_rate": 4.6000765403750473e-07, "logits/chosen": -2.4241671562194824, "logits/rejected": -2.336174488067627, "logps/chosen": -255.9370574951172, "logps/rejected": -211.1270751953125, "loss": 0.5983, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": 0.01858004741370678, "rewards/margins": 0.2676704525947571, "rewards/rejected": -0.24909043312072754, "step": 500 }, { "epoch": 0.53, "learning_rate": 4.580941446613088e-07, "logits/chosen": -2.4633803367614746, "logits/rejected": -2.4187140464782715, "logps/chosen": -257.75225830078125, "logps/rejected": -238.34164428710938, "loss": 0.5972, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.019993241876363754, "rewards/margins": 0.240191251039505, "rewards/rejected": -0.26018446683883667, "step": 510 }, { "epoch": 0.54, "learning_rate": 4.5618063528511285e-07, "logits/chosen": -2.387589454650879, "logits/rejected": -2.358363628387451, "logps/chosen": -274.30670166015625, "logps/rejected": -232.08969116210938, "loss": 0.6006, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.05401581525802612, "rewards/margins": 0.24342355132102966, "rewards/rejected": -0.29743942618370056, "step": 520 }, { "epoch": 0.55, "learning_rate": 4.542671259089169e-07, "logits/chosen": -2.4646763801574707, "logits/rejected": -2.407026767730713, "logps/chosen": -273.80029296875, "logps/rejected": -233.99826049804688, "loss": 0.6071, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.009464024566113949, "rewards/margins": 0.2972859740257263, "rewards/rejected": -0.30674999952316284, "step": 530 }, { "epoch": 0.56, "learning_rate": 4.52353616532721e-07, "logits/chosen": -2.4378743171691895, "logits/rejected": -2.415499448776245, "logps/chosen": -266.07171630859375, "logps/rejected": -235.11093139648438, "loss": 0.6005, "rewards/accuracies": 0.65625, "rewards/chosen": -0.009244078770279884, "rewards/margins": 0.26727497577667236, "rewards/rejected": -0.2765190303325653, "step": 540 }, { "epoch": 0.57, "learning_rate": 4.5044010715652504e-07, "logits/chosen": -2.439612627029419, "logits/rejected": -2.3910512924194336, "logps/chosen": -249.3505401611328, "logps/rejected": -228.9892120361328, "loss": 0.6001, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.029611006379127502, "rewards/margins": 0.2741519510746002, "rewards/rejected": -0.30376294255256653, "step": 550 }, { "epoch": 0.58, "learning_rate": 4.485265977803291e-07, "logits/chosen": -2.46055269241333, "logits/rejected": -2.3553805351257324, "logps/chosen": -293.1697692871094, "logps/rejected": -232.68115234375, "loss": 0.5852, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.061341024935245514, "rewards/margins": 0.43009573221206665, "rewards/rejected": -0.36875468492507935, "step": 560 }, { "epoch": 0.59, "learning_rate": 4.4661308840413316e-07, "logits/chosen": -2.509950637817383, "logits/rejected": -2.377487897872925, "logps/chosen": -285.7837829589844, "logps/rejected": -236.22866821289062, "loss": 0.6017, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": 0.0002725020167417824, "rewards/margins": 0.3595966100692749, "rewards/rejected": -0.3593241274356842, "step": 570 }, { "epoch": 0.6, "learning_rate": 4.446995790279372e-07, "logits/chosen": -2.4554500579833984, "logits/rejected": -2.4359169006347656, "logps/chosen": -283.475341796875, "logps/rejected": -230.9565887451172, "loss": 0.5871, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.004635247401893139, "rewards/margins": 0.32853394746780396, "rewards/rejected": -0.32389870285987854, "step": 580 }, { "epoch": 0.61, "learning_rate": 4.4278606965174123e-07, "logits/chosen": -2.472580671310425, "logits/rejected": -2.470784902572632, "logps/chosen": -262.70196533203125, "logps/rejected": -241.69784545898438, "loss": 0.5868, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.021802525967359543, "rewards/margins": 0.33116960525512695, "rewards/rejected": -0.3529720902442932, "step": 590 }, { "epoch": 0.62, "learning_rate": 4.408725602755453e-07, "logits/chosen": -2.4604482650756836, "logits/rejected": -2.4069600105285645, "logps/chosen": -233.8094482421875, "logps/rejected": -207.21536254882812, "loss": 0.5901, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.03362672030925751, "rewards/margins": 0.2702116370201111, "rewards/rejected": -0.3038383424282074, "step": 600 }, { "epoch": 0.63, "learning_rate": 4.3895905089934936e-07, "logits/chosen": -2.476191282272339, "logits/rejected": -2.3460450172424316, "logps/chosen": -276.6174621582031, "logps/rejected": -231.13705444335938, "loss": 0.5887, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.0008199826115742326, "rewards/margins": 0.3593784272670746, "rewards/rejected": -0.360198438167572, "step": 610 }, { "epoch": 0.64, "learning_rate": 4.370455415231534e-07, "logits/chosen": -2.418025493621826, "logits/rejected": -2.426182270050049, "logps/chosen": -276.7029724121094, "logps/rejected": -260.8800354003906, "loss": 0.5695, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.050284016877412796, "rewards/margins": 0.325679212808609, "rewards/rejected": -0.37596315145492554, "step": 620 }, { "epoch": 0.65, "learning_rate": 4.351320321469575e-07, "logits/chosen": -2.4381816387176514, "logits/rejected": -2.4007935523986816, "logps/chosen": -298.5264587402344, "logps/rejected": -234.97250366210938, "loss": 0.5822, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": 0.040602535009384155, "rewards/margins": 0.45465603470802307, "rewards/rejected": -0.4140535295009613, "step": 630 }, { "epoch": 0.66, "learning_rate": 4.3321852277076154e-07, "logits/chosen": -2.4025540351867676, "logits/rejected": -2.3902947902679443, "logps/chosen": -267.2010192871094, "logps/rejected": -240.09646606445312, "loss": 0.5675, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.013172095641493797, "rewards/margins": 0.4019550383090973, "rewards/rejected": -0.38878297805786133, "step": 640 }, { "epoch": 0.67, "learning_rate": 4.313050133945656e-07, "logits/chosen": -2.507800340652466, "logits/rejected": -2.4536452293395996, "logps/chosen": -291.55218505859375, "logps/rejected": -242.00558471679688, "loss": 0.5795, "rewards/accuracies": 0.75, "rewards/chosen": 0.00018588601960800588, "rewards/margins": 0.46111243963241577, "rewards/rejected": -0.4609266221523285, "step": 650 }, { "epoch": 0.68, "learning_rate": 4.2939150401836967e-07, "logits/chosen": -2.4589836597442627, "logits/rejected": -2.4613184928894043, "logps/chosen": -245.21237182617188, "logps/rejected": -246.0800323486328, "loss": 0.5688, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.06368513405323029, "rewards/margins": 0.32503411173820496, "rewards/rejected": -0.38871926069259644, "step": 660 }, { "epoch": 0.69, "learning_rate": 4.2747799464217373e-07, "logits/chosen": -2.397803783416748, "logits/rejected": -2.3992929458618164, "logps/chosen": -262.5749816894531, "logps/rejected": -231.0945281982422, "loss": 0.574, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.10684232413768768, "rewards/margins": 0.3226371705532074, "rewards/rejected": -0.4294795095920563, "step": 670 }, { "epoch": 0.7, "learning_rate": 4.255644852659778e-07, "logits/chosen": -2.4963364601135254, "logits/rejected": -2.4439988136291504, "logps/chosen": -287.1022644042969, "logps/rejected": -249.372802734375, "loss": 0.6135, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08951963484287262, "rewards/margins": 0.2950531840324402, "rewards/rejected": -0.384572833776474, "step": 680 }, { "epoch": 0.71, "learning_rate": 4.236509758897818e-07, "logits/chosen": -2.3793249130249023, "logits/rejected": -2.3877103328704834, "logps/chosen": -260.2186584472656, "logps/rejected": -218.5549774169922, "loss": 0.6012, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.07342827320098877, "rewards/margins": 0.3570996820926666, "rewards/rejected": -0.4305279850959778, "step": 690 }, { "epoch": 0.72, "learning_rate": 4.2173746651358586e-07, "logits/chosen": -2.4117445945739746, "logits/rejected": -2.3893179893493652, "logps/chosen": -260.35223388671875, "logps/rejected": -246.88528442382812, "loss": 0.5919, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.11649465560913086, "rewards/margins": 0.276920884847641, "rewards/rejected": -0.39341551065444946, "step": 700 }, { "epoch": 0.73, "learning_rate": 4.198239571373899e-07, "logits/chosen": -2.4213128089904785, "logits/rejected": -2.353787660598755, "logps/chosen": -243.3746795654297, "logps/rejected": -195.84048461914062, "loss": 0.5849, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.056009601801633835, "rewards/margins": 0.43854936957359314, "rewards/rejected": -0.4945589601993561, "step": 710 }, { "epoch": 0.74, "learning_rate": 4.17910447761194e-07, "logits/chosen": -2.46687388420105, "logits/rejected": -2.3652591705322266, "logps/chosen": -267.1708679199219, "logps/rejected": -255.4759521484375, "loss": 0.5977, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.08689933270215988, "rewards/margins": 0.33677542209625244, "rewards/rejected": -0.42367473244667053, "step": 720 }, { "epoch": 0.75, "learning_rate": 4.1599693838499805e-07, "logits/chosen": -2.4511024951934814, "logits/rejected": -2.4267566204071045, "logps/chosen": -295.463134765625, "logps/rejected": -215.197265625, "loss": 0.5815, "rewards/accuracies": 0.65625, "rewards/chosen": -0.06298734992742538, "rewards/margins": 0.3604966402053833, "rewards/rejected": -0.4234839975833893, "step": 730 }, { "epoch": 0.76, "learning_rate": 4.140834290088021e-07, "logits/chosen": -2.4394567012786865, "logits/rejected": -2.4174628257751465, "logps/chosen": -277.1340026855469, "logps/rejected": -221.7968292236328, "loss": 0.5643, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0872669368982315, "rewards/margins": 0.4404314458370209, "rewards/rejected": -0.5276983976364136, "step": 740 }, { "epoch": 0.77, "learning_rate": 4.121699196326062e-07, "logits/chosen": -2.4356467723846436, "logits/rejected": -2.309382915496826, "logps/chosen": -218.226318359375, "logps/rejected": -185.0907440185547, "loss": 0.5819, "rewards/accuracies": 0.6875, "rewards/chosen": -0.055658143013715744, "rewards/margins": 0.3867154121398926, "rewards/rejected": -0.44237351417541504, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.1025641025641024e-07, "logits/chosen": -2.398838520050049, "logits/rejected": -2.4070441722869873, "logps/chosen": -259.14996337890625, "logps/rejected": -243.26882934570312, "loss": 0.5757, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06217324733734131, "rewards/margins": 0.4077116549015045, "rewards/rejected": -0.4698849320411682, "step": 760 }, { "epoch": 0.8, "learning_rate": 4.083429008802143e-07, "logits/chosen": -2.5257222652435303, "logits/rejected": -2.471179485321045, "logps/chosen": -274.23980712890625, "logps/rejected": -213.1348876953125, "loss": 0.5549, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.041472114622592926, "rewards/margins": 0.4583619236946106, "rewards/rejected": -0.4998340606689453, "step": 770 }, { "epoch": 0.81, "learning_rate": 4.0642939150401836e-07, "logits/chosen": -2.488083600997925, "logits/rejected": -2.3540916442871094, "logps/chosen": -290.1893310546875, "logps/rejected": -217.73001098632812, "loss": 0.5702, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.0671161487698555, "rewards/margins": 0.4410739541053772, "rewards/rejected": -0.5081900954246521, "step": 780 }, { "epoch": 0.82, "learning_rate": 4.0451588212782237e-07, "logits/chosen": -2.4297478199005127, "logits/rejected": -2.3958935737609863, "logps/chosen": -255.67984008789062, "logps/rejected": -227.6651153564453, "loss": 0.5417, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.0655713826417923, "rewards/margins": 0.5016980171203613, "rewards/rejected": -0.5672693252563477, "step": 790 }, { "epoch": 0.83, "learning_rate": 4.0260237275162643e-07, "logits/chosen": -2.368698835372925, "logits/rejected": -2.366753578186035, "logps/chosen": -232.51876831054688, "logps/rejected": -227.71176147460938, "loss": 0.574, "rewards/accuracies": 0.71875, "rewards/chosen": -0.06221083551645279, "rewards/margins": 0.45073550939559937, "rewards/rejected": -0.5129462480545044, "step": 800 }, { "epoch": 0.84, "learning_rate": 4.006888633754305e-07, "logits/chosen": -2.523768186569214, "logits/rejected": -2.472125291824341, "logps/chosen": -277.5516662597656, "logps/rejected": -229.77294921875, "loss": 0.5448, "rewards/accuracies": 0.75, "rewards/chosen": -0.022703617811203003, "rewards/margins": 0.5766944289207458, "rewards/rejected": -0.5993980169296265, "step": 810 }, { "epoch": 0.85, "learning_rate": 3.9877535399923456e-07, "logits/chosen": -2.407522201538086, "logits/rejected": -2.370136260986328, "logps/chosen": -267.1480712890625, "logps/rejected": -224.8208770751953, "loss": 0.5611, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.05944644287228584, "rewards/margins": 0.5258339643478394, "rewards/rejected": -0.5852803587913513, "step": 820 }, { "epoch": 0.86, "learning_rate": 3.968618446230386e-07, "logits/chosen": -2.4421579837799072, "logits/rejected": -2.4140655994415283, "logps/chosen": -319.13446044921875, "logps/rejected": -247.4228973388672, "loss": 0.5424, "rewards/accuracies": 0.75, "rewards/chosen": -0.0017402932280674577, "rewards/margins": 0.6056521534919739, "rewards/rejected": -0.607392430305481, "step": 830 }, { "epoch": 0.87, "learning_rate": 3.949483352468427e-07, "logits/chosen": -2.4426145553588867, "logits/rejected": -2.408177614212036, "logps/chosen": -275.4983215332031, "logps/rejected": -240.1235809326172, "loss": 0.6003, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.188354954123497, "rewards/margins": 0.3069398105144501, "rewards/rejected": -0.49529480934143066, "step": 840 }, { "epoch": 0.88, "learning_rate": 3.9303482587064674e-07, "logits/chosen": -2.4034695625305176, "logits/rejected": -2.420605421066284, "logps/chosen": -282.95208740234375, "logps/rejected": -238.9861297607422, "loss": 0.5644, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07752462476491928, "rewards/margins": 0.49765148758888245, "rewards/rejected": -0.5751761198043823, "step": 850 }, { "epoch": 0.89, "learning_rate": 3.911213164944508e-07, "logits/chosen": -2.448660373687744, "logits/rejected": -2.3693861961364746, "logps/chosen": -291.59942626953125, "logps/rejected": -245.4176483154297, "loss": 0.564, "rewards/accuracies": 0.6875, "rewards/chosen": -0.07620221376419067, "rewards/margins": 0.5494655966758728, "rewards/rejected": -0.6256678700447083, "step": 860 }, { "epoch": 0.9, "learning_rate": 3.8920780711825487e-07, "logits/chosen": -2.332820177078247, "logits/rejected": -2.3668315410614014, "logps/chosen": -270.84954833984375, "logps/rejected": -226.4775390625, "loss": 0.5639, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06978223472833633, "rewards/margins": 0.528697669506073, "rewards/rejected": -0.5984798669815063, "step": 870 }, { "epoch": 0.91, "learning_rate": 3.8729429774205893e-07, "logits/chosen": -2.414726495742798, "logits/rejected": -2.3955094814300537, "logps/chosen": -265.5794982910156, "logps/rejected": -221.8883056640625, "loss": 0.5595, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09429865330457687, "rewards/margins": 0.48285895586013794, "rewards/rejected": -0.5771576166152954, "step": 880 }, { "epoch": 0.92, "learning_rate": 3.8538078836586294e-07, "logits/chosen": -2.4266517162323, "logits/rejected": -2.3529062271118164, "logps/chosen": -255.7076873779297, "logps/rejected": -249.5386199951172, "loss": 0.5809, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1304847002029419, "rewards/margins": 0.40244507789611816, "rewards/rejected": -0.5329297780990601, "step": 890 }, { "epoch": 0.93, "learning_rate": 3.83467278989667e-07, "logits/chosen": -2.407705545425415, "logits/rejected": -2.356353282928467, "logps/chosen": -281.96405029296875, "logps/rejected": -222.4244384765625, "loss": 0.5463, "rewards/accuracies": 0.768750011920929, "rewards/chosen": 0.0021881351713091135, "rewards/margins": 0.5727441310882568, "rewards/rejected": -0.5705560445785522, "step": 900 }, { "epoch": 0.94, "learning_rate": 3.8155376961347106e-07, "logits/chosen": -2.420719861984253, "logits/rejected": -2.4088523387908936, "logps/chosen": -250.53616333007812, "logps/rejected": -203.44956970214844, "loss": 0.5655, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.07977491617202759, "rewards/margins": 0.5309610366821289, "rewards/rejected": -0.6107359528541565, "step": 910 }, { "epoch": 0.95, "learning_rate": 3.796402602372751e-07, "logits/chosen": -2.426771640777588, "logits/rejected": -2.343543291091919, "logps/chosen": -272.2907409667969, "logps/rejected": -252.50698852539062, "loss": 0.5557, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.0921485498547554, "rewards/margins": 0.42352181673049927, "rewards/rejected": -0.5156703591346741, "step": 920 }, { "epoch": 0.96, "learning_rate": 3.777267508610792e-07, "logits/chosen": -2.493851661682129, "logits/rejected": -2.429084539413452, "logps/chosen": -270.8280944824219, "logps/rejected": -260.43084716796875, "loss": 0.5676, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.10896792262792587, "rewards/margins": 0.5754088163375854, "rewards/rejected": -0.6843767166137695, "step": 930 }, { "epoch": 0.97, "learning_rate": 3.7581324148488325e-07, "logits/chosen": -2.4216136932373047, "logits/rejected": -2.37446665763855, "logps/chosen": -299.2573547363281, "logps/rejected": -224.32192993164062, "loss": 0.5703, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09940418601036072, "rewards/margins": 0.46459144353866577, "rewards/rejected": -0.5639955401420593, "step": 940 }, { "epoch": 0.98, "learning_rate": 3.738997321086873e-07, "logits/chosen": -2.4052574634552, "logits/rejected": -2.359984874725342, "logps/chosen": -257.97174072265625, "logps/rejected": -224.43533325195312, "loss": 0.569, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.055308748036623, "rewards/margins": 0.41939839720726013, "rewards/rejected": -0.47470712661743164, "step": 950 }, { "epoch": 0.99, "learning_rate": 3.7198622273249137e-07, "logits/chosen": -2.4320504665374756, "logits/rejected": -2.4140851497650146, "logps/chosen": -262.2895202636719, "logps/rejected": -195.298583984375, "loss": 0.5516, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0959898829460144, "rewards/margins": 0.4007217288017273, "rewards/rejected": -0.4967115819454193, "step": 960 }, { "epoch": 1.0, "eval_logits/chosen": -2.11159610748291, "eval_logits/rejected": -1.9903388023376465, "eval_logps/chosen": -265.77178955078125, "eval_logps/rejected": -225.71365356445312, "eval_loss": 0.5546568632125854, "eval_rewards/accuracies": 0.7160000205039978, "eval_rewards/chosen": -0.11396687477827072, "eval_rewards/margins": 0.5291071534156799, "eval_rewards/rejected": -0.6430740356445312, "eval_runtime": 602.672, "eval_samples_per_second": 3.319, "eval_steps_per_second": 0.207, "step": 968 }, { "epoch": 1.0, "learning_rate": 3.7007271335629544e-07, "logits/chosen": -2.3931021690368652, "logits/rejected": -2.2940726280212402, "logps/chosen": -265.32965087890625, "logps/rejected": -216.5413055419922, "loss": 0.5557, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06538908183574677, "rewards/margins": 0.636194109916687, "rewards/rejected": -0.7015832662582397, "step": 970 }, { "epoch": 1.01, "learning_rate": 3.681592039800995e-07, "logits/chosen": -2.3853671550750732, "logits/rejected": -2.3550171852111816, "logps/chosen": -242.0519561767578, "logps/rejected": -217.999755859375, "loss": 0.5501, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.10068665444850922, "rewards/margins": 0.456498384475708, "rewards/rejected": -0.5571850538253784, "step": 980 }, { "epoch": 1.02, "learning_rate": 3.662456946039035e-07, "logits/chosen": -2.339399814605713, "logits/rejected": -2.3183839321136475, "logps/chosen": -231.6448974609375, "logps/rejected": -201.71688842773438, "loss": 0.5999, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19742384552955627, "rewards/margins": 0.38226670026779175, "rewards/rejected": -0.5796905159950256, "step": 990 }, { "epoch": 1.03, "learning_rate": 3.6433218522770757e-07, "logits/chosen": -2.3831546306610107, "logits/rejected": -2.319021701812744, "logps/chosen": -295.373291015625, "logps/rejected": -220.7757110595703, "loss": 0.5809, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.07577923685312271, "rewards/margins": 0.4884832501411438, "rewards/rejected": -0.5642624497413635, "step": 1000 }, { "epoch": 1.04, "learning_rate": 3.6241867585151163e-07, "logits/chosen": -2.4494717121124268, "logits/rejected": -2.387357711791992, "logps/chosen": -248.5869140625, "logps/rejected": -225.55770874023438, "loss": 0.5584, "rewards/accuracies": 0.75, "rewards/chosen": -0.13781091570854187, "rewards/margins": 0.47680073976516724, "rewards/rejected": -0.6146116256713867, "step": 1010 }, { "epoch": 1.05, "learning_rate": 3.605051664753157e-07, "logits/chosen": -2.4245686531066895, "logits/rejected": -2.430293560028076, "logps/chosen": -251.4219207763672, "logps/rejected": -202.93777465820312, "loss": 0.5474, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09323601424694061, "rewards/margins": 0.5475600957870483, "rewards/rejected": -0.6407961249351501, "step": 1020 }, { "epoch": 1.06, "learning_rate": 3.5859165709911975e-07, "logits/chosen": -2.3130688667297363, "logits/rejected": -2.319304943084717, "logps/chosen": -292.4891052246094, "logps/rejected": -256.63336181640625, "loss": 0.5382, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.045892782509326935, "rewards/margins": 0.6422568559646606, "rewards/rejected": -0.688149631023407, "step": 1030 }, { "epoch": 1.07, "learning_rate": 3.566781477229238e-07, "logits/chosen": -2.3673741817474365, "logits/rejected": -2.2591471672058105, "logps/chosen": -259.9045104980469, "logps/rejected": -235.18899536132812, "loss": 0.5544, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09036435186862946, "rewards/margins": 0.5122971534729004, "rewards/rejected": -0.6026615500450134, "step": 1040 }, { "epoch": 1.08, "learning_rate": 3.547646383467279e-07, "logits/chosen": -2.3258612155914307, "logits/rejected": -2.3310484886169434, "logps/chosen": -256.3821105957031, "logps/rejected": -235.92697143554688, "loss": 0.5302, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.04464683681726456, "rewards/margins": 0.6154407858848572, "rewards/rejected": -0.6600876450538635, "step": 1050 }, { "epoch": 1.09, "learning_rate": 3.5285112897053194e-07, "logits/chosen": -2.401237726211548, "logits/rejected": -2.2862296104431152, "logps/chosen": -275.3665466308594, "logps/rejected": -235.6302032470703, "loss": 0.5405, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.1668693572282791, "rewards/margins": 0.7094846367835999, "rewards/rejected": -0.8763540387153625, "step": 1060 }, { "epoch": 1.11, "learning_rate": 3.50937619594336e-07, "logits/chosen": -2.427232265472412, "logits/rejected": -2.3744897842407227, "logps/chosen": -291.2286682128906, "logps/rejected": -261.8435363769531, "loss": 0.5492, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0822446197271347, "rewards/margins": 0.643202543258667, "rewards/rejected": -0.7254471778869629, "step": 1070 }, { "epoch": 1.12, "learning_rate": 3.4902411021814007e-07, "logits/chosen": -2.4489409923553467, "logits/rejected": -2.3586411476135254, "logps/chosen": -294.7502746582031, "logps/rejected": -229.1472625732422, "loss": 0.5273, "rewards/accuracies": 0.8125, "rewards/chosen": -0.000645542168058455, "rewards/margins": 0.7959606051445007, "rewards/rejected": -0.796606183052063, "step": 1080 }, { "epoch": 1.13, "learning_rate": 3.4711060084194413e-07, "logits/chosen": -2.3935751914978027, "logits/rejected": -2.4032771587371826, "logps/chosen": -272.8512878417969, "logps/rejected": -228.17575073242188, "loss": 0.5702, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.05014977604150772, "rewards/margins": 0.5901867747306824, "rewards/rejected": -0.6403365135192871, "step": 1090 }, { "epoch": 1.14, "learning_rate": 3.4519709146574814e-07, "logits/chosen": -2.474759578704834, "logits/rejected": -2.3402395248413086, "logps/chosen": -267.2554931640625, "logps/rejected": -247.7689666748047, "loss": 0.5598, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.08641926199197769, "rewards/margins": 0.5510476231575012, "rewards/rejected": -0.6374668478965759, "step": 1100 }, { "epoch": 1.15, "learning_rate": 3.432835820895522e-07, "logits/chosen": -2.408102512359619, "logits/rejected": -2.3299994468688965, "logps/chosen": -249.98458862304688, "logps/rejected": -230.2236328125, "loss": 0.5427, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10258053243160248, "rewards/margins": 0.5458223223686218, "rewards/rejected": -0.6484029293060303, "step": 1110 }, { "epoch": 1.16, "learning_rate": 3.4137007271335626e-07, "logits/chosen": -2.349371910095215, "logits/rejected": -2.3972179889678955, "logps/chosen": -218.875, "logps/rejected": -212.8376922607422, "loss": 0.5422, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.180108904838562, "rewards/margins": 0.4184727072715759, "rewards/rejected": -0.5985815525054932, "step": 1120 }, { "epoch": 1.17, "learning_rate": 3.394565633371603e-07, "logits/chosen": -2.358839511871338, "logits/rejected": -2.299063205718994, "logps/chosen": -293.2892150878906, "logps/rejected": -245.3092498779297, "loss": 0.5364, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14460349082946777, "rewards/margins": 0.6029571294784546, "rewards/rejected": -0.7475606203079224, "step": 1130 }, { "epoch": 1.18, "learning_rate": 3.375430539609644e-07, "logits/chosen": -2.2807836532592773, "logits/rejected": -2.345672130584717, "logps/chosen": -239.72476196289062, "logps/rejected": -211.0787353515625, "loss": 0.5132, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.13182897865772247, "rewards/margins": 0.6086454391479492, "rewards/rejected": -0.7404743432998657, "step": 1140 }, { "epoch": 1.19, "learning_rate": 3.3562954458476845e-07, "logits/chosen": -2.441943407058716, "logits/rejected": -2.4284987449645996, "logps/chosen": -306.66900634765625, "logps/rejected": -233.69290161132812, "loss": 0.5589, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.15195252001285553, "rewards/margins": 0.5843501687049866, "rewards/rejected": -0.7363026738166809, "step": 1150 }, { "epoch": 1.2, "learning_rate": 3.337160352085725e-07, "logits/chosen": -2.4269309043884277, "logits/rejected": -2.3844103813171387, "logps/chosen": -298.20721435546875, "logps/rejected": -241.6977996826172, "loss": 0.5365, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.07097179442644119, "rewards/margins": 0.7560560703277588, "rewards/rejected": -0.8270279169082642, "step": 1160 }, { "epoch": 1.21, "learning_rate": 3.3180252583237657e-07, "logits/chosen": -2.336698532104492, "logits/rejected": -2.2902188301086426, "logps/chosen": -278.00860595703125, "logps/rejected": -231.4420928955078, "loss": 0.5528, "rewards/accuracies": 0.75, "rewards/chosen": -0.06676442921161652, "rewards/margins": 0.5907629132270813, "rewards/rejected": -0.6575273275375366, "step": 1170 }, { "epoch": 1.22, "learning_rate": 3.2988901645618063e-07, "logits/chosen": -2.368316650390625, "logits/rejected": -2.2780606746673584, "logps/chosen": -261.25482177734375, "logps/rejected": -225.92269897460938, "loss": 0.5388, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1373300552368164, "rewards/margins": 0.5785337686538696, "rewards/rejected": -0.715863823890686, "step": 1180 }, { "epoch": 1.23, "learning_rate": 3.279755070799847e-07, "logits/chosen": -2.402346134185791, "logits/rejected": -2.3095052242279053, "logps/chosen": -259.71075439453125, "logps/rejected": -257.037353515625, "loss": 0.5278, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.15283453464508057, "rewards/margins": 0.526613175868988, "rewards/rejected": -0.6794477105140686, "step": 1190 }, { "epoch": 1.24, "learning_rate": 3.260619977037887e-07, "logits/chosen": -2.3968770503997803, "logits/rejected": -2.340967893600464, "logps/chosen": -251.2769775390625, "logps/rejected": -214.03146362304688, "loss": 0.5209, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.07637427002191544, "rewards/margins": 0.7686988711357117, "rewards/rejected": -0.8450730443000793, "step": 1200 }, { "epoch": 1.25, "learning_rate": 3.2414848832759277e-07, "logits/chosen": -2.4229166507720947, "logits/rejected": -2.2702252864837646, "logps/chosen": -261.66217041015625, "logps/rejected": -227.0311279296875, "loss": 0.534, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.14070875942707062, "rewards/margins": 0.5856814384460449, "rewards/rejected": -0.7263902425765991, "step": 1210 }, { "epoch": 1.26, "learning_rate": 3.2223497895139683e-07, "logits/chosen": -2.3875441551208496, "logits/rejected": -2.3234646320343018, "logps/chosen": -263.7579040527344, "logps/rejected": -219.34719848632812, "loss": 0.5537, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11774899810552597, "rewards/margins": 0.5334910154342651, "rewards/rejected": -0.6512399911880493, "step": 1220 }, { "epoch": 1.27, "learning_rate": 3.203214695752009e-07, "logits/chosen": -2.4645657539367676, "logits/rejected": -2.385596990585327, "logps/chosen": -262.8403015136719, "logps/rejected": -230.195556640625, "loss": 0.5689, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15035703778266907, "rewards/margins": 0.5768887400627136, "rewards/rejected": -0.7272458076477051, "step": 1230 }, { "epoch": 1.28, "learning_rate": 3.1840796019900495e-07, "logits/chosen": -2.426673173904419, "logits/rejected": -2.3877110481262207, "logps/chosen": -262.39178466796875, "logps/rejected": -254.9650115966797, "loss": 0.5457, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.11363549530506134, "rewards/margins": 0.6810011267662048, "rewards/rejected": -0.7946366667747498, "step": 1240 }, { "epoch": 1.29, "learning_rate": 3.16494450822809e-07, "logits/chosen": -2.3528525829315186, "logits/rejected": -2.3542237281799316, "logps/chosen": -236.56967163085938, "logps/rejected": -233.4956512451172, "loss": 0.5372, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.18625274300575256, "rewards/margins": 0.48403066396713257, "rewards/rejected": -0.6702834367752075, "step": 1250 }, { "epoch": 1.3, "learning_rate": 3.145809414466131e-07, "logits/chosen": -2.48598313331604, "logits/rejected": -2.4767704010009766, "logps/chosen": -272.78662109375, "logps/rejected": -266.92413330078125, "loss": 0.5373, "rewards/accuracies": 0.71875, "rewards/chosen": -0.14394977688789368, "rewards/margins": 0.49152618646621704, "rewards/rejected": -0.6354759335517883, "step": 1260 }, { "epoch": 1.31, "learning_rate": 3.1266743207041714e-07, "logits/chosen": -2.4058761596679688, "logits/rejected": -2.403879165649414, "logps/chosen": -247.5891571044922, "logps/rejected": -226.5963592529297, "loss": 0.5451, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.14257661998271942, "rewards/margins": 0.6169610023498535, "rewards/rejected": -0.7595376968383789, "step": 1270 }, { "epoch": 1.32, "learning_rate": 3.107539226942212e-07, "logits/chosen": -2.392540216445923, "logits/rejected": -2.370917558670044, "logps/chosen": -259.8185729980469, "logps/rejected": -240.8582000732422, "loss": 0.5764, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.14314888417720795, "rewards/margins": 0.5050605535507202, "rewards/rejected": -0.6482094526290894, "step": 1280 }, { "epoch": 1.33, "learning_rate": 3.0884041331802526e-07, "logits/chosen": -2.3564376831054688, "logits/rejected": -2.2619478702545166, "logps/chosen": -243.14151000976562, "logps/rejected": -216.67678833007812, "loss": 0.5529, "rewards/accuracies": 0.71875, "rewards/chosen": -0.17953407764434814, "rewards/margins": 0.5211489200592041, "rewards/rejected": -0.7006829977035522, "step": 1290 }, { "epoch": 1.34, "learning_rate": 3.0692690394182927e-07, "logits/chosen": -2.435981035232544, "logits/rejected": -2.3964314460754395, "logps/chosen": -277.2529296875, "logps/rejected": -233.1074676513672, "loss": 0.5528, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.05609757825732231, "rewards/margins": 0.6833099722862244, "rewards/rejected": -0.7394075393676758, "step": 1300 }, { "epoch": 1.35, "learning_rate": 3.0501339456563334e-07, "logits/chosen": -2.414605140686035, "logits/rejected": -2.270141363143921, "logps/chosen": -269.33770751953125, "logps/rejected": -243.461669921875, "loss": 0.5338, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1670106053352356, "rewards/margins": 0.5737408995628357, "rewards/rejected": -0.7407516241073608, "step": 1310 }, { "epoch": 1.36, "learning_rate": 3.030998851894374e-07, "logits/chosen": -2.3524057865142822, "logits/rejected": -2.3626418113708496, "logps/chosen": -229.76596069335938, "logps/rejected": -221.08889770507812, "loss": 0.5137, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.07673561573028564, "rewards/margins": 0.6764390468597412, "rewards/rejected": -0.7531746029853821, "step": 1320 }, { "epoch": 1.37, "learning_rate": 3.0118637581324146e-07, "logits/chosen": -2.422010898590088, "logits/rejected": -2.3673031330108643, "logps/chosen": -257.2743835449219, "logps/rejected": -223.86349487304688, "loss": 0.5407, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.09269039332866669, "rewards/margins": 0.7229348421096802, "rewards/rejected": -0.8156253099441528, "step": 1330 }, { "epoch": 1.38, "learning_rate": 2.992728664370455e-07, "logits/chosen": -2.4492976665496826, "logits/rejected": -2.3872756958007812, "logps/chosen": -239.734375, "logps/rejected": -225.47982788085938, "loss": 0.5593, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.21187777817249298, "rewards/margins": 0.5356577634811401, "rewards/rejected": -0.7475355267524719, "step": 1340 }, { "epoch": 1.39, "learning_rate": 2.973593570608496e-07, "logits/chosen": -2.3429622650146484, "logits/rejected": -2.2622060775756836, "logps/chosen": -261.2279052734375, "logps/rejected": -220.1876983642578, "loss": 0.552, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.11654462665319443, "rewards/margins": 0.6566459536552429, "rewards/rejected": -0.7731907367706299, "step": 1350 }, { "epoch": 1.4, "learning_rate": 2.9544584768465365e-07, "logits/chosen": -2.415435314178467, "logits/rejected": -2.3692467212677, "logps/chosen": -310.05035400390625, "logps/rejected": -254.33297729492188, "loss": 0.553, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1930284947156906, "rewards/margins": 0.6540157198905945, "rewards/rejected": -0.8470442891120911, "step": 1360 }, { "epoch": 1.41, "learning_rate": 2.935323383084577e-07, "logits/chosen": -2.382997989654541, "logits/rejected": -2.331480026245117, "logps/chosen": -290.0738830566406, "logps/rejected": -242.3267822265625, "loss": 0.5599, "rewards/accuracies": 0.6875, "rewards/chosen": -0.09350194036960602, "rewards/margins": 0.5290672183036804, "rewards/rejected": -0.6225691437721252, "step": 1370 }, { "epoch": 1.43, "learning_rate": 2.9161882893226177e-07, "logits/chosen": -2.358276128768921, "logits/rejected": -2.2521064281463623, "logps/chosen": -240.8756561279297, "logps/rejected": -204.3744659423828, "loss": 0.5443, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.21055755019187927, "rewards/margins": 0.43394798040390015, "rewards/rejected": -0.6445055603981018, "step": 1380 }, { "epoch": 1.44, "learning_rate": 2.8970531955606583e-07, "logits/chosen": -2.453087568283081, "logits/rejected": -2.353877305984497, "logps/chosen": -270.0442810058594, "logps/rejected": -231.3137664794922, "loss": 0.5677, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.15533334016799927, "rewards/margins": 0.6584349870681763, "rewards/rejected": -0.8137682676315308, "step": 1390 }, { "epoch": 1.45, "learning_rate": 2.8779181017986984e-07, "logits/chosen": -2.4245669841766357, "logits/rejected": -2.402334213256836, "logps/chosen": -269.0023498535156, "logps/rejected": -239.24465942382812, "loss": 0.5534, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.22131021320819855, "rewards/margins": 0.5528482794761658, "rewards/rejected": -0.7741583585739136, "step": 1400 }, { "epoch": 1.46, "learning_rate": 2.858783008036739e-07, "logits/chosen": -2.314622402191162, "logits/rejected": -2.2626185417175293, "logps/chosen": -278.54620361328125, "logps/rejected": -220.9359893798828, "loss": 0.5297, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1343500018119812, "rewards/margins": 0.6538098454475403, "rewards/rejected": -0.7881597280502319, "step": 1410 }, { "epoch": 1.47, "learning_rate": 2.8396479142747797e-07, "logits/chosen": -2.3986763954162598, "logits/rejected": -2.3692593574523926, "logps/chosen": -230.23330688476562, "logps/rejected": -201.45968627929688, "loss": 0.5397, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16032734513282776, "rewards/margins": 0.5956254005432129, "rewards/rejected": -0.7559527158737183, "step": 1420 }, { "epoch": 1.48, "learning_rate": 2.8205128205128203e-07, "logits/chosen": -2.3531031608581543, "logits/rejected": -2.3053078651428223, "logps/chosen": -260.5692443847656, "logps/rejected": -229.7131805419922, "loss": 0.5422, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.27216142416000366, "rewards/margins": 0.5033277869224548, "rewards/rejected": -0.7754892110824585, "step": 1430 }, { "epoch": 1.49, "learning_rate": 2.801377726750861e-07, "logits/chosen": -2.386915683746338, "logits/rejected": -2.315340757369995, "logps/chosen": -256.1626281738281, "logps/rejected": -217.6416473388672, "loss": 0.5438, "rewards/accuracies": 0.65625, "rewards/chosen": -0.18697381019592285, "rewards/margins": 0.49360641837120056, "rewards/rejected": -0.6805802583694458, "step": 1440 }, { "epoch": 1.5, "learning_rate": 2.7822426329889015e-07, "logits/chosen": -2.428358554840088, "logits/rejected": -2.3230361938476562, "logps/chosen": -294.2705993652344, "logps/rejected": -235.54171752929688, "loss": 0.5623, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.146778404712677, "rewards/margins": 0.6038089394569397, "rewards/rejected": -0.7505873441696167, "step": 1450 }, { "epoch": 1.51, "learning_rate": 2.763107539226942e-07, "logits/chosen": -2.3813319206237793, "logits/rejected": -2.3310248851776123, "logps/chosen": -280.82110595703125, "logps/rejected": -242.4944305419922, "loss": 0.5582, "rewards/accuracies": 0.6875, "rewards/chosen": -0.12899862229824066, "rewards/margins": 0.6421502828598022, "rewards/rejected": -0.7711488604545593, "step": 1460 }, { "epoch": 1.52, "learning_rate": 2.743972445464983e-07, "logits/chosen": -2.4226574897766113, "logits/rejected": -2.411165475845337, "logps/chosen": -254.8177947998047, "logps/rejected": -232.544921875, "loss": 0.5599, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.1896516978740692, "rewards/margins": 0.4945623278617859, "rewards/rejected": -0.6842139959335327, "step": 1470 }, { "epoch": 1.53, "learning_rate": 2.7248373517030234e-07, "logits/chosen": -2.475400924682617, "logits/rejected": -2.419384479522705, "logps/chosen": -274.40484619140625, "logps/rejected": -237.3146209716797, "loss": 0.5407, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16451963782310486, "rewards/margins": 0.6172757744789124, "rewards/rejected": -0.7817953824996948, "step": 1480 }, { "epoch": 1.54, "learning_rate": 2.705702257941064e-07, "logits/chosen": -2.433335304260254, "logits/rejected": -2.435105562210083, "logps/chosen": -311.65618896484375, "logps/rejected": -246.72998046875, "loss": 0.5362, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.16845114529132843, "rewards/margins": 0.6784511804580688, "rewards/rejected": -0.8469023704528809, "step": 1490 }, { "epoch": 1.55, "learning_rate": 2.686567164179104e-07, "logits/chosen": -2.3490521907806396, "logits/rejected": -2.275282144546509, "logps/chosen": -259.00787353515625, "logps/rejected": -219.07669067382812, "loss": 0.5453, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.16528849303722382, "rewards/margins": 0.5990539193153381, "rewards/rejected": -0.7643424272537231, "step": 1500 }, { "epoch": 1.56, "learning_rate": 2.6674320704171447e-07, "logits/chosen": -2.4171411991119385, "logits/rejected": -2.3591837882995605, "logps/chosen": -259.59521484375, "logps/rejected": -206.38058471679688, "loss": 0.5221, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.10826051235198975, "rewards/margins": 0.7560392022132874, "rewards/rejected": -0.8642997741699219, "step": 1510 }, { "epoch": 1.57, "learning_rate": 2.6482969766551853e-07, "logits/chosen": -2.4498002529144287, "logits/rejected": -2.447680950164795, "logps/chosen": -274.2942810058594, "logps/rejected": -237.7410430908203, "loss": 0.5422, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.0813172236084938, "rewards/margins": 0.7396507859230042, "rewards/rejected": -0.8209678530693054, "step": 1520 }, { "epoch": 1.58, "learning_rate": 2.629161882893226e-07, "logits/chosen": -2.446469783782959, "logits/rejected": -2.3727550506591797, "logps/chosen": -280.4594421386719, "logps/rejected": -214.17269897460938, "loss": 0.5184, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.1613025665283203, "rewards/margins": 0.7477348446846008, "rewards/rejected": -0.9090374708175659, "step": 1530 }, { "epoch": 1.59, "learning_rate": 2.6100267891312666e-07, "logits/chosen": -2.3352808952331543, "logits/rejected": -2.3035221099853516, "logps/chosen": -269.15008544921875, "logps/rejected": -208.6403350830078, "loss": 0.5564, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1250370442867279, "rewards/margins": 0.6833819150924683, "rewards/rejected": -0.8084190487861633, "step": 1540 }, { "epoch": 1.6, "learning_rate": 2.590891695369307e-07, "logits/chosen": -2.4506659507751465, "logits/rejected": -2.362159490585327, "logps/chosen": -256.0645751953125, "logps/rejected": -210.37814331054688, "loss": 0.5268, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.16175204515457153, "rewards/margins": 0.5650383830070496, "rewards/rejected": -0.7267904281616211, "step": 1550 }, { "epoch": 1.61, "learning_rate": 2.571756601607348e-07, "logits/chosen": -2.418126106262207, "logits/rejected": -2.3879923820495605, "logps/chosen": -294.83013916015625, "logps/rejected": -217.4688720703125, "loss": 0.5443, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.19011719524860382, "rewards/margins": 0.65594881772995, "rewards/rejected": -0.8460659980773926, "step": 1560 }, { "epoch": 1.62, "learning_rate": 2.5526215078453884e-07, "logits/chosen": -2.4299476146698, "logits/rejected": -2.3945670127868652, "logps/chosen": -301.6930847167969, "logps/rejected": -237.73892211914062, "loss": 0.5348, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1024274080991745, "rewards/margins": 0.7538636326789856, "rewards/rejected": -0.8562909960746765, "step": 1570 }, { "epoch": 1.63, "learning_rate": 2.533486414083429e-07, "logits/chosen": -2.4256393909454346, "logits/rejected": -2.3724522590637207, "logps/chosen": -306.9393615722656, "logps/rejected": -249.2686004638672, "loss": 0.516, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.08279596269130707, "rewards/margins": 0.7269363403320312, "rewards/rejected": -0.8097323179244995, "step": 1580 }, { "epoch": 1.64, "learning_rate": 2.5143513203214697e-07, "logits/chosen": -2.3666234016418457, "logits/rejected": -2.3259263038635254, "logps/chosen": -275.7200012207031, "logps/rejected": -225.26425170898438, "loss": 0.5617, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20898135006427765, "rewards/margins": 0.559633195400238, "rewards/rejected": -0.7686145305633545, "step": 1590 }, { "epoch": 1.65, "learning_rate": 2.49521622655951e-07, "logits/chosen": -2.4929592609405518, "logits/rejected": -2.355666160583496, "logps/chosen": -293.1372375488281, "logps/rejected": -262.8376159667969, "loss": 0.5547, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.04876649007201195, "rewards/margins": 0.7978218197822571, "rewards/rejected": -0.8465882539749146, "step": 1600 }, { "epoch": 1.66, "learning_rate": 2.4760811327975504e-07, "logits/chosen": -2.4678738117218018, "logits/rejected": -2.3763632774353027, "logps/chosen": -273.61614990234375, "logps/rejected": -253.130126953125, "loss": 0.5343, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1364731788635254, "rewards/margins": 0.5863515734672546, "rewards/rejected": -0.72282475233078, "step": 1610 }, { "epoch": 1.67, "learning_rate": 2.456946039035591e-07, "logits/chosen": -2.4671170711517334, "logits/rejected": -2.4366583824157715, "logps/chosen": -290.646484375, "logps/rejected": -257.22418212890625, "loss": 0.5646, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.10060103982686996, "rewards/margins": 0.7575327754020691, "rewards/rejected": -0.8581337928771973, "step": 1620 }, { "epoch": 1.68, "learning_rate": 2.4378109452736316e-07, "logits/chosen": -2.3189241886138916, "logits/rejected": -2.3253917694091797, "logps/chosen": -272.53802490234375, "logps/rejected": -198.298095703125, "loss": 0.5301, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.10420193523168564, "rewards/margins": 0.7064443826675415, "rewards/rejected": -0.8106463551521301, "step": 1630 }, { "epoch": 1.69, "learning_rate": 2.418675851511672e-07, "logits/chosen": -2.3077940940856934, "logits/rejected": -2.3118600845336914, "logps/chosen": -281.74365234375, "logps/rejected": -221.730712890625, "loss": 0.5622, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.18291965126991272, "rewards/margins": 0.653192937374115, "rewards/rejected": -0.8361126184463501, "step": 1640 }, { "epoch": 1.7, "learning_rate": 2.399540757749713e-07, "logits/chosen": -2.373295783996582, "logits/rejected": -2.2997496128082275, "logps/chosen": -250.97915649414062, "logps/rejected": -216.4395751953125, "loss": 0.5263, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.07915716618299484, "rewards/margins": 0.9101131558418274, "rewards/rejected": -0.9892703294754028, "step": 1650 }, { "epoch": 1.71, "learning_rate": 2.3804056639877535e-07, "logits/chosen": -2.3339505195617676, "logits/rejected": -2.2506117820739746, "logps/chosen": -269.00439453125, "logps/rejected": -230.38687133789062, "loss": 0.5566, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12828503549098969, "rewards/margins": 0.7284771203994751, "rewards/rejected": -0.8567621111869812, "step": 1660 }, { "epoch": 1.72, "learning_rate": 2.361270570225794e-07, "logits/chosen": -2.3134007453918457, "logits/rejected": -2.2540464401245117, "logps/chosen": -263.0733337402344, "logps/rejected": -223.71591186523438, "loss": 0.5048, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.10775689035654068, "rewards/margins": 0.7968653440475464, "rewards/rejected": -0.9046221971511841, "step": 1670 }, { "epoch": 1.74, "learning_rate": 2.3421354764638345e-07, "logits/chosen": -2.438647747039795, "logits/rejected": -2.3648548126220703, "logps/chosen": -296.474365234375, "logps/rejected": -244.5324249267578, "loss": 0.5559, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.16449935734272003, "rewards/margins": 0.6564738750457764, "rewards/rejected": -0.8209732174873352, "step": 1680 }, { "epoch": 1.75, "learning_rate": 2.323000382701875e-07, "logits/chosen": -2.3885536193847656, "logits/rejected": -2.274456739425659, "logps/chosen": -289.57586669921875, "logps/rejected": -226.74734497070312, "loss": 0.5188, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.11683692783117294, "rewards/margins": 0.7003597021102905, "rewards/rejected": -0.8171966671943665, "step": 1690 }, { "epoch": 1.76, "learning_rate": 2.3038652889399157e-07, "logits/chosen": -2.3566040992736816, "logits/rejected": -2.3693103790283203, "logps/chosen": -259.44281005859375, "logps/rejected": -235.6457061767578, "loss": 0.5218, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07110615074634552, "rewards/margins": 0.7475603818893433, "rewards/rejected": -0.8186665773391724, "step": 1700 }, { "epoch": 1.77, "learning_rate": 2.2847301951779563e-07, "logits/chosen": -2.3193366527557373, "logits/rejected": -2.2707600593566895, "logps/chosen": -230.35177612304688, "logps/rejected": -218.6141357421875, "loss": 0.5379, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2341269999742508, "rewards/margins": 0.5890001654624939, "rewards/rejected": -0.8231271505355835, "step": 1710 }, { "epoch": 1.78, "learning_rate": 2.265595101415997e-07, "logits/chosen": -2.303278923034668, "logits/rejected": -2.260132312774658, "logps/chosen": -263.609130859375, "logps/rejected": -207.05221557617188, "loss": 0.5417, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.16092923283576965, "rewards/margins": 0.8012853860855103, "rewards/rejected": -0.9622145891189575, "step": 1720 }, { "epoch": 1.79, "learning_rate": 2.2464600076540373e-07, "logits/chosen": -2.3414905071258545, "logits/rejected": -2.3637521266937256, "logps/chosen": -272.04595947265625, "logps/rejected": -233.1014862060547, "loss": 0.5402, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15146999061107635, "rewards/margins": 0.6853641271591187, "rewards/rejected": -0.8368341326713562, "step": 1730 }, { "epoch": 1.8, "learning_rate": 2.227324913892078e-07, "logits/chosen": -2.45133113861084, "logits/rejected": -2.3326609134674072, "logps/chosen": -262.7754821777344, "logps/rejected": -233.98037719726562, "loss": 0.5238, "rewards/accuracies": 0.71875, "rewards/chosen": -0.09686625003814697, "rewards/margins": 0.6828486919403076, "rewards/rejected": -0.7797149419784546, "step": 1740 }, { "epoch": 1.81, "learning_rate": 2.2081898201301186e-07, "logits/chosen": -2.4001007080078125, "logits/rejected": -2.3274292945861816, "logps/chosen": -251.53952026367188, "logps/rejected": -219.2271270751953, "loss": 0.535, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.14206108450889587, "rewards/margins": 0.6470782160758972, "rewards/rejected": -0.7891392707824707, "step": 1750 }, { "epoch": 1.82, "learning_rate": 2.1890547263681592e-07, "logits/chosen": -2.4295260906219482, "logits/rejected": -2.36590313911438, "logps/chosen": -271.3697814941406, "logps/rejected": -215.4159698486328, "loss": 0.54, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.07891818135976791, "rewards/margins": 0.7567145824432373, "rewards/rejected": -0.8356328010559082, "step": 1760 }, { "epoch": 1.83, "learning_rate": 2.1699196326061998e-07, "logits/chosen": -2.371452569961548, "logits/rejected": -2.3672077655792236, "logps/chosen": -265.51470947265625, "logps/rejected": -241.41165161132812, "loss": 0.5586, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1335269808769226, "rewards/margins": 0.708950400352478, "rewards/rejected": -0.8424774408340454, "step": 1770 }, { "epoch": 1.84, "learning_rate": 2.1507845388442402e-07, "logits/chosen": -2.395923614501953, "logits/rejected": -2.2845911979675293, "logps/chosen": -287.7729187011719, "logps/rejected": -233.2380828857422, "loss": 0.5312, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.05316392332315445, "rewards/margins": 0.7853686809539795, "rewards/rejected": -0.8385326266288757, "step": 1780 }, { "epoch": 1.85, "learning_rate": 2.1316494450822808e-07, "logits/chosen": -2.331926107406616, "logits/rejected": -2.3294596672058105, "logps/chosen": -269.21484375, "logps/rejected": -242.924072265625, "loss": 0.5469, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.12467104196548462, "rewards/margins": 0.7131480574607849, "rewards/rejected": -0.8378192186355591, "step": 1790 }, { "epoch": 1.86, "learning_rate": 2.1125143513203214e-07, "logits/chosen": -2.4469873905181885, "logits/rejected": -2.4214444160461426, "logps/chosen": -266.3757629394531, "logps/rejected": -224.87142944335938, "loss": 0.5127, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.14371030032634735, "rewards/margins": 0.739290714263916, "rewards/rejected": -0.8830010294914246, "step": 1800 }, { "epoch": 1.87, "learning_rate": 2.093379257558362e-07, "logits/chosen": -2.4263453483581543, "logits/rejected": -2.430192232131958, "logps/chosen": -274.2438659667969, "logps/rejected": -237.7062530517578, "loss": 0.5516, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.19841806590557098, "rewards/margins": 0.6394414901733398, "rewards/rejected": -0.8378594517707825, "step": 1810 }, { "epoch": 1.88, "learning_rate": 2.0742441637964026e-07, "logits/chosen": -2.3289737701416016, "logits/rejected": -2.3154170513153076, "logps/chosen": -299.1728820800781, "logps/rejected": -238.6165771484375, "loss": 0.5113, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08980865776538849, "rewards/margins": 0.9050415754318237, "rewards/rejected": -0.9948502779006958, "step": 1820 }, { "epoch": 1.89, "learning_rate": 2.055109070034443e-07, "logits/chosen": -2.4139952659606934, "logits/rejected": -2.394843101501465, "logps/chosen": -287.22265625, "logps/rejected": -256.1412353515625, "loss": 0.5224, "rewards/accuracies": 0.75, "rewards/chosen": -0.1007101982831955, "rewards/margins": 0.7850233912467957, "rewards/rejected": -0.8857336044311523, "step": 1830 }, { "epoch": 1.9, "learning_rate": 2.0359739762724836e-07, "logits/chosen": -2.415116310119629, "logits/rejected": -2.3596065044403076, "logps/chosen": -279.6313781738281, "logps/rejected": -237.9734649658203, "loss": 0.531, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.11246969550848007, "rewards/margins": 0.7976399660110474, "rewards/rejected": -0.9101096391677856, "step": 1840 }, { "epoch": 1.91, "learning_rate": 2.0168388825105242e-07, "logits/chosen": -2.3417422771453857, "logits/rejected": -2.2845070362091064, "logps/chosen": -260.4609680175781, "logps/rejected": -265.02410888671875, "loss": 0.5148, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14557930827140808, "rewards/margins": 0.8025070428848267, "rewards/rejected": -0.9480863809585571, "step": 1850 }, { "epoch": 1.92, "learning_rate": 1.997703788748565e-07, "logits/chosen": -2.347747564315796, "logits/rejected": -2.301016330718994, "logps/chosen": -278.42144775390625, "logps/rejected": -228.9193878173828, "loss": 0.535, "rewards/accuracies": 0.75, "rewards/chosen": -0.23046691715717316, "rewards/margins": 0.6747422218322754, "rewards/rejected": -0.9052090644836426, "step": 1860 }, { "epoch": 1.93, "learning_rate": 1.9785686949866055e-07, "logits/chosen": -2.374584913253784, "logits/rejected": -2.449509382247925, "logps/chosen": -275.76531982421875, "logps/rejected": -233.2628936767578, "loss": 0.5633, "rewards/accuracies": 0.6875, "rewards/chosen": -0.12146653980016708, "rewards/margins": 0.5764524340629578, "rewards/rejected": -0.6979190111160278, "step": 1870 }, { "epoch": 1.94, "learning_rate": 1.9594336012246458e-07, "logits/chosen": -2.4476470947265625, "logits/rejected": -2.3424344062805176, "logps/chosen": -270.17877197265625, "logps/rejected": -235.03125, "loss": 0.5134, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.15192639827728271, "rewards/margins": 0.7095075845718384, "rewards/rejected": -0.8614339828491211, "step": 1880 }, { "epoch": 1.95, "learning_rate": 1.9402985074626865e-07, "logits/chosen": -2.378629446029663, "logits/rejected": -2.394209861755371, "logps/chosen": -269.76898193359375, "logps/rejected": -228.36172485351562, "loss": 0.5524, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.18615347146987915, "rewards/margins": 0.6800569295883179, "rewards/rejected": -0.8662103414535522, "step": 1890 }, { "epoch": 1.96, "learning_rate": 1.921163413700727e-07, "logits/chosen": -2.3340277671813965, "logits/rejected": -2.29093861579895, "logps/chosen": -280.81298828125, "logps/rejected": -209.24929809570312, "loss": 0.5775, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.2276122123003006, "rewards/margins": 0.6620305180549622, "rewards/rejected": -0.8896427154541016, "step": 1900 }, { "epoch": 1.97, "learning_rate": 1.9020283199387677e-07, "logits/chosen": -2.444658041000366, "logits/rejected": -2.381373405456543, "logps/chosen": -262.42974853515625, "logps/rejected": -233.9261474609375, "loss": 0.5526, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06337813287973404, "rewards/margins": 0.7153123617172241, "rewards/rejected": -0.7786905169487, "step": 1910 }, { "epoch": 1.98, "learning_rate": 1.8828932261768083e-07, "logits/chosen": -2.431870222091675, "logits/rejected": -2.3378379344940186, "logps/chosen": -266.4524230957031, "logps/rejected": -244.3126678466797, "loss": 0.5322, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.16464334726333618, "rewards/margins": 0.6538031101226807, "rewards/rejected": -0.8184464573860168, "step": 1920 }, { "epoch": 1.99, "learning_rate": 1.8637581324148487e-07, "logits/chosen": -2.4436469078063965, "logits/rejected": -2.416304349899292, "logps/chosen": -265.97393798828125, "logps/rejected": -240.1495361328125, "loss": 0.5443, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.11401587724685669, "rewards/margins": 0.8116496801376343, "rewards/rejected": -0.9256657361984253, "step": 1930 }, { "epoch": 2.0, "eval_logits/chosen": -2.0740063190460205, "eval_logits/rejected": -1.9495693445205688, "eval_logps/chosen": -266.1383361816406, "eval_logps/rejected": -227.92555236816406, "eval_loss": 0.530714750289917, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -0.1506224423646927, "eval_rewards/margins": 0.7136407494544983, "eval_rewards/rejected": -0.8642632961273193, "eval_runtime": 601.1247, "eval_samples_per_second": 3.327, "eval_steps_per_second": 0.208, "step": 1936 }, { "epoch": 2.0, "learning_rate": 1.8446230386528893e-07, "logits/chosen": -2.3547306060791016, "logits/rejected": -2.309804677963257, "logps/chosen": -239.15170288085938, "logps/rejected": -227.03646850585938, "loss": 0.5441, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20203718543052673, "rewards/margins": 0.5975244641304016, "rewards/rejected": -0.799561619758606, "step": 1940 }, { "epoch": 2.01, "learning_rate": 1.82548794489093e-07, "logits/chosen": -2.30757474899292, "logits/rejected": -2.3320353031158447, "logps/chosen": -243.75341796875, "logps/rejected": -234.9235076904297, "loss": 0.5412, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.19783169031143188, "rewards/margins": 0.6387797594070435, "rewards/rejected": -0.8366113901138306, "step": 1950 }, { "epoch": 2.02, "learning_rate": 1.8063528511289706e-07, "logits/chosen": -2.358428478240967, "logits/rejected": -2.277144432067871, "logps/chosen": -286.93426513671875, "logps/rejected": -259.59027099609375, "loss": 0.5317, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.09961538016796112, "rewards/margins": 0.7642472982406616, "rewards/rejected": -0.8638626337051392, "step": 1960 }, { "epoch": 2.03, "learning_rate": 1.7872177573670112e-07, "logits/chosen": -2.4061942100524902, "logits/rejected": -2.4171481132507324, "logps/chosen": -252.14535522460938, "logps/rejected": -230.7240753173828, "loss": 0.5397, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.09236567467451096, "rewards/margins": 0.6671853065490723, "rewards/rejected": -0.7595510482788086, "step": 1970 }, { "epoch": 2.04, "learning_rate": 1.7680826636050515e-07, "logits/chosen": -2.3134634494781494, "logits/rejected": -2.2795519828796387, "logps/chosen": -278.4905090332031, "logps/rejected": -234.19482421875, "loss": 0.5249, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.07433702796697617, "rewards/margins": 0.7174164652824402, "rewards/rejected": -0.7917534708976746, "step": 1980 }, { "epoch": 2.06, "learning_rate": 1.7489475698430921e-07, "logits/chosen": -2.3330302238464355, "logits/rejected": -2.3669610023498535, "logps/chosen": -249.8580322265625, "logps/rejected": -249.130615234375, "loss": 0.5555, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.14169186353683472, "rewards/margins": 0.6893213391304016, "rewards/rejected": -0.8310132026672363, "step": 1990 }, { "epoch": 2.07, "learning_rate": 1.7298124760811328e-07, "logits/chosen": -2.404921054840088, "logits/rejected": -2.3844478130340576, "logps/chosen": -277.40667724609375, "logps/rejected": -224.07015991210938, "loss": 0.5389, "rewards/accuracies": 0.71875, "rewards/chosen": -0.13002373278141022, "rewards/margins": 0.7138842344284058, "rewards/rejected": -0.8439079523086548, "step": 2000 }, { "epoch": 2.08, "learning_rate": 1.7106773823191734e-07, "logits/chosen": -2.4383153915405273, "logits/rejected": -2.3728132247924805, "logps/chosen": -270.46270751953125, "logps/rejected": -240.8298797607422, "loss": 0.53, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.1426972895860672, "rewards/margins": 0.8270591497421265, "rewards/rejected": -0.9697564244270325, "step": 2010 }, { "epoch": 2.09, "learning_rate": 1.691542288557214e-07, "logits/chosen": -2.418792247772217, "logits/rejected": -2.321077346801758, "logps/chosen": -297.71197509765625, "logps/rejected": -230.908935546875, "loss": 0.543, "rewards/accuracies": 0.71875, "rewards/chosen": -0.18689216673374176, "rewards/margins": 0.7213765382766724, "rewards/rejected": -0.9082688093185425, "step": 2020 }, { "epoch": 2.1, "learning_rate": 1.6724071947952544e-07, "logits/chosen": -2.4345123767852783, "logits/rejected": -2.274202823638916, "logps/chosen": -240.6513671875, "logps/rejected": -232.0623779296875, "loss": 0.5363, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.16003349423408508, "rewards/margins": 0.6547808647155762, "rewards/rejected": -0.8148144483566284, "step": 2030 }, { "epoch": 2.11, "learning_rate": 1.653272101033295e-07, "logits/chosen": -2.4675419330596924, "logits/rejected": -2.341759204864502, "logps/chosen": -276.0350036621094, "logps/rejected": -212.1591339111328, "loss": 0.514, "rewards/accuracies": 0.71875, "rewards/chosen": -0.10016350448131561, "rewards/margins": 0.6785081624984741, "rewards/rejected": -0.7786716222763062, "step": 2040 }, { "epoch": 2.12, "learning_rate": 1.6341370072713356e-07, "logits/chosen": -2.470984935760498, "logits/rejected": -2.4148213863372803, "logps/chosen": -302.93951416015625, "logps/rejected": -248.380859375, "loss": 0.5384, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.08969398587942123, "rewards/margins": 0.8469365835189819, "rewards/rejected": -0.9366306066513062, "step": 2050 }, { "epoch": 2.13, "learning_rate": 1.6150019135093762e-07, "logits/chosen": -2.452855348587036, "logits/rejected": -2.3725485801696777, "logps/chosen": -304.5735168457031, "logps/rejected": -232.25656127929688, "loss": 0.5285, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.132027268409729, "rewards/margins": 0.7218815088272095, "rewards/rejected": -0.8539088368415833, "step": 2060 }, { "epoch": 2.14, "learning_rate": 1.5958668197474169e-07, "logits/chosen": -2.461775541305542, "logits/rejected": -2.3796803951263428, "logps/chosen": -272.814453125, "logps/rejected": -222.52627563476562, "loss": 0.5367, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.105324387550354, "rewards/margins": 0.8349732160568237, "rewards/rejected": -0.9402976036071777, "step": 2070 }, { "epoch": 2.15, "learning_rate": 1.5767317259854572e-07, "logits/chosen": -2.4011590480804443, "logits/rejected": -2.3761496543884277, "logps/chosen": -262.963623046875, "logps/rejected": -233.1734619140625, "loss": 0.5336, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15259674191474915, "rewards/margins": 0.7275049090385437, "rewards/rejected": -0.8801015615463257, "step": 2080 }, { "epoch": 2.16, "learning_rate": 1.5575966322234978e-07, "logits/chosen": -2.326183319091797, "logits/rejected": -2.2559120655059814, "logps/chosen": -259.02337646484375, "logps/rejected": -247.8983154296875, "loss": 0.5301, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1156080812215805, "rewards/margins": 0.7456148862838745, "rewards/rejected": -0.861223042011261, "step": 2090 }, { "epoch": 2.17, "learning_rate": 1.5384615384615385e-07, "logits/chosen": -2.3786349296569824, "logits/rejected": -2.2938733100891113, "logps/chosen": -238.00930786132812, "logps/rejected": -218.44833374023438, "loss": 0.5195, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.17429223656654358, "rewards/margins": 0.7917351126670837, "rewards/rejected": -0.9660272598266602, "step": 2100 }, { "epoch": 2.18, "learning_rate": 1.519326444699579e-07, "logits/chosen": -2.3865954875946045, "logits/rejected": -2.3452370166778564, "logps/chosen": -279.7684631347656, "logps/rejected": -252.83035278320312, "loss": 0.5121, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.11177567392587662, "rewards/margins": 0.7900134921073914, "rewards/rejected": -0.9017891883850098, "step": 2110 }, { "epoch": 2.19, "learning_rate": 1.5001913509376197e-07, "logits/chosen": -2.2664945125579834, "logits/rejected": -2.2997994422912598, "logps/chosen": -242.1395721435547, "logps/rejected": -219.6393585205078, "loss": 0.5355, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.03740229830145836, "rewards/margins": 0.8150936365127563, "rewards/rejected": -0.8524959683418274, "step": 2120 }, { "epoch": 2.2, "learning_rate": 1.4810562571756603e-07, "logits/chosen": -2.40497088432312, "logits/rejected": -2.382087230682373, "logps/chosen": -260.3258972167969, "logps/rejected": -238.91629028320312, "loss": 0.5401, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.19490735232830048, "rewards/margins": 0.6507551074028015, "rewards/rejected": -0.8456624746322632, "step": 2130 }, { "epoch": 2.21, "learning_rate": 1.4619211634137007e-07, "logits/chosen": -2.4172415733337402, "logits/rejected": -2.3459484577178955, "logps/chosen": -296.9003601074219, "logps/rejected": -244.6107177734375, "loss": 0.5205, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.08288192749023438, "rewards/margins": 0.7755564451217651, "rewards/rejected": -0.8584383726119995, "step": 2140 }, { "epoch": 2.22, "learning_rate": 1.4427860696517413e-07, "logits/chosen": -2.3107922077178955, "logits/rejected": -2.272061824798584, "logps/chosen": -276.7105407714844, "logps/rejected": -244.1803436279297, "loss": 0.547, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24030157923698425, "rewards/margins": 0.5712770819664001, "rewards/rejected": -0.8115787506103516, "step": 2150 }, { "epoch": 2.23, "learning_rate": 1.423650975889782e-07, "logits/chosen": -2.4137063026428223, "logits/rejected": -2.317960262298584, "logps/chosen": -301.14422607421875, "logps/rejected": -230.6436004638672, "loss": 0.5227, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.0714234784245491, "rewards/margins": 0.8001400232315063, "rewards/rejected": -0.8715635538101196, "step": 2160 }, { "epoch": 2.24, "learning_rate": 1.4045158821278225e-07, "logits/chosen": -2.437873363494873, "logits/rejected": -2.3846659660339355, "logps/chosen": -281.734619140625, "logps/rejected": -252.9493408203125, "loss": 0.5234, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0724717229604721, "rewards/margins": 0.8193691968917847, "rewards/rejected": -0.891840934753418, "step": 2170 }, { "epoch": 2.25, "learning_rate": 1.3853807883658632e-07, "logits/chosen": -2.3811519145965576, "logits/rejected": -2.3631045818328857, "logps/chosen": -286.5116271972656, "logps/rejected": -263.0299072265625, "loss": 0.5568, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.0669230967760086, "rewards/margins": 0.6814225912094116, "rewards/rejected": -0.748345673084259, "step": 2180 }, { "epoch": 2.26, "learning_rate": 1.3662456946039035e-07, "logits/chosen": -2.3424034118652344, "logits/rejected": -2.3057944774627686, "logps/chosen": -244.4705352783203, "logps/rejected": -206.46615600585938, "loss": 0.5341, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1323441118001938, "rewards/margins": 0.6189785599708557, "rewards/rejected": -0.7513227462768555, "step": 2190 }, { "epoch": 2.27, "learning_rate": 1.3471106008419441e-07, "logits/chosen": -2.3643927574157715, "logits/rejected": -2.329315662384033, "logps/chosen": -273.6473083496094, "logps/rejected": -260.7103576660156, "loss": 0.5286, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.10188891738653183, "rewards/margins": 0.6854437589645386, "rewards/rejected": -0.7873327732086182, "step": 2200 }, { "epoch": 2.28, "learning_rate": 1.3279755070799848e-07, "logits/chosen": -2.328723430633545, "logits/rejected": -2.2633702754974365, "logps/chosen": -284.9334411621094, "logps/rejected": -232.8155517578125, "loss": 0.5415, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1077471598982811, "rewards/margins": 0.7453306913375854, "rewards/rejected": -0.8530778884887695, "step": 2210 }, { "epoch": 2.29, "learning_rate": 1.3088404133180254e-07, "logits/chosen": -2.299356460571289, "logits/rejected": -2.2545862197875977, "logps/chosen": -306.23406982421875, "logps/rejected": -226.7987823486328, "loss": 0.4804, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.07135553658008575, "rewards/margins": 0.9380094408988953, "rewards/rejected": -1.0093649625778198, "step": 2220 }, { "epoch": 2.3, "learning_rate": 1.289705319556066e-07, "logits/chosen": -2.314143419265747, "logits/rejected": -2.2511839866638184, "logps/chosen": -252.2981719970703, "logps/rejected": -221.84194946289062, "loss": 0.5264, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.14519211649894714, "rewards/margins": 0.6200018525123596, "rewards/rejected": -0.7651939988136292, "step": 2230 }, { "epoch": 2.31, "learning_rate": 1.2705702257941064e-07, "logits/chosen": -2.3738441467285156, "logits/rejected": -2.3623602390289307, "logps/chosen": -257.61328125, "logps/rejected": -234.92190551757812, "loss": 0.5131, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.18813610076904297, "rewards/margins": 0.68475741147995, "rewards/rejected": -0.8728936314582825, "step": 2240 }, { "epoch": 2.32, "learning_rate": 1.251435132032147e-07, "logits/chosen": -2.346224546432495, "logits/rejected": -2.337629795074463, "logps/chosen": -273.5932312011719, "logps/rejected": -259.9046325683594, "loss": 0.5298, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07904721796512604, "rewards/margins": 0.7541002035140991, "rewards/rejected": -0.833147406578064, "step": 2250 }, { "epoch": 2.33, "learning_rate": 1.2323000382701873e-07, "logits/chosen": -2.3779919147491455, "logits/rejected": -2.3776283264160156, "logps/chosen": -270.1576232910156, "logps/rejected": -229.17239379882812, "loss": 0.5543, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20323677361011505, "rewards/margins": 0.6380544900894165, "rewards/rejected": -0.8412912487983704, "step": 2260 }, { "epoch": 2.34, "learning_rate": 1.213164944508228e-07, "logits/chosen": -2.450532913208008, "logits/rejected": -2.3503329753875732, "logps/chosen": -266.23175048828125, "logps/rejected": -269.7557067871094, "loss": 0.509, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25909024477005005, "rewards/margins": 0.6517874002456665, "rewards/rejected": -0.9108778238296509, "step": 2270 }, { "epoch": 2.35, "learning_rate": 1.1940298507462686e-07, "logits/chosen": -2.244485378265381, "logits/rejected": -2.2757506370544434, "logps/chosen": -261.5260314941406, "logps/rejected": -211.95291137695312, "loss": 0.5175, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09936892986297607, "rewards/margins": 0.829565167427063, "rewards/rejected": -0.9289340972900391, "step": 2280 }, { "epoch": 2.37, "learning_rate": 1.1748947569843092e-07, "logits/chosen": -2.3904809951782227, "logits/rejected": -2.307392120361328, "logps/chosen": -263.42333984375, "logps/rejected": -225.5657196044922, "loss": 0.5077, "rewards/accuracies": 0.78125, "rewards/chosen": -0.11825220286846161, "rewards/margins": 0.8992505073547363, "rewards/rejected": -1.017502784729004, "step": 2290 }, { "epoch": 2.38, "learning_rate": 1.1557596632223497e-07, "logits/chosen": -2.360848903656006, "logits/rejected": -2.3750827312469482, "logps/chosen": -272.0371398925781, "logps/rejected": -224.0919647216797, "loss": 0.5255, "rewards/accuracies": 0.75, "rewards/chosen": -0.1476692408323288, "rewards/margins": 0.835718035697937, "rewards/rejected": -0.983387291431427, "step": 2300 }, { "epoch": 2.39, "learning_rate": 1.1366245694603903e-07, "logits/chosen": -2.4289305210113525, "logits/rejected": -2.2458267211914062, "logps/chosen": -265.1494140625, "logps/rejected": -249.3345489501953, "loss": 0.556, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.24553117156028748, "rewards/margins": 0.6707764863967896, "rewards/rejected": -0.9163076281547546, "step": 2310 }, { "epoch": 2.4, "learning_rate": 1.1174894756984308e-07, "logits/chosen": -2.2975571155548096, "logits/rejected": -2.393068790435791, "logps/chosen": -254.4977569580078, "logps/rejected": -224.1728973388672, "loss": 0.5205, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.18545860052108765, "rewards/margins": 0.8345575332641602, "rewards/rejected": -1.020016074180603, "step": 2320 }, { "epoch": 2.41, "learning_rate": 1.0983543819364714e-07, "logits/chosen": -2.464052200317383, "logits/rejected": -2.4497199058532715, "logps/chosen": -278.8863525390625, "logps/rejected": -231.97512817382812, "loss": 0.477, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.08197133243083954, "rewards/margins": 0.8996642231941223, "rewards/rejected": -0.9816356897354126, "step": 2330 }, { "epoch": 2.42, "learning_rate": 1.079219288174512e-07, "logits/chosen": -2.333491086959839, "logits/rejected": -2.31835675239563, "logps/chosen": -247.50698852539062, "logps/rejected": -209.3056182861328, "loss": 0.5348, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.20645038783550262, "rewards/margins": 0.5899510979652405, "rewards/rejected": -0.7964013814926147, "step": 2340 }, { "epoch": 2.43, "learning_rate": 1.0600841944125525e-07, "logits/chosen": -2.43719744682312, "logits/rejected": -2.396315574645996, "logps/chosen": -271.6535949707031, "logps/rejected": -221.7565155029297, "loss": 0.5527, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.2088332623243332, "rewards/margins": 0.6539155840873718, "rewards/rejected": -0.8627488017082214, "step": 2350 }, { "epoch": 2.44, "learning_rate": 1.0409491006505931e-07, "logits/chosen": -2.400672435760498, "logits/rejected": -2.2855820655822754, "logps/chosen": -283.90576171875, "logps/rejected": -229.03689575195312, "loss": 0.5436, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.18632188439369202, "rewards/margins": 0.7588584423065186, "rewards/rejected": -0.9451802968978882, "step": 2360 }, { "epoch": 2.45, "learning_rate": 1.0218140068886336e-07, "logits/chosen": -2.3788506984710693, "logits/rejected": -2.282285690307617, "logps/chosen": -260.7487487792969, "logps/rejected": -221.43505859375, "loss": 0.5533, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.21385586261749268, "rewards/margins": 0.5428717732429504, "rewards/rejected": -0.7567275762557983, "step": 2370 }, { "epoch": 2.46, "learning_rate": 1.0026789131266743e-07, "logits/chosen": -2.296046018600464, "logits/rejected": -2.3289477825164795, "logps/chosen": -252.4747772216797, "logps/rejected": -242.341796875, "loss": 0.5406, "rewards/accuracies": 0.6875, "rewards/chosen": -0.17420583963394165, "rewards/margins": 0.6258620023727417, "rewards/rejected": -0.8000679016113281, "step": 2380 }, { "epoch": 2.47, "learning_rate": 9.835438193647149e-08, "logits/chosen": -2.3245911598205566, "logits/rejected": -2.3727688789367676, "logps/chosen": -255.92984008789062, "logps/rejected": -230.47940063476562, "loss": 0.5594, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.15971948206424713, "rewards/margins": 0.6732539534568787, "rewards/rejected": -0.8329733610153198, "step": 2390 }, { "epoch": 2.48, "learning_rate": 9.644087256027554e-08, "logits/chosen": -2.3938796520233154, "logits/rejected": -2.388028383255005, "logps/chosen": -239.3699188232422, "logps/rejected": -218.7798614501953, "loss": 0.539, "rewards/accuracies": 0.71875, "rewards/chosen": -0.14847150444984436, "rewards/margins": 0.6557341814041138, "rewards/rejected": -0.804205596446991, "step": 2400 }, { "epoch": 2.49, "learning_rate": 9.45273631840796e-08, "logits/chosen": -2.395660877227783, "logits/rejected": -2.4043540954589844, "logps/chosen": -278.68353271484375, "logps/rejected": -231.53103637695312, "loss": 0.5324, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.08287270367145538, "rewards/margins": 0.7413903474807739, "rewards/rejected": -0.8242629766464233, "step": 2410 }, { "epoch": 2.5, "learning_rate": 9.261385380788366e-08, "logits/chosen": -2.2760956287384033, "logits/rejected": -2.2844595909118652, "logps/chosen": -239.07382202148438, "logps/rejected": -222.31161499023438, "loss": 0.5253, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.2278144657611847, "rewards/margins": 0.6441665887832642, "rewards/rejected": -0.871981143951416, "step": 2420 }, { "epoch": 2.51, "learning_rate": 9.070034443168771e-08, "logits/chosen": -2.3876235485076904, "logits/rejected": -2.3704121112823486, "logps/chosen": -261.8553161621094, "logps/rejected": -227.60379028320312, "loss": 0.5577, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24931149184703827, "rewards/margins": 0.6487377285957336, "rewards/rejected": -0.8980492353439331, "step": 2430 }, { "epoch": 2.52, "learning_rate": 8.878683505549177e-08, "logits/chosen": -2.2906646728515625, "logits/rejected": -2.32999587059021, "logps/chosen": -267.1397399902344, "logps/rejected": -226.90048217773438, "loss": 0.5109, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.10627348721027374, "rewards/margins": 0.714811384677887, "rewards/rejected": -0.8210847973823547, "step": 2440 }, { "epoch": 2.53, "learning_rate": 8.687332567929582e-08, "logits/chosen": -2.3202567100524902, "logits/rejected": -2.349112033843994, "logps/chosen": -291.82147216796875, "logps/rejected": -246.85574340820312, "loss": 0.5351, "rewards/accuracies": 0.78125, "rewards/chosen": -0.050464726984500885, "rewards/margins": 0.8968712091445923, "rewards/rejected": -0.9473358988761902, "step": 2450 }, { "epoch": 2.54, "learning_rate": 8.495981630309988e-08, "logits/chosen": -2.270942211151123, "logits/rejected": -2.2897868156433105, "logps/chosen": -300.76312255859375, "logps/rejected": -218.22640991210938, "loss": 0.5467, "rewards/accuracies": 0.71875, "rewards/chosen": -0.13602502644062042, "rewards/margins": 0.5781577825546265, "rewards/rejected": -0.7141829133033752, "step": 2460 }, { "epoch": 2.55, "learning_rate": 8.304630692690395e-08, "logits/chosen": -2.317321300506592, "logits/rejected": -2.2713263034820557, "logps/chosen": -262.05743408203125, "logps/rejected": -205.5304412841797, "loss": 0.5247, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1445184201002121, "rewards/margins": 0.7234494090080261, "rewards/rejected": -0.8679677248001099, "step": 2470 }, { "epoch": 2.56, "learning_rate": 8.1132797550708e-08, "logits/chosen": -2.3970394134521484, "logits/rejected": -2.3608124256134033, "logps/chosen": -267.4720458984375, "logps/rejected": -221.71359252929688, "loss": 0.5264, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15024690330028534, "rewards/margins": 0.7065707445144653, "rewards/rejected": -0.8568177223205566, "step": 2480 }, { "epoch": 2.57, "learning_rate": 7.921928817451206e-08, "logits/chosen": -2.416393280029297, "logits/rejected": -2.3220162391662598, "logps/chosen": -277.0873107910156, "logps/rejected": -227.70947265625, "loss": 0.5077, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.17723213136196136, "rewards/margins": 0.8048456311225891, "rewards/rejected": -0.9820777177810669, "step": 2490 }, { "epoch": 2.58, "learning_rate": 7.73057787983161e-08, "logits/chosen": -2.395048141479492, "logits/rejected": -2.3008649349212646, "logps/chosen": -292.8017883300781, "logps/rejected": -266.5408020019531, "loss": 0.516, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.08461178839206696, "rewards/margins": 0.7476651072502136, "rewards/rejected": -0.8322768211364746, "step": 2500 }, { "epoch": 2.59, "learning_rate": 7.539226942212017e-08, "logits/chosen": -2.357027053833008, "logits/rejected": -2.313039541244507, "logps/chosen": -244.0641326904297, "logps/rejected": -220.01608276367188, "loss": 0.5179, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.11702857911586761, "rewards/margins": 0.8708831071853638, "rewards/rejected": -0.9879117012023926, "step": 2510 }, { "epoch": 2.6, "learning_rate": 7.347876004592423e-08, "logits/chosen": -2.470712900161743, "logits/rejected": -2.401108980178833, "logps/chosen": -280.0924072265625, "logps/rejected": -232.7683563232422, "loss": 0.5404, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1525488644838333, "rewards/margins": 0.7386666536331177, "rewards/rejected": -0.8912155032157898, "step": 2520 }, { "epoch": 2.61, "learning_rate": 7.156525066972828e-08, "logits/chosen": -2.299323081970215, "logits/rejected": -2.362274646759033, "logps/chosen": -242.1494598388672, "logps/rejected": -227.5293426513672, "loss": 0.5408, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.07852064073085785, "rewards/margins": 0.7071703672409058, "rewards/rejected": -0.7856910824775696, "step": 2530 }, { "epoch": 2.62, "learning_rate": 6.965174129353234e-08, "logits/chosen": -2.3532567024230957, "logits/rejected": -2.292245864868164, "logps/chosen": -302.1033630371094, "logps/rejected": -258.0881042480469, "loss": 0.53, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.11693718284368515, "rewards/margins": 0.8197237253189087, "rewards/rejected": -0.936660885810852, "step": 2540 }, { "epoch": 2.63, "learning_rate": 6.773823191733639e-08, "logits/chosen": -2.285585403442383, "logits/rejected": -2.2158350944519043, "logps/chosen": -259.6685485839844, "logps/rejected": -222.1896514892578, "loss": 0.5305, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.23936796188354492, "rewards/margins": 0.7124180197715759, "rewards/rejected": -0.9517859220504761, "step": 2550 }, { "epoch": 2.64, "learning_rate": 6.582472254114045e-08, "logits/chosen": -2.305962085723877, "logits/rejected": -2.3173716068267822, "logps/chosen": -252.51766967773438, "logps/rejected": -223.96084594726562, "loss": 0.565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.13089394569396973, "rewards/margins": 0.6327255368232727, "rewards/rejected": -0.7636195421218872, "step": 2560 }, { "epoch": 2.65, "learning_rate": 6.391121316494451e-08, "logits/chosen": -2.325913667678833, "logits/rejected": -2.310243606567383, "logps/chosen": -294.99847412109375, "logps/rejected": -239.6224822998047, "loss": 0.5248, "rewards/accuracies": 0.78125, "rewards/chosen": -0.10585550963878632, "rewards/margins": 0.8481132388114929, "rewards/rejected": -0.9539687037467957, "step": 2570 }, { "epoch": 2.66, "learning_rate": 6.199770378874856e-08, "logits/chosen": -2.3760478496551514, "logits/rejected": -2.2878143787384033, "logps/chosen": -272.9644775390625, "logps/rejected": -225.4691619873047, "loss": 0.5297, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.07944142073392868, "rewards/margins": 0.8911903500556946, "rewards/rejected": -0.9706317782402039, "step": 2580 }, { "epoch": 2.67, "learning_rate": 6.008419441255262e-08, "logits/chosen": -2.3183536529541016, "logits/rejected": -2.2597270011901855, "logps/chosen": -299.25775146484375, "logps/rejected": -217.822509765625, "loss": 0.5237, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.07527098059654236, "rewards/margins": 0.823529839515686, "rewards/rejected": -0.8988008499145508, "step": 2590 }, { "epoch": 2.69, "learning_rate": 5.817068503635668e-08, "logits/chosen": -2.4004032611846924, "logits/rejected": -2.3618216514587402, "logps/chosen": -288.94622802734375, "logps/rejected": -222.6877899169922, "loss": 0.5125, "rewards/accuracies": 0.75, "rewards/chosen": -0.12513655424118042, "rewards/margins": 0.836874783039093, "rewards/rejected": -0.9620113372802734, "step": 2600 }, { "epoch": 2.7, "learning_rate": 5.6257175660160735e-08, "logits/chosen": -2.420538902282715, "logits/rejected": -2.3739724159240723, "logps/chosen": -295.19580078125, "logps/rejected": -238.79727172851562, "loss": 0.5186, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.18515029549598694, "rewards/margins": 0.6977055668830872, "rewards/rejected": -0.8828557729721069, "step": 2610 }, { "epoch": 2.71, "learning_rate": 5.4343666283964784e-08, "logits/chosen": -2.4274239540100098, "logits/rejected": -2.376018762588501, "logps/chosen": -263.94525146484375, "logps/rejected": -249.42178344726562, "loss": 0.5219, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.0742453932762146, "rewards/margins": 0.7254621386528015, "rewards/rejected": -0.7997074127197266, "step": 2620 }, { "epoch": 2.72, "learning_rate": 5.243015690776884e-08, "logits/chosen": -2.4671757221221924, "logits/rejected": -2.4064314365386963, "logps/chosen": -284.95330810546875, "logps/rejected": -235.897705078125, "loss": 0.5148, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15317563712596893, "rewards/margins": 0.7068864107131958, "rewards/rejected": -0.8600620031356812, "step": 2630 }, { "epoch": 2.73, "learning_rate": 5.05166475315729e-08, "logits/chosen": -2.3765015602111816, "logits/rejected": -2.3243587017059326, "logps/chosen": -263.02545166015625, "logps/rejected": -218.81405639648438, "loss": 0.5407, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.28189724683761597, "rewards/margins": 0.6195310354232788, "rewards/rejected": -0.9014283418655396, "step": 2640 }, { "epoch": 2.74, "learning_rate": 4.860313815537696e-08, "logits/chosen": -2.406059741973877, "logits/rejected": -2.3650240898132324, "logps/chosen": -284.5108337402344, "logps/rejected": -266.8323059082031, "loss": 0.5351, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.12149874866008759, "rewards/margins": 0.7909864187240601, "rewards/rejected": -0.9124851226806641, "step": 2650 }, { "epoch": 2.75, "learning_rate": 4.668962877918101e-08, "logits/chosen": -2.3801705837249756, "logits/rejected": -2.27915620803833, "logps/chosen": -288.0900573730469, "logps/rejected": -278.1248474121094, "loss": 0.5227, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.12331392616033554, "rewards/margins": 0.8058657646179199, "rewards/rejected": -0.9291796684265137, "step": 2660 }, { "epoch": 2.76, "learning_rate": 4.477611940298507e-08, "logits/chosen": -2.4003734588623047, "logits/rejected": -2.3077614307403564, "logps/chosen": -245.1756591796875, "logps/rejected": -227.53317260742188, "loss": 0.4946, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.14258424937725067, "rewards/margins": 0.8004018068313599, "rewards/rejected": -0.942986011505127, "step": 2670 }, { "epoch": 2.77, "learning_rate": 4.2862610026789124e-08, "logits/chosen": -2.4123265743255615, "logits/rejected": -2.295915365219116, "logps/chosen": -259.74932861328125, "logps/rejected": -223.12002563476562, "loss": 0.5486, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.15485641360282898, "rewards/margins": 0.7069037556648254, "rewards/rejected": -0.861760139465332, "step": 2680 }, { "epoch": 2.78, "learning_rate": 4.0949100650593186e-08, "logits/chosen": -2.3854644298553467, "logits/rejected": -2.328233003616333, "logps/chosen": -256.6706848144531, "logps/rejected": -243.61880493164062, "loss": 0.5474, "rewards/accuracies": 0.75, "rewards/chosen": -0.25512591004371643, "rewards/margins": 0.539734959602356, "rewards/rejected": -0.7948609590530396, "step": 2690 }, { "epoch": 2.79, "learning_rate": 3.903559127439724e-08, "logits/chosen": -2.2946548461914062, "logits/rejected": -2.278347969055176, "logps/chosen": -273.6446838378906, "logps/rejected": -230.7551727294922, "loss": 0.5081, "rewards/accuracies": 0.71875, "rewards/chosen": -0.12195589393377304, "rewards/margins": 0.7443081140518188, "rewards/rejected": -0.8662639856338501, "step": 2700 }, { "epoch": 2.8, "learning_rate": 3.71220818982013e-08, "logits/chosen": -2.405937910079956, "logits/rejected": -2.371584415435791, "logps/chosen": -273.60845947265625, "logps/rejected": -218.2377166748047, "loss": 0.5477, "rewards/accuracies": 0.6875, "rewards/chosen": -0.24379794299602509, "rewards/margins": 0.6154786348342896, "rewards/rejected": -0.8592765927314758, "step": 2710 }, { "epoch": 2.81, "learning_rate": 3.520857252200535e-08, "logits/chosen": -2.3632619380950928, "logits/rejected": -2.332373857498169, "logps/chosen": -300.9534912109375, "logps/rejected": -242.65316772460938, "loss": 0.5313, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.04076363891363144, "rewards/margins": 0.986183762550354, "rewards/rejected": -1.0269473791122437, "step": 2720 }, { "epoch": 2.82, "learning_rate": 3.3295063145809414e-08, "logits/chosen": -2.280702829360962, "logits/rejected": -2.2251949310302734, "logps/chosen": -267.03338623046875, "logps/rejected": -244.3408203125, "loss": 0.5581, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.13874498009681702, "rewards/margins": 0.7079328298568726, "rewards/rejected": -0.8466777801513672, "step": 2730 }, { "epoch": 2.83, "learning_rate": 3.138155376961347e-08, "logits/chosen": -2.2881321907043457, "logits/rejected": -2.3319568634033203, "logps/chosen": -234.1023712158203, "logps/rejected": -226.11300659179688, "loss": 0.5666, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.13704028725624084, "rewards/margins": 0.7299059629440308, "rewards/rejected": -0.8669462203979492, "step": 2740 }, { "epoch": 2.84, "learning_rate": 2.9468044393417525e-08, "logits/chosen": -2.4301745891571045, "logits/rejected": -2.402632236480713, "logps/chosen": -273.2855529785156, "logps/rejected": -262.68792724609375, "loss": 0.5173, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.061246536672115326, "rewards/margins": 0.8221151232719421, "rewards/rejected": -0.8833616971969604, "step": 2750 }, { "epoch": 2.85, "learning_rate": 2.755453501722158e-08, "logits/chosen": -2.3756215572357178, "logits/rejected": -2.3666157722473145, "logps/chosen": -285.3559265136719, "logps/rejected": -228.5872344970703, "loss": 0.509, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09171368926763535, "rewards/margins": 0.8335908055305481, "rewards/rejected": -0.9253045320510864, "step": 2760 }, { "epoch": 2.86, "learning_rate": 2.564102564102564e-08, "logits/chosen": -2.3984246253967285, "logits/rejected": -2.3833839893341064, "logps/chosen": -258.1267395019531, "logps/rejected": -225.0773468017578, "loss": 0.524, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.1915823221206665, "rewards/margins": 0.6331661343574524, "rewards/rejected": -0.8247483968734741, "step": 2770 }, { "epoch": 2.87, "learning_rate": 2.3727516264829695e-08, "logits/chosen": -2.320146083831787, "logits/rejected": -2.2947006225585938, "logps/chosen": -238.8065643310547, "logps/rejected": -234.02822875976562, "loss": 0.5354, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.17902429401874542, "rewards/margins": 0.5986486673355103, "rewards/rejected": -0.7776729464530945, "step": 2780 }, { "epoch": 2.88, "learning_rate": 2.1814006888633754e-08, "logits/chosen": -2.334745168685913, "logits/rejected": -2.353066921234131, "logps/chosen": -263.228515625, "logps/rejected": -233.03515625, "loss": 0.5472, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20434530079364777, "rewards/margins": 0.656082272529602, "rewards/rejected": -0.8604275584220886, "step": 2790 }, { "epoch": 2.89, "learning_rate": 1.990049751243781e-08, "logits/chosen": -2.396660327911377, "logits/rejected": -2.3448100090026855, "logps/chosen": -285.54638671875, "logps/rejected": -231.98117065429688, "loss": 0.5327, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1172541007399559, "rewards/margins": 0.6794244050979614, "rewards/rejected": -0.7966784238815308, "step": 2800 }, { "epoch": 2.9, "learning_rate": 1.7986988136241865e-08, "logits/chosen": -2.4138553142547607, "logits/rejected": -2.357382297515869, "logps/chosen": -262.9327697753906, "logps/rejected": -225.3331756591797, "loss": 0.4929, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.031798310577869415, "rewards/margins": 0.8773431777954102, "rewards/rejected": -0.9091414213180542, "step": 2810 }, { "epoch": 2.91, "learning_rate": 1.6073478760045924e-08, "logits/chosen": -2.2996766567230225, "logits/rejected": -2.2243258953094482, "logps/chosen": -285.4356384277344, "logps/rejected": -201.10208129882812, "loss": 0.5427, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.2585764229297638, "rewards/margins": 0.5490394234657288, "rewards/rejected": -0.8076158761978149, "step": 2820 }, { "epoch": 2.92, "learning_rate": 1.4159969383849981e-08, "logits/chosen": -2.4938712120056152, "logits/rejected": -2.4172348976135254, "logps/chosen": -312.3806457519531, "logps/rejected": -248.057373046875, "loss": 0.5262, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.1045370101928711, "rewards/margins": 0.8157347440719604, "rewards/rejected": -0.9202718734741211, "step": 2830 }, { "epoch": 2.93, "learning_rate": 1.2246460007654037e-08, "logits/chosen": -2.3658456802368164, "logits/rejected": -2.304481029510498, "logps/chosen": -266.4372863769531, "logps/rejected": -227.3815460205078, "loss": 0.5701, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.23648759722709656, "rewards/margins": 0.5965025424957275, "rewards/rejected": -0.8329901695251465, "step": 2840 }, { "epoch": 2.94, "learning_rate": 1.0332950631458094e-08, "logits/chosen": -2.307523012161255, "logits/rejected": -2.3531241416931152, "logps/chosen": -273.6283874511719, "logps/rejected": -230.62344360351562, "loss": 0.5356, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.150599867105484, "rewards/margins": 0.7397447228431702, "rewards/rejected": -0.8903446197509766, "step": 2850 }, { "epoch": 2.95, "learning_rate": 8.419441255262151e-09, "logits/chosen": -2.298656940460205, "logits/rejected": -2.319462299346924, "logps/chosen": -267.07867431640625, "logps/rejected": -219.8665313720703, "loss": 0.5007, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.09595485031604767, "rewards/margins": 0.6610188484191895, "rewards/rejected": -0.7569736838340759, "step": 2860 }, { "epoch": 2.96, "learning_rate": 6.505931879066207e-09, "logits/chosen": -2.4032797813415527, "logits/rejected": -2.3461287021636963, "logps/chosen": -235.07882690429688, "logps/rejected": -236.2791290283203, "loss": 0.5338, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.25155526399612427, "rewards/margins": 0.6543334722518921, "rewards/rejected": -0.9058888554573059, "step": 2870 }, { "epoch": 2.97, "learning_rate": 4.592422502870264e-09, "logits/chosen": -2.41361141204834, "logits/rejected": -2.317509651184082, "logps/chosen": -259.431884765625, "logps/rejected": -227.45394897460938, "loss": 0.5457, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.057419806718826294, "rewards/margins": 0.6843992471694946, "rewards/rejected": -0.7418190240859985, "step": 2880 }, { "epoch": 2.98, "learning_rate": 2.6789131266743202e-09, "logits/chosen": -2.3416519165039062, "logits/rejected": -2.272921562194824, "logps/chosen": -233.4044189453125, "logps/rejected": -225.3540496826172, "loss": 0.5175, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2774786353111267, "rewards/margins": 0.6129654049873352, "rewards/rejected": -0.8904439806938171, "step": 2890 }, { "epoch": 3.0, "learning_rate": 7.654037504783773e-10, "logits/chosen": -2.313737630844116, "logits/rejected": -2.3516671657562256, "logps/chosen": -238.37814331054688, "logps/rejected": -245.6781005859375, "loss": 0.5439, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1316399872303009, "rewards/margins": 0.7750416994094849, "rewards/rejected": -0.9066817164421082, "step": 2900 }, { "epoch": 3.0, "eval_logits/chosen": -2.065433979034424, "eval_logits/rejected": -1.9405803680419922, "eval_logps/chosen": -266.1706848144531, "eval_logps/rejected": -228.30780029296875, "eval_loss": 0.5255534052848816, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -0.15385985374450684, "eval_rewards/margins": 0.7486297488212585, "eval_rewards/rejected": -0.9024895429611206, "eval_runtime": 601.0805, "eval_samples_per_second": 3.327, "eval_steps_per_second": 0.208, "step": 2904 }, { "epoch": 3.0, "step": 2904, "total_flos": 0.0, "train_loss": 0.5642068754707158, "train_runtime": 89225.6094, "train_samples_per_second": 2.083, "train_steps_per_second": 0.033 } ], "logging_steps": 10, "max_steps": 2904, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }