zephyr-7b-gpo-v9-i1 / trainer_state.json
lole25's picture
Model save
134c76e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1875,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.6595744680851065e-08,
"logits/chosen": -1.7968215942382812,
"logits/rejected": -2.159090995788574,
"logps/chosen": -88.33059692382812,
"logps/rejected": -242.96200561523438,
"loss": 0.4322,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 2.6595744680851066e-07,
"logits/chosen": -2.003159999847412,
"logits/rejected": -1.3869916200637817,
"logps/chosen": -240.9772186279297,
"logps/rejected": -195.60606384277344,
"loss": 0.3319,
"rewards/accuracies": 0.3333333432674408,
"rewards/chosen": -3.270954766776413e-05,
"rewards/margins": -8.25071256258525e-05,
"rewards/rejected": 4.979758523404598e-05,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 5.319148936170213e-07,
"logits/chosen": -2.0388007164001465,
"logits/rejected": -1.5615094900131226,
"logps/chosen": -291.083740234375,
"logps/rejected": -277.5216369628906,
"loss": 0.3514,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 1.1951732631132472e-05,
"rewards/margins": 0.00027519199647940695,
"rewards/rejected": -0.0002632402756717056,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 7.97872340425532e-07,
"logits/chosen": -1.860889196395874,
"logits/rejected": -1.5862194299697876,
"logps/chosen": -248.38510131835938,
"logps/rejected": -261.7816467285156,
"loss": 0.324,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.0007015246083028615,
"rewards/margins": 0.004821115639060736,
"rewards/rejected": -0.005522639956325293,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 1.0638297872340427e-06,
"logits/chosen": -1.8764064311981201,
"logits/rejected": -1.2899483442306519,
"logps/chosen": -355.25958251953125,
"logps/rejected": -389.2695007324219,
"loss": 0.3286,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.009422576054930687,
"rewards/margins": 0.022184943780303,
"rewards/rejected": -0.03160751983523369,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 1.3297872340425533e-06,
"logits/chosen": -1.990142583847046,
"logits/rejected": -1.2961665391921997,
"logps/chosen": -316.00860595703125,
"logps/rejected": -277.88421630859375,
"loss": 0.2629,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.05700983479619026,
"rewards/margins": 0.059757936745882034,
"rewards/rejected": -0.1167677640914917,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 1.595744680851064e-06,
"logits/chosen": -1.748492956161499,
"logits/rejected": -0.8994135856628418,
"logps/chosen": -389.3627624511719,
"logps/rejected": -579.7057495117188,
"loss": 0.1989,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.09011684358119965,
"rewards/margins": 0.22812744975090027,
"rewards/rejected": -0.3182442784309387,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 1.8617021276595745e-06,
"logits/chosen": -1.6900399923324585,
"logits/rejected": -1.4010140895843506,
"logps/chosen": -420.5406799316406,
"logps/rejected": -859.8084716796875,
"loss": 0.1253,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.20870384573936462,
"rewards/margins": 0.3385527431964874,
"rewards/rejected": -0.547256588935852,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 2.1276595744680853e-06,
"logits/chosen": -1.7609751224517822,
"logits/rejected": -1.0384010076522827,
"logps/chosen": -474.48187255859375,
"logps/rejected": -747.34716796875,
"loss": 0.1309,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.19330377876758575,
"rewards/margins": 0.34078216552734375,
"rewards/rejected": -0.5340859293937683,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 2.393617021276596e-06,
"logits/chosen": -1.7291476726531982,
"logits/rejected": -1.2021540403366089,
"logps/chosen": -454.2134704589844,
"logps/rejected": -764.934326171875,
"loss": 0.16,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1365814059972763,
"rewards/margins": 0.36457785964012146,
"rewards/rejected": -0.5011593103408813,
"step": 90
},
{
"epoch": 0.05,
"learning_rate": 2.6595744680851065e-06,
"logits/chosen": -1.5737159252166748,
"logits/rejected": -0.9248941540718079,
"logps/chosen": -482.3492126464844,
"logps/rejected": -792.2481689453125,
"loss": 0.1239,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19203761219978333,
"rewards/margins": 0.3564862310886383,
"rewards/rejected": -0.5485238432884216,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 2.9255319148936174e-06,
"logits/chosen": -1.7435375452041626,
"logits/rejected": -1.356065034866333,
"logps/chosen": -416.564208984375,
"logps/rejected": -796.4661254882812,
"loss": 0.1253,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1526903361082077,
"rewards/margins": 0.3349696397781372,
"rewards/rejected": -0.4876599907875061,
"step": 110
},
{
"epoch": 0.06,
"learning_rate": 3.191489361702128e-06,
"logits/chosen": -1.6976553201675415,
"logits/rejected": -1.0894078016281128,
"logps/chosen": -409.96258544921875,
"logps/rejected": -617.7588500976562,
"loss": 0.1948,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.13733306527137756,
"rewards/margins": 0.293459415435791,
"rewards/rejected": -0.4307924807071686,
"step": 120
},
{
"epoch": 0.07,
"learning_rate": 3.457446808510639e-06,
"logits/chosen": -1.7993590831756592,
"logits/rejected": -1.400632619857788,
"logps/chosen": -370.1565856933594,
"logps/rejected": -709.3056640625,
"loss": 0.2055,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11207763850688934,
"rewards/margins": 0.340470552444458,
"rewards/rejected": -0.45254817605018616,
"step": 130
},
{
"epoch": 0.07,
"learning_rate": 3.723404255319149e-06,
"logits/chosen": -1.495011806488037,
"logits/rejected": -0.9245948791503906,
"logps/chosen": -388.5771789550781,
"logps/rejected": -792.4680786132812,
"loss": 0.1088,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1575288623571396,
"rewards/margins": 0.3997672200202942,
"rewards/rejected": -0.557296097278595,
"step": 140
},
{
"epoch": 0.08,
"learning_rate": 3.98936170212766e-06,
"logits/chosen": -1.6491578817367554,
"logits/rejected": -1.2172632217407227,
"logps/chosen": -407.8502502441406,
"logps/rejected": -738.5733642578125,
"loss": 0.1397,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.12080486118793488,
"rewards/margins": 0.32797589898109436,
"rewards/rejected": -0.44878071546554565,
"step": 150
},
{
"epoch": 0.09,
"learning_rate": 4.255319148936171e-06,
"logits/chosen": -1.6612653732299805,
"logits/rejected": -1.1705405712127686,
"logps/chosen": -353.0194396972656,
"logps/rejected": -689.8749389648438,
"loss": 0.1454,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.09746531397104263,
"rewards/margins": 0.3515530228614807,
"rewards/rejected": -0.44901829957962036,
"step": 160
},
{
"epoch": 0.09,
"learning_rate": 4.521276595744681e-06,
"logits/chosen": -1.54987370967865,
"logits/rejected": -1.1912695169448853,
"logps/chosen": -544.5787963867188,
"logps/rejected": -835.3132934570312,
"loss": 0.1048,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.23248295485973358,
"rewards/margins": 0.3334501087665558,
"rewards/rejected": -0.565933108329773,
"step": 170
},
{
"epoch": 0.1,
"learning_rate": 4.787234042553192e-06,
"logits/chosen": -1.7397425174713135,
"logits/rejected": -0.8725941777229309,
"logps/chosen": -510.69842529296875,
"logps/rejected": -840.5343017578125,
"loss": 0.1531,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2171137034893036,
"rewards/margins": 0.39573976397514343,
"rewards/rejected": -0.6128535270690918,
"step": 180
},
{
"epoch": 0.1,
"learning_rate": 4.999982660399688e-06,
"logits/chosen": -1.6966726779937744,
"logits/rejected": -1.09552800655365,
"logps/chosen": -514.5984497070312,
"logps/rejected": -911.4729614257812,
"loss": 0.1503,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.2481248676776886,
"rewards/margins": 0.3679044842720032,
"rewards/rejected": -0.6160293221473694,
"step": 190
},
{
"epoch": 0.11,
"learning_rate": 4.99937579964398e-06,
"logits/chosen": -1.4942667484283447,
"logits/rejected": -1.1419141292572021,
"logps/chosen": -432.5450134277344,
"logps/rejected": -730.1014404296875,
"loss": 0.1267,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.21937580406665802,
"rewards/margins": 0.3185574412345886,
"rewards/rejected": -0.5379332304000854,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 4.9979021993870645e-06,
"logits/chosen": -1.571395993232727,
"logits/rejected": -0.9183829426765442,
"logps/chosen": -480.79644775390625,
"logps/rejected": -813.7987060546875,
"loss": 0.1624,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18962515890598297,
"rewards/margins": 0.3737575113773346,
"rewards/rejected": -0.563382625579834,
"step": 210
},
{
"epoch": 0.12,
"learning_rate": 4.995562370647553e-06,
"logits/chosen": -1.668015480041504,
"logits/rejected": -1.1087052822113037,
"logps/chosen": -517.7100219726562,
"logps/rejected": -838.1522216796875,
"loss": 0.1372,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.22473697364330292,
"rewards/margins": 0.35942238569259644,
"rewards/rejected": -0.5841594338417053,
"step": 220
},
{
"epoch": 0.12,
"learning_rate": 4.992357124836838e-06,
"logits/chosen": -1.3532911539077759,
"logits/rejected": -0.6337820291519165,
"logps/chosen": -458.74462890625,
"logps/rejected": -736.6771240234375,
"loss": 0.1419,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.21437951922416687,
"rewards/margins": 0.35455334186553955,
"rewards/rejected": -0.5689328908920288,
"step": 230
},
{
"epoch": 0.13,
"learning_rate": 4.9882875734777044e-06,
"logits/chosen": -1.6833770275115967,
"logits/rejected": -1.0865981578826904,
"logps/chosen": -476.49578857421875,
"logps/rejected": -742.6441650390625,
"loss": 0.162,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1685108244419098,
"rewards/margins": 0.33409184217453003,
"rewards/rejected": -0.5026026368141174,
"step": 240
},
{
"epoch": 0.13,
"learning_rate": 4.983355127818882e-06,
"logits/chosen": -1.4850168228149414,
"logits/rejected": -0.9603360295295715,
"logps/chosen": -400.22967529296875,
"logps/rejected": -569.9345703125,
"loss": 0.1919,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.20730257034301758,
"rewards/margins": 0.22820453345775604,
"rewards/rejected": -0.4355071187019348,
"step": 250
},
{
"epoch": 0.14,
"learning_rate": 4.977561498345639e-06,
"logits/chosen": -1.544639229774475,
"logits/rejected": -1.1013596057891846,
"logps/chosen": -407.4434509277344,
"logps/rejected": -822.0099487304688,
"loss": 0.0966,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.19301848113536835,
"rewards/margins": 0.41144537925720215,
"rewards/rejected": -0.6044638752937317,
"step": 260
},
{
"epoch": 0.14,
"learning_rate": 4.970908694186624e-06,
"logits/chosen": -1.5448771715164185,
"logits/rejected": -0.8540661931037903,
"logps/chosen": -542.1297607421875,
"logps/rejected": -889.5344848632812,
"loss": 0.1371,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.25042372941970825,
"rewards/margins": 0.39966678619384766,
"rewards/rejected": -0.6500904560089111,
"step": 270
},
{
"epoch": 0.15,
"learning_rate": 4.9633990224171305e-06,
"logits/chosen": -1.4944156408309937,
"logits/rejected": -0.8036524057388306,
"logps/chosen": -671.7164306640625,
"logps/rejected": -922.0513916015625,
"loss": 0.1319,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.3747330605983734,
"rewards/margins": 0.3100079596042633,
"rewards/rejected": -0.6847410202026367,
"step": 280
},
{
"epoch": 0.15,
"learning_rate": 4.955035087259046e-06,
"logits/chosen": -1.4477952718734741,
"logits/rejected": -0.8218593597412109,
"logps/chosen": -612.0665893554688,
"logps/rejected": -863.3322143554688,
"loss": 0.1494,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.3176848888397217,
"rewards/margins": 0.30745354294776917,
"rewards/rejected": -0.6251384019851685,
"step": 290
},
{
"epoch": 0.16,
"learning_rate": 4.945819789177756e-06,
"logits/chosen": -1.611026406288147,
"logits/rejected": -1.1793110370635986,
"logps/chosen": -518.3214111328125,
"logps/rejected": -892.6036987304688,
"loss": 0.1228,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2295423001050949,
"rewards/margins": 0.37014490365982056,
"rewards/rejected": -0.5996872186660767,
"step": 300
},
{
"epoch": 0.17,
"learning_rate": 4.935756323876306e-06,
"logits/chosen": -1.508418083190918,
"logits/rejected": -1.277306318283081,
"logps/chosen": -459.0326232910156,
"logps/rejected": -823.93017578125,
"loss": 0.1518,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.27009934186935425,
"rewards/margins": 0.32507914304733276,
"rewards/rejected": -0.5951785445213318,
"step": 310
},
{
"epoch": 0.17,
"learning_rate": 4.924848181187199e-06,
"logits/chosen": -1.6442441940307617,
"logits/rejected": -1.1329659223556519,
"logps/chosen": -494.06097412109375,
"logps/rejected": -854.1019287109375,
"loss": 0.1422,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.20919394493103027,
"rewards/margins": 0.39555859565734863,
"rewards/rejected": -0.6047526001930237,
"step": 320
},
{
"epoch": 0.18,
"learning_rate": 4.913099143862173e-06,
"logits/chosen": -1.3633651733398438,
"logits/rejected": -0.9123932123184204,
"logps/chosen": -474.5048828125,
"logps/rejected": -829.7545776367188,
"loss": 0.1498,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.28733277320861816,
"rewards/margins": 0.3500373959541321,
"rewards/rejected": -0.6373701095581055,
"step": 330
},
{
"epoch": 0.18,
"learning_rate": 4.900513286260416e-06,
"logits/chosen": -1.5975598096847534,
"logits/rejected": -1.2887117862701416,
"logps/chosen": -400.32781982421875,
"logps/rejected": -744.0382080078125,
"loss": 0.1053,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.20992426574230194,
"rewards/margins": 0.3338248133659363,
"rewards/rejected": -0.5437491536140442,
"step": 340
},
{
"epoch": 0.19,
"learning_rate": 4.887094972935645e-06,
"logits/chosen": -1.764219045639038,
"logits/rejected": -0.9871004819869995,
"logps/chosen": -573.0086059570312,
"logps/rejected": -937.3956909179688,
"loss": 0.1504,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2734777331352234,
"rewards/margins": 0.37265342473983765,
"rewards/rejected": -0.646131157875061,
"step": 350
},
{
"epoch": 0.19,
"learning_rate": 4.87284885712256e-06,
"logits/chosen": -1.5166432857513428,
"logits/rejected": -0.8717886209487915,
"logps/chosen": -572.190673828125,
"logps/rejected": -876.5632934570312,
"loss": 0.1876,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.32470396161079407,
"rewards/margins": 0.34118732810020447,
"rewards/rejected": -0.6658912897109985,
"step": 360
},
{
"epoch": 0.2,
"learning_rate": 4.857779879123181e-06,
"logits/chosen": -1.7403156757354736,
"logits/rejected": -0.9518265724182129,
"logps/chosen": -505.53387451171875,
"logps/rejected": -778.5391845703125,
"loss": 0.1207,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.20143766701221466,
"rewards/margins": 0.37924817204475403,
"rewards/rejected": -0.5806857943534851,
"step": 370
},
{
"epoch": 0.2,
"learning_rate": 4.841893264593643e-06,
"logits/chosen": -1.7110675573349,
"logits/rejected": -1.026960849761963,
"logps/chosen": -472.236328125,
"logps/rejected": -761.3172607421875,
"loss": 0.095,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2130139172077179,
"rewards/margins": 0.36098140478134155,
"rewards/rejected": -0.5739952325820923,
"step": 380
},
{
"epoch": 0.21,
"learning_rate": 4.825194522732023e-06,
"logits/chosen": -1.6140925884246826,
"logits/rejected": -1.1293842792510986,
"logps/chosen": -507.7935485839844,
"logps/rejected": -913.9110107421875,
"loss": 0.1277,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.26502326130867004,
"rewards/margins": 0.3756178915500641,
"rewards/rejected": -0.6406410932540894,
"step": 390
},
{
"epoch": 0.21,
"learning_rate": 4.807689444367853e-06,
"logits/chosen": -1.7682578563690186,
"logits/rejected": -1.3489004373550415,
"logps/chosen": -495.90869140625,
"logps/rejected": -782.658447265625,
"loss": 0.1251,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.260581910610199,
"rewards/margins": 0.3089093565940857,
"rewards/rejected": -0.5694912075996399,
"step": 400
},
{
"epoch": 0.22,
"learning_rate": 4.78938409995396e-06,
"logits/chosen": -1.5254316329956055,
"logits/rejected": -1.0945005416870117,
"logps/chosen": -463.646484375,
"logps/rejected": -902.4519653320312,
"loss": 0.1313,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.24164071679115295,
"rewards/margins": 0.3996545672416687,
"rewards/rejected": -0.6412952542304993,
"step": 410
},
{
"epoch": 0.22,
"learning_rate": 4.770284837461342e-06,
"logits/chosen": -1.5965580940246582,
"logits/rejected": -0.836743175983429,
"logps/chosen": -586.2058715820312,
"logps/rejected": -917.9168090820312,
"loss": 0.1124,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.26902061700820923,
"rewards/margins": 0.3916351795196533,
"rewards/rejected": -0.6606558561325073,
"step": 420
},
{
"epoch": 0.23,
"learning_rate": 4.7503982801778015e-06,
"logits/chosen": -1.557950735092163,
"logits/rejected": -1.0112215280532837,
"logps/chosen": -501.4098205566406,
"logps/rejected": -789.2760009765625,
"loss": 0.154,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.24787664413452148,
"rewards/margins": 0.3141789734363556,
"rewards/rejected": -0.5620556473731995,
"step": 430
},
{
"epoch": 0.23,
"learning_rate": 4.729731324411104e-06,
"logits/chosen": -1.7676448822021484,
"logits/rejected": -1.1603769063949585,
"logps/chosen": -429.96734619140625,
"logps/rejected": -750.7506103515625,
"loss": 0.1237,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1744639277458191,
"rewards/margins": 0.35027581453323364,
"rewards/rejected": -0.5247397422790527,
"step": 440
},
{
"epoch": 0.24,
"learning_rate": 4.7082911370974645e-06,
"logits/chosen": -1.8625621795654297,
"logits/rejected": -1.36086905002594,
"logps/chosen": -548.0135498046875,
"logps/rejected": -759.1170654296875,
"loss": 0.1707,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.2390761822462082,
"rewards/margins": 0.2892398238182068,
"rewards/rejected": -0.5283160209655762,
"step": 450
},
{
"epoch": 0.25,
"learning_rate": 4.68608515331618e-06,
"logits/chosen": -1.7188348770141602,
"logits/rejected": -1.187195062637329,
"logps/chosen": -492.1756286621094,
"logps/rejected": -859.0760498046875,
"loss": 0.1414,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.23255252838134766,
"rewards/margins": 0.3831843137741089,
"rewards/rejected": -0.6157368421554565,
"step": 460
},
{
"epoch": 0.25,
"learning_rate": 4.663121073711269e-06,
"logits/chosen": -1.5974490642547607,
"logits/rejected": -1.2564659118652344,
"logps/chosen": -336.80487060546875,
"logps/rejected": -661.6661376953125,
"loss": 0.1196,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.14996656775474548,
"rewards/margins": 0.32413381338119507,
"rewards/rejected": -0.47410035133361816,
"step": 470
},
{
"epoch": 0.26,
"learning_rate": 4.63940686182103e-06,
"logits/chosen": -1.6767423152923584,
"logits/rejected": -1.1938632726669312,
"logps/chosen": -505.0990295410156,
"logps/rejected": -846.8779296875,
"loss": 0.159,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.2421807050704956,
"rewards/margins": 0.3599635660648346,
"rewards/rejected": -0.6021442413330078,
"step": 480
},
{
"epoch": 0.26,
"learning_rate": 4.614950741316425e-06,
"logits/chosen": -1.529900312423706,
"logits/rejected": -1.0826785564422607,
"logps/chosen": -421.31707763671875,
"logps/rejected": -685.7420654296875,
"loss": 0.1721,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.22861020267009735,
"rewards/margins": 0.28916865587234497,
"rewards/rejected": -0.5177788734436035,
"step": 490
},
{
"epoch": 0.27,
"learning_rate": 4.589761193149254e-06,
"logits/chosen": -1.6966304779052734,
"logits/rejected": -0.9312071800231934,
"logps/chosen": -535.8978271484375,
"logps/rejected": -940.1627197265625,
"loss": 0.1144,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.24013669788837433,
"rewards/margins": 0.4429057240486145,
"rewards/rejected": -0.6830424070358276,
"step": 500
},
{
"epoch": 0.27,
"learning_rate": 4.563846952611112e-06,
"logits/chosen": -1.6221929788589478,
"logits/rejected": -0.9574362635612488,
"logps/chosen": -428.6238708496094,
"logps/rejected": -704.8244018554688,
"loss": 0.0844,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.195401132106781,
"rewards/margins": 0.3248489797115326,
"rewards/rejected": -0.520250141620636,
"step": 510
},
{
"epoch": 0.28,
"learning_rate": 4.537217006304141e-06,
"logits/chosen": -1.4427409172058105,
"logits/rejected": -1.003901481628418,
"logps/chosen": -429.660400390625,
"logps/rejected": -772.5963134765625,
"loss": 0.1457,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15723460912704468,
"rewards/margins": 0.3122255206108093,
"rewards/rejected": -0.4694600999355316,
"step": 520
},
{
"epoch": 0.28,
"learning_rate": 4.50988058902464e-06,
"logits/chosen": -1.2717740535736084,
"logits/rejected": -0.8480876684188843,
"logps/chosen": -367.8202209472656,
"logps/rejected": -776.1926879882812,
"loss": 0.1152,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.14414677023887634,
"rewards/margins": 0.38212689757347107,
"rewards/rejected": -0.5262737274169922,
"step": 530
},
{
"epoch": 0.29,
"learning_rate": 4.481847180560593e-06,
"logits/chosen": -1.5822323560714722,
"logits/rejected": -0.9035153388977051,
"logps/chosen": -439.30816650390625,
"logps/rejected": -708.1422729492188,
"loss": 0.2078,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.18015776574611664,
"rewards/margins": 0.32809919118881226,
"rewards/rejected": -0.5082569122314453,
"step": 540
},
{
"epoch": 0.29,
"learning_rate": 4.453126502404253e-06,
"logits/chosen": -1.6248279809951782,
"logits/rejected": -0.9642871022224426,
"logps/chosen": -561.463623046875,
"logps/rejected": -740.08935546875,
"loss": 0.1773,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.27120378613471985,
"rewards/margins": 0.28620854020118713,
"rewards/rejected": -0.557412326335907,
"step": 550
},
{
"epoch": 0.3,
"learning_rate": 4.423728514380892e-06,
"logits/chosen": -1.4605586528778076,
"logits/rejected": -0.8407928347587585,
"logps/chosen": -514.7965087890625,
"logps/rejected": -831.8440551757812,
"loss": 0.12,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.24941739439964294,
"rewards/margins": 0.36968275904655457,
"rewards/rejected": -0.6191002130508423,
"step": 560
},
{
"epoch": 0.3,
"learning_rate": 4.393663411194918e-06,
"logits/chosen": -1.4048388004302979,
"logits/rejected": -1.0212897062301636,
"logps/chosen": -490.42431640625,
"logps/rejected": -855.1259765625,
"loss": 0.1416,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.24018950760364532,
"rewards/margins": 0.3597009778022766,
"rewards/rejected": -0.5998905301094055,
"step": 570
},
{
"epoch": 0.31,
"learning_rate": 4.362941618894523e-06,
"logits/chosen": -1.3778400421142578,
"logits/rejected": -0.983964741230011,
"logps/chosen": -586.1995849609375,
"logps/rejected": -981.2742309570312,
"loss": 0.1205,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.30800628662109375,
"rewards/margins": 0.3807603120803833,
"rewards/rejected": -0.6887666583061218,
"step": 580
},
{
"epoch": 0.31,
"learning_rate": 4.331573791256116e-06,
"logits/chosen": -1.4645698070526123,
"logits/rejected": -0.9271195530891418,
"logps/chosen": -621.4105224609375,
"logps/rejected": -899.0559692382812,
"loss": 0.1273,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2714827358722687,
"rewards/margins": 0.362936794757843,
"rewards/rejected": -0.6344195604324341,
"step": 590
},
{
"epoch": 0.32,
"learning_rate": 4.299570806089786e-06,
"logits/chosen": -1.6326652765274048,
"logits/rejected": -0.9927080273628235,
"logps/chosen": -490.701904296875,
"logps/rejected": -842.052734375,
"loss": 0.1023,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18793320655822754,
"rewards/margins": 0.408639132976532,
"rewards/rejected": -0.5965723395347595,
"step": 600
},
{
"epoch": 0.33,
"learning_rate": 4.266943761467057e-06,
"logits/chosen": -1.2816569805145264,
"logits/rejected": -0.8941723704338074,
"logps/chosen": -367.02191162109375,
"logps/rejected": -760.0553588867188,
"loss": 0.1443,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.11693791300058365,
"rewards/margins": 0.39976662397384644,
"rewards/rejected": -0.5167044997215271,
"step": 610
},
{
"epoch": 0.33,
"learning_rate": 4.233703971872287e-06,
"logits/chosen": -1.8729069232940674,
"logits/rejected": -1.0977063179016113,
"logps/chosen": -393.6733093261719,
"logps/rejected": -763.2752075195312,
"loss": 0.1335,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.08343084156513214,
"rewards/margins": 0.4291655123233795,
"rewards/rejected": -0.5125963687896729,
"step": 620
},
{
"epoch": 0.34,
"learning_rate": 4.1998629642789925e-06,
"logits/chosen": -1.5668641328811646,
"logits/rejected": -1.1349601745605469,
"logps/chosen": -426.9754333496094,
"logps/rejected": -820.5556640625,
"loss": 0.1742,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1329190582036972,
"rewards/margins": 0.36928990483283997,
"rewards/rejected": -0.5022088885307312,
"step": 630
},
{
"epoch": 0.34,
"learning_rate": 4.165432474152505e-06,
"logits/chosen": -1.5012271404266357,
"logits/rejected": -1.278693675994873,
"logps/chosen": -365.3034973144531,
"logps/rejected": -678.4292602539062,
"loss": 0.1737,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.13991737365722656,
"rewards/margins": 0.3017304837703705,
"rewards/rejected": -0.44164785742759705,
"step": 640
},
{
"epoch": 0.35,
"learning_rate": 4.130424441380308e-06,
"logits/chosen": -1.42804753780365,
"logits/rejected": -0.967817485332489,
"logps/chosen": -411.77801513671875,
"logps/rejected": -691.2272338867188,
"loss": 0.1527,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1383361965417862,
"rewards/margins": 0.33920183777809143,
"rewards/rejected": -0.47753801941871643,
"step": 650
},
{
"epoch": 0.35,
"learning_rate": 4.09485100613151e-06,
"logits/chosen": -1.5555391311645508,
"logits/rejected": -1.1440869569778442,
"logps/chosen": -434.00335693359375,
"logps/rejected": -744.3508911132812,
"loss": 0.168,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19056299328804016,
"rewards/margins": 0.30447274446487427,
"rewards/rejected": -0.49503573775291443,
"step": 660
},
{
"epoch": 0.36,
"learning_rate": 4.058724504646834e-06,
"logits/chosen": -1.8099536895751953,
"logits/rejected": -1.0837316513061523,
"logps/chosen": -427.7793884277344,
"logps/rejected": -729.915283203125,
"loss": 0.1229,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1386745572090149,
"rewards/margins": 0.3526052236557007,
"rewards/rejected": -0.4912797808647156,
"step": 670
},
{
"epoch": 0.36,
"learning_rate": 4.022057464960632e-06,
"logits/chosen": -1.6691503524780273,
"logits/rejected": -1.33521568775177,
"logps/chosen": -428.7286071777344,
"logps/rejected": -789.0191040039062,
"loss": 0.1607,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1496300995349884,
"rewards/margins": 0.35229435563087463,
"rewards/rejected": -0.5019243955612183,
"step": 680
},
{
"epoch": 0.37,
"learning_rate": 3.984862602556383e-06,
"logits/chosen": -1.6232519149780273,
"logits/rejected": -1.197933554649353,
"logps/chosen": -460.2228088378906,
"logps/rejected": -696.0914306640625,
"loss": 0.1346,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18895591795444489,
"rewards/margins": 0.28954973816871643,
"rewards/rejected": -0.4785057008266449,
"step": 690
},
{
"epoch": 0.37,
"learning_rate": 3.947152815957187e-06,
"logits/chosen": -1.5458933115005493,
"logits/rejected": -1.116236925125122,
"logps/chosen": -433.87322998046875,
"logps/rejected": -756.8858642578125,
"loss": 0.1492,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.22537223994731903,
"rewards/margins": 0.34959647059440613,
"rewards/rejected": -0.5749687552452087,
"step": 700
},
{
"epoch": 0.38,
"learning_rate": 3.908941182252785e-06,
"logits/chosen": -1.5793603658676147,
"logits/rejected": -0.9729734659194946,
"logps/chosen": -458.96368408203125,
"logps/rejected": -781.1962890625,
"loss": 0.1615,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1985333412885666,
"rewards/margins": 0.3719526529312134,
"rewards/rejected": -0.5704860091209412,
"step": 710
},
{
"epoch": 0.38,
"learning_rate": 3.8702409525646535e-06,
"logits/chosen": -1.6880747079849243,
"logits/rejected": -1.0946999788284302,
"logps/chosen": -550.5426635742188,
"logps/rejected": -861.6978759765625,
"loss": 0.1362,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1908409297466278,
"rewards/margins": 0.3778737485408783,
"rewards/rejected": -0.5687146782875061,
"step": 720
},
{
"epoch": 0.39,
"learning_rate": 3.8310655474507495e-06,
"logits/chosen": -1.7694594860076904,
"logits/rejected": -1.1918199062347412,
"logps/chosen": -443.54736328125,
"logps/rejected": -717.8020629882812,
"loss": 0.1418,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.16424255073070526,
"rewards/margins": 0.30146175622940063,
"rewards/rejected": -0.4657043516635895,
"step": 730
},
{
"epoch": 0.39,
"learning_rate": 3.7914285522515002e-06,
"logits/chosen": -1.539620280265808,
"logits/rejected": -1.3648602962493896,
"logps/chosen": -468.6385192871094,
"logps/rejected": -892.7566528320312,
"loss": 0.1552,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.22199459373950958,
"rewards/margins": 0.36069172620773315,
"rewards/rejected": -0.5826863050460815,
"step": 740
},
{
"epoch": 0.4,
"learning_rate": 3.751343712378639e-06,
"logits/chosen": -1.68185555934906,
"logits/rejected": -1.0438605546951294,
"logps/chosen": -377.9205627441406,
"logps/rejected": -688.3480834960938,
"loss": 0.1417,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.15213271975517273,
"rewards/margins": 0.3400834798812866,
"rewards/rejected": -0.49221619963645935,
"step": 750
},
{
"epoch": 0.41,
"learning_rate": 3.710824928548546e-06,
"logits/chosen": -1.7241179943084717,
"logits/rejected": -1.1749062538146973,
"logps/chosen": -398.90521240234375,
"logps/rejected": -792.80078125,
"loss": 0.1218,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.14098913967609406,
"rewards/margins": 0.4070391058921814,
"rewards/rejected": -0.5480281710624695,
"step": 760
},
{
"epoch": 0.41,
"learning_rate": 3.6698862519617225e-06,
"logits/chosen": -1.862091064453125,
"logits/rejected": -1.0774017572402954,
"logps/chosen": -380.6012878417969,
"logps/rejected": -803.9888916015625,
"loss": 0.1009,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.12483291327953339,
"rewards/margins": 0.46553611755371094,
"rewards/rejected": -0.5903691053390503,
"step": 770
},
{
"epoch": 0.42,
"learning_rate": 3.6285418794300793e-06,
"logits/chosen": -1.416322946548462,
"logits/rejected": -0.8399195671081543,
"logps/chosen": -444.05523681640625,
"logps/rejected": -761.3108520507812,
"loss": 0.1571,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.20516355335712433,
"rewards/margins": 0.3872140049934387,
"rewards/rejected": -0.5923775434494019,
"step": 780
},
{
"epoch": 0.42,
"learning_rate": 3.5868061484537365e-06,
"logits/chosen": -1.39794921875,
"logits/rejected": -0.8267971873283386,
"logps/chosen": -507.1766052246094,
"logps/rejected": -875.86962890625,
"loss": 0.1497,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.23690223693847656,
"rewards/margins": 0.40496787428855896,
"rewards/rejected": -0.6418701410293579,
"step": 790
},
{
"epoch": 0.43,
"learning_rate": 3.5446935322490285e-06,
"logits/chosen": -1.7719318866729736,
"logits/rejected": -0.9355955123901367,
"logps/chosen": -544.9541015625,
"logps/rejected": -865.5302734375,
"loss": 0.1963,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.2105661928653717,
"rewards/margins": 0.3866081237792969,
"rewards/rejected": -0.5971742868423462,
"step": 800
},
{
"epoch": 0.43,
"learning_rate": 3.502218634729447e-06,
"logits/chosen": -1.6639026403427124,
"logits/rejected": -1.067781925201416,
"logps/chosen": -575.9091796875,
"logps/rejected": -838.0983276367188,
"loss": 0.1233,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.23653562366962433,
"rewards/margins": 0.3239360749721527,
"rewards/rejected": -0.5604716539382935,
"step": 810
},
{
"epoch": 0.44,
"learning_rate": 3.459396185441265e-06,
"logits/chosen": -1.7067358493804932,
"logits/rejected": -1.0498546361923218,
"logps/chosen": -398.35516357421875,
"logps/rejected": -626.8757934570312,
"loss": 0.1608,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.1356094628572464,
"rewards/margins": 0.3172938823699951,
"rewards/rejected": -0.4529033601284027,
"step": 820
},
{
"epoch": 0.44,
"learning_rate": 3.4162410344555834e-06,
"logits/chosen": -1.9210001230239868,
"logits/rejected": -1.1206413507461548,
"logps/chosen": -405.9615783691406,
"logps/rejected": -725.4310913085938,
"loss": 0.125,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.10851490497589111,
"rewards/margins": 0.38589829206466675,
"rewards/rejected": -0.49441319704055786,
"step": 830
},
{
"epoch": 0.45,
"learning_rate": 3.3727681472185937e-06,
"logits/chosen": -1.6562303304672241,
"logits/rejected": -1.19851553440094,
"logps/chosen": -486.35107421875,
"logps/rejected": -963.0572509765625,
"loss": 0.1105,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.22348380088806152,
"rewards/margins": 0.4227561056613922,
"rewards/rejected": -0.6462398767471313,
"step": 840
},
{
"epoch": 0.45,
"learning_rate": 3.3289925993618217e-06,
"logits/chosen": -1.5856201648712158,
"logits/rejected": -1.0767395496368408,
"logps/chosen": -526.1747436523438,
"logps/rejected": -797.7916870117188,
"loss": 0.137,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.2687681019306183,
"rewards/margins": 0.2989320755004883,
"rewards/rejected": -0.567700207233429,
"step": 850
},
{
"epoch": 0.46,
"learning_rate": 3.2849295714741643e-06,
"logits/chosen": -1.7678568363189697,
"logits/rejected": -1.2151532173156738,
"logps/chosen": -597.7952880859375,
"logps/rejected": -848.87841796875,
"loss": 0.1308,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.27867773175239563,
"rewards/margins": 0.30206385254859924,
"rewards/rejected": -0.5807415843009949,
"step": 860
},
{
"epoch": 0.46,
"learning_rate": 3.2405943438375287e-06,
"logits/chosen": -1.7643588781356812,
"logits/rejected": -1.099827527999878,
"logps/chosen": -423.9742126464844,
"logps/rejected": -774.4637451171875,
"loss": 0.0974,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1482265591621399,
"rewards/margins": 0.410900741815567,
"rewards/rejected": -0.5591272711753845,
"step": 870
},
{
"epoch": 0.47,
"learning_rate": 3.1960022911279036e-06,
"logits/chosen": -1.5414252281188965,
"logits/rejected": -1.1484423875808716,
"logps/chosen": -493.69464111328125,
"logps/rejected": -835.8029174804688,
"loss": 0.1526,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.19744431972503662,
"rewards/margins": 0.36680763959884644,
"rewards/rejected": -0.5642520189285278,
"step": 880
},
{
"epoch": 0.47,
"learning_rate": 3.1511688770836844e-06,
"logits/chosen": -1.511249303817749,
"logits/rejected": -1.3401678800582886,
"logps/chosen": -404.75933837890625,
"logps/rejected": -805.8262939453125,
"loss": 0.1035,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.18373355269432068,
"rewards/margins": 0.3318132758140564,
"rewards/rejected": -0.5155468583106995,
"step": 890
},
{
"epoch": 0.48,
"learning_rate": 3.1061096491431307e-06,
"logits/chosen": -1.8831459283828735,
"logits/rejected": -1.2160544395446777,
"logps/chosen": -447.65032958984375,
"logps/rejected": -843.8660278320312,
"loss": 0.1345,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.13529552519321442,
"rewards/margins": 0.40966707468032837,
"rewards/rejected": -0.5449625849723816,
"step": 900
},
{
"epoch": 0.49,
"learning_rate": 3.0608402330527796e-06,
"logits/chosen": -1.6770378351211548,
"logits/rejected": -0.9972168803215027,
"logps/chosen": -379.8583984375,
"logps/rejected": -719.3693237304688,
"loss": 0.1765,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15062110126018524,
"rewards/margins": 0.3817873001098633,
"rewards/rejected": -0.5324083566665649,
"step": 910
},
{
"epoch": 0.49,
"learning_rate": 3.0153763274487176e-06,
"logits/chosen": -1.4134846925735474,
"logits/rejected": -0.966874897480011,
"logps/chosen": -441.3450622558594,
"logps/rejected": -707.3884887695312,
"loss": 0.1235,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.21538302302360535,
"rewards/margins": 0.3049529790878296,
"rewards/rejected": -0.5203360319137573,
"step": 920
},
{
"epoch": 0.5,
"learning_rate": 2.9697336984125683e-06,
"logits/chosen": -1.6667283773422241,
"logits/rejected": -1.0133411884307861,
"logps/chosen": -401.2959899902344,
"logps/rejected": -851.93701171875,
"loss": 0.1206,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.15221676230430603,
"rewards/margins": 0.45876413583755493,
"rewards/rejected": -0.6109809279441833,
"step": 930
},
{
"epoch": 0.5,
"learning_rate": 2.923928174004094e-06,
"logits/chosen": -1.8437814712524414,
"logits/rejected": -1.0747764110565186,
"logps/chosen": -470.7169494628906,
"logps/rejected": -732.7559814453125,
"loss": 0.1247,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.13192041218280792,
"rewards/margins": 0.3781585991382599,
"rewards/rejected": -0.5100789666175842,
"step": 940
},
{
"epoch": 0.51,
"learning_rate": 2.8779756387723036e-06,
"logits/chosen": -1.7663402557373047,
"logits/rejected": -1.3018739223480225,
"logps/chosen": -446.77490234375,
"logps/rejected": -766.7832641601562,
"loss": 0.1146,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1646779477596283,
"rewards/margins": 0.36457663774490356,
"rewards/rejected": -0.5292545557022095,
"step": 950
},
{
"epoch": 0.51,
"learning_rate": 2.831892028246968e-06,
"logits/chosen": -1.848724603652954,
"logits/rejected": -1.216956377029419,
"logps/chosen": -418.67645263671875,
"logps/rejected": -703.2694702148438,
"loss": 0.1209,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.14982689917087555,
"rewards/margins": 0.36974358558654785,
"rewards/rejected": -0.5195704698562622,
"step": 960
},
{
"epoch": 0.52,
"learning_rate": 2.7856933234124617e-06,
"logits/chosen": -1.7911808490753174,
"logits/rejected": -1.0922878980636597,
"logps/chosen": -448.37603759765625,
"logps/rejected": -834.2364501953125,
"loss": 0.1538,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.17878659069538116,
"rewards/margins": 0.3954610228538513,
"rewards/rejected": -0.5742476582527161,
"step": 970
},
{
"epoch": 0.52,
"learning_rate": 2.7393955451658387e-06,
"logits/chosen": -1.7210479974746704,
"logits/rejected": -1.2294584512710571,
"logps/chosen": -514.4754028320312,
"logps/rejected": -868.5929565429688,
"loss": 0.1626,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21787652373313904,
"rewards/margins": 0.3894199728965759,
"rewards/rejected": -0.6072965264320374,
"step": 980
},
{
"epoch": 0.53,
"learning_rate": 2.6930147487610667e-06,
"logits/chosen": -1.5907623767852783,
"logits/rejected": -0.78331458568573,
"logps/chosen": -462.7984313964844,
"logps/rejected": -805.7174072265625,
"loss": 0.1373,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17765957117080688,
"rewards/margins": 0.40347957611083984,
"rewards/rejected": -0.581139087677002,
"step": 990
},
{
"epoch": 0.53,
"learning_rate": 2.6465670182413487e-06,
"logits/chosen": -1.6310056447982788,
"logits/rejected": -1.0298982858657837,
"logps/chosen": -411.04937744140625,
"logps/rejected": -758.7462158203125,
"loss": 0.1237,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1506483405828476,
"rewards/margins": 0.36691543459892273,
"rewards/rejected": -0.5175637602806091,
"step": 1000
},
{
"epoch": 0.54,
"learning_rate": 2.6000684608614594e-06,
"logits/chosen": -1.6570842266082764,
"logits/rejected": -0.8277125358581543,
"logps/chosen": -506.580810546875,
"logps/rejected": -801.989990234375,
"loss": 0.1436,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1983393281698227,
"rewards/margins": 0.38503485918045044,
"rewards/rejected": -0.5833742022514343,
"step": 1010
},
{
"epoch": 0.54,
"learning_rate": 2.5535352015020338e-06,
"logits/chosen": -1.528637170791626,
"logits/rejected": -0.8484199643135071,
"logps/chosen": -470.8020935058594,
"logps/rejected": -820.8448486328125,
"loss": 0.1363,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20915472507476807,
"rewards/margins": 0.38422003388404846,
"rewards/rejected": -0.5933747887611389,
"step": 1020
},
{
"epoch": 0.55,
"learning_rate": 2.506983377077741e-06,
"logits/chosen": -1.3463196754455566,
"logits/rejected": -1.018822193145752,
"logps/chosen": -464.81524658203125,
"logps/rejected": -807.076171875,
"loss": 0.1584,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.21939115226268768,
"rewards/margins": 0.3329920172691345,
"rewards/rejected": -0.5523831844329834,
"step": 1030
},
{
"epoch": 0.55,
"learning_rate": 2.460429130941289e-06,
"logits/chosen": -1.4068031311035156,
"logits/rejected": -0.9966346621513367,
"logps/chosen": -443.41583251953125,
"logps/rejected": -826.1185302734375,
"loss": 0.1182,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1990918219089508,
"rewards/margins": 0.39130455255508423,
"rewards/rejected": -0.5903963446617126,
"step": 1040
},
{
"epoch": 0.56,
"learning_rate": 2.413888607285192e-06,
"logits/chosen": -1.2919907569885254,
"logits/rejected": -0.9193531274795532,
"logps/chosen": -496.358642578125,
"logps/rejected": -845.7939453125,
"loss": 0.173,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.24995890259742737,
"rewards/margins": 0.3503498435020447,
"rewards/rejected": -0.6003087162971497,
"step": 1050
},
{
"epoch": 0.57,
"learning_rate": 2.367377945543249e-06,
"logits/chosen": -1.6841480731964111,
"logits/rejected": -0.907370924949646,
"logps/chosen": -446.6328125,
"logps/rejected": -884.2018432617188,
"loss": 0.1068,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.15798960626125336,
"rewards/margins": 0.4673282206058502,
"rewards/rejected": -0.6253177523612976,
"step": 1060
},
{
"epoch": 0.57,
"learning_rate": 2.320913274793676e-06,
"logits/chosen": -1.7113037109375,
"logits/rejected": -1.1816798448562622,
"logps/chosen": -410.67645263671875,
"logps/rejected": -770.4984741210938,
"loss": 0.1423,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1650415062904358,
"rewards/margins": 0.37022119760513306,
"rewards/rejected": -0.5352627038955688,
"step": 1070
},
{
"epoch": 0.58,
"learning_rate": 2.27451070816582e-06,
"logits/chosen": -1.6226348876953125,
"logits/rejected": -0.9200002551078796,
"logps/chosen": -518.1405029296875,
"logps/rejected": -891.6884765625,
"loss": 0.1105,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.16074618697166443,
"rewards/margins": 0.47465044260025024,
"rewards/rejected": -0.6353966593742371,
"step": 1080
},
{
"epoch": 0.58,
"learning_rate": 2.228186337252414e-06,
"logits/chosen": -1.7294307947158813,
"logits/rejected": -0.8779215812683105,
"logps/chosen": -516.14013671875,
"logps/rejected": -824.5764770507812,
"loss": 0.1407,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17551277577877045,
"rewards/margins": 0.40844354033470154,
"rewards/rejected": -0.583956241607666,
"step": 1090
},
{
"epoch": 0.59,
"learning_rate": 2.1819562265292946e-06,
"logits/chosen": -1.5813789367675781,
"logits/rejected": -1.001509666442871,
"logps/chosen": -429.26593017578125,
"logps/rejected": -789.7249755859375,
"loss": 0.1574,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.21274442970752716,
"rewards/margins": 0.3669392764568329,
"rewards/rejected": -0.5796837210655212,
"step": 1100
},
{
"epoch": 0.59,
"learning_rate": 2.1358364077845236e-06,
"logits/chosen": -1.533307671546936,
"logits/rejected": -0.9590204954147339,
"logps/chosen": -387.72381591796875,
"logps/rejected": -828.5607299804688,
"loss": 0.1014,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.14084286987781525,
"rewards/margins": 0.4476155638694763,
"rewards/rejected": -0.5884584188461304,
"step": 1110
},
{
"epoch": 0.6,
"learning_rate": 2.089842874558849e-06,
"logits/chosen": -1.3846327066421509,
"logits/rejected": -1.0313770771026611,
"logps/chosen": -479.6465759277344,
"logps/rejected": -906.1482543945312,
"loss": 0.1133,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.22880907356739044,
"rewards/margins": 0.4139330983161926,
"rewards/rejected": -0.6427421569824219,
"step": 1120
},
{
"epoch": 0.6,
"learning_rate": 2.0439915765994242e-06,
"logits/chosen": -1.5441999435424805,
"logits/rejected": -0.8765427470207214,
"logps/chosen": -374.98504638671875,
"logps/rejected": -726.3242797851562,
"loss": 0.1078,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1631808578968048,
"rewards/margins": 0.38433948159217834,
"rewards/rejected": -0.5475203394889832,
"step": 1130
},
{
"epoch": 0.61,
"learning_rate": 1.9982984143287186e-06,
"logits/chosen": -1.7160451412200928,
"logits/rejected": -0.9389771223068237,
"logps/chosen": -419.886962890625,
"logps/rejected": -727.5339965820312,
"loss": 0.1209,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.1307600438594818,
"rewards/margins": 0.4173372685909271,
"rewards/rejected": -0.5480973720550537,
"step": 1140
},
{
"epoch": 0.61,
"learning_rate": 1.95277923333053e-06,
"logits/chosen": -1.5577538013458252,
"logits/rejected": -0.9766386151313782,
"logps/chosen": -432.010498046875,
"logps/rejected": -772.4149780273438,
"loss": 0.1015,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.1316412091255188,
"rewards/margins": 0.40984097123146057,
"rewards/rejected": -0.541482150554657,
"step": 1150
},
{
"epoch": 0.62,
"learning_rate": 1.9074498188550156e-06,
"logits/chosen": -1.6150667667388916,
"logits/rejected": -1.0481829643249512,
"logps/chosen": -460.06781005859375,
"logps/rejected": -748.8250122070312,
"loss": 0.1577,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18149954080581665,
"rewards/margins": 0.34171923995018005,
"rewards/rejected": -0.5232187509536743,
"step": 1160
},
{
"epoch": 0.62,
"learning_rate": 1.862325890344643e-06,
"logits/chosen": -1.3022327423095703,
"logits/rejected": -0.9266065359115601,
"logps/chosen": -367.62823486328125,
"logps/rejected": -806.5985107421875,
"loss": 0.1656,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.17992374300956726,
"rewards/margins": 0.42552104592323303,
"rewards/rejected": -0.6054448485374451,
"step": 1170
},
{
"epoch": 0.63,
"learning_rate": 1.817423095982972e-06,
"logits/chosen": -1.3970632553100586,
"logits/rejected": -0.9412476420402527,
"logps/chosen": -451.11883544921875,
"logps/rejected": -783.0731201171875,
"loss": 0.102,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.20501062273979187,
"rewards/margins": 0.37199467420578003,
"rewards/rejected": -0.5770053267478943,
"step": 1180
},
{
"epoch": 0.63,
"learning_rate": 1.7727570072681293e-06,
"logits/chosen": -1.4293451309204102,
"logits/rejected": -0.8616847991943359,
"logps/chosen": -406.25042724609375,
"logps/rejected": -737.0385131835938,
"loss": 0.1378,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16194511950016022,
"rewards/margins": 0.3638822138309479,
"rewards/rejected": -0.5258272886276245,
"step": 1190
},
{
"epoch": 0.64,
"learning_rate": 1.7283431136128961e-06,
"logits/chosen": -1.6002616882324219,
"logits/rejected": -1.116288423538208,
"logps/chosen": -449.384521484375,
"logps/rejected": -805.2763061523438,
"loss": 0.1454,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20747177302837372,
"rewards/margins": 0.3553561270236969,
"rewards/rejected": -0.5628278851509094,
"step": 1200
},
{
"epoch": 0.65,
"learning_rate": 1.6841968169732478e-06,
"logits/chosen": -1.5592294931411743,
"logits/rejected": -1.054216742515564,
"logps/chosen": -448.9071350097656,
"logps/rejected": -851.6107177734375,
"loss": 0.1204,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.18522223830223083,
"rewards/margins": 0.4141850471496582,
"rewards/rejected": -0.5994073152542114,
"step": 1210
},
{
"epoch": 0.65,
"learning_rate": 1.6403334265072284e-06,
"logits/chosen": -1.6474437713623047,
"logits/rejected": -0.8614113926887512,
"logps/chosen": -453.735107421875,
"logps/rejected": -801.1546630859375,
"loss": 0.1081,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1977526694536209,
"rewards/margins": 0.38285189867019653,
"rewards/rejected": -0.5806045532226562,
"step": 1220
},
{
"epoch": 0.66,
"learning_rate": 1.5967681532660066e-06,
"logits/chosen": -1.2708427906036377,
"logits/rejected": -0.9732850790023804,
"logps/chosen": -437.337890625,
"logps/rejected": -822.8092041015625,
"loss": 0.1336,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.20598828792572021,
"rewards/margins": 0.38752201199531555,
"rewards/rejected": -0.5935102701187134,
"step": 1230
},
{
"epoch": 0.66,
"learning_rate": 1.5535161049189463e-06,
"logits/chosen": -1.5570838451385498,
"logits/rejected": -1.1252386569976807,
"logps/chosen": -500.2212829589844,
"logps/rejected": -786.1821899414062,
"loss": 0.1145,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.1828458607196808,
"rewards/margins": 0.3151377737522125,
"rewards/rejected": -0.4979836046695709,
"step": 1240
},
{
"epoch": 0.67,
"learning_rate": 1.5105922805145356e-06,
"logits/chosen": -1.8010812997817993,
"logits/rejected": -1.2702046632766724,
"logps/chosen": -434.25421142578125,
"logps/rejected": -807.052001953125,
"loss": 0.1215,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.15841101109981537,
"rewards/margins": 0.35736268758773804,
"rewards/rejected": -0.5157736539840698,
"step": 1250
},
{
"epoch": 0.67,
"learning_rate": 1.4680115652789823e-06,
"logits/chosen": -1.856612205505371,
"logits/rejected": -1.147216558456421,
"logps/chosen": -523.8411865234375,
"logps/rejected": -821.1082763671875,
"loss": 0.1727,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2023150473833084,
"rewards/margins": 0.35308974981307983,
"rewards/rejected": -0.555404782295227,
"step": 1260
},
{
"epoch": 0.68,
"learning_rate": 1.4257887254542767e-06,
"logits/chosen": -1.5119379758834839,
"logits/rejected": -1.0702050924301147,
"logps/chosen": -511.7137756347656,
"logps/rejected": -906.3107299804688,
"loss": 0.1025,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.21919742226600647,
"rewards/margins": 0.3765312731266022,
"rewards/rejected": -0.5957286953926086,
"step": 1270
},
{
"epoch": 0.68,
"learning_rate": 1.3839384031775227e-06,
"logits/chosen": -1.6945511102676392,
"logits/rejected": -0.8750427961349487,
"logps/chosen": -440.59552001953125,
"logps/rejected": -766.9216918945312,
"loss": 0.1519,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15435707569122314,
"rewards/margins": 0.4113141894340515,
"rewards/rejected": -0.5656712651252747,
"step": 1280
},
{
"epoch": 0.69,
"learning_rate": 1.342475111403298e-06,
"logits/chosen": -1.4833415746688843,
"logits/rejected": -1.0713919401168823,
"logps/chosen": -438.8766174316406,
"logps/rejected": -720.0028076171875,
"loss": 0.1574,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.20507605373859406,
"rewards/margins": 0.28728824853897095,
"rewards/rejected": -0.4923642575740814,
"step": 1290
},
{
"epoch": 0.69,
"learning_rate": 1.3014132288708209e-06,
"logits/chosen": -1.5766406059265137,
"logits/rejected": -1.0825704336166382,
"logps/chosen": -438.3309020996094,
"logps/rejected": -823.6751708984375,
"loss": 0.166,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.19768479466438293,
"rewards/margins": 0.3687485158443451,
"rewards/rejected": -0.566433310508728,
"step": 1300
},
{
"epoch": 0.7,
"learning_rate": 1.2607669951176549e-06,
"logits/chosen": -1.4940482378005981,
"logits/rejected": -1.2070845365524292,
"logps/chosen": -389.45343017578125,
"logps/rejected": -773.9241333007812,
"loss": 0.1574,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.15667062997817993,
"rewards/margins": 0.3334207832813263,
"rewards/rejected": -0.49009138345718384,
"step": 1310
},
{
"epoch": 0.7,
"learning_rate": 1.2205505055416891e-06,
"logits/chosen": -1.5122394561767578,
"logits/rejected": -1.3955858945846558,
"logps/chosen": -338.9855651855469,
"logps/rejected": -748.5198364257812,
"loss": 0.1404,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16774006187915802,
"rewards/margins": 0.34086841344833374,
"rewards/rejected": -0.5086084604263306,
"step": 1320
},
{
"epoch": 0.71,
"learning_rate": 1.1807777065131002e-06,
"logits/chosen": -1.5142749547958374,
"logits/rejected": -1.0132977962493896,
"logps/chosen": -410.44879150390625,
"logps/rejected": -810.9103393554688,
"loss": 0.1108,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.14515772461891174,
"rewards/margins": 0.36618533730506897,
"rewards/rejected": -0.5113429427146912,
"step": 1330
},
{
"epoch": 0.71,
"learning_rate": 1.1414623905380012e-06,
"logits/chosen": -1.756066083908081,
"logits/rejected": -1.1571279764175415,
"logps/chosen": -441.978515625,
"logps/rejected": -786.6061401367188,
"loss": 0.1217,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1463043987751007,
"rewards/margins": 0.36096060276031494,
"rewards/rejected": -0.5072649717330933,
"step": 1340
},
{
"epoch": 0.72,
"learning_rate": 1.1026181914754388e-06,
"logits/chosen": -1.784054160118103,
"logits/rejected": -1.0276035070419312,
"logps/chosen": -506.1011657714844,
"logps/rejected": -819.1619873046875,
"loss": 0.1352,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1743244081735611,
"rewards/margins": 0.38723859190940857,
"rewards/rejected": -0.5615630149841309,
"step": 1350
},
{
"epoch": 0.73,
"learning_rate": 1.0642585798094136e-06,
"logits/chosen": -1.5410611629486084,
"logits/rejected": -1.0178577899932861,
"logps/chosen": -377.84197998046875,
"logps/rejected": -720.7569580078125,
"loss": 0.1264,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.12278805673122406,
"rewards/margins": 0.38729211688041687,
"rewards/rejected": -0.5100802183151245,
"step": 1360
},
{
"epoch": 0.73,
"learning_rate": 1.0263968579775522e-06,
"logits/chosen": -1.5256543159484863,
"logits/rejected": -0.9656683802604675,
"logps/chosen": -458.48089599609375,
"logps/rejected": -791.9251708984375,
"loss": 0.1401,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.16513575613498688,
"rewards/margins": 0.38074809312820435,
"rewards/rejected": -0.54588383436203,
"step": 1370
},
{
"epoch": 0.74,
"learning_rate": 9.89046155758058e-07,
"logits/chosen": -1.6825745105743408,
"logits/rejected": -0.8826824426651001,
"logps/chosen": -455.65594482421875,
"logps/rejected": -802.0789794921875,
"loss": 0.1228,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1435212790966034,
"rewards/margins": 0.41453132033348083,
"rewards/rejected": -0.558052659034729,
"step": 1380
},
{
"epoch": 0.74,
"learning_rate": 9.52219425716534e-07,
"logits/chosen": -1.4951298236846924,
"logits/rejected": -0.8258262872695923,
"logps/chosen": -515.0365600585938,
"logps/rejected": -771.9305419921875,
"loss": 0.1429,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.22707219421863556,
"rewards/margins": 0.3406526446342468,
"rewards/rejected": -0.5677248239517212,
"step": 1390
},
{
"epoch": 0.75,
"learning_rate": 9.15929438714262e-07,
"logits/chosen": -1.6602566242218018,
"logits/rejected": -0.9937980771064758,
"logps/chosen": -368.70684814453125,
"logps/rejected": -689.556884765625,
"loss": 0.1528,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.12013135105371475,
"rewards/margins": 0.37646666169166565,
"rewards/rejected": -0.4965980052947998,
"step": 1400
},
{
"epoch": 0.75,
"learning_rate": 8.801887794794911e-07,
"logits/chosen": -1.4943420886993408,
"logits/rejected": -0.9112469553947449,
"logps/chosen": -379.4705810546875,
"logps/rejected": -716.5015258789062,
"loss": 0.1407,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.13394254446029663,
"rewards/margins": 0.3894422650337219,
"rewards/rejected": -0.5233848690986633,
"step": 1410
},
{
"epoch": 0.76,
"learning_rate": 8.450098422432787e-07,
"logits/chosen": -1.7622817754745483,
"logits/rejected": -0.7207467555999756,
"logps/chosen": -537.2728271484375,
"logps/rejected": -854.8095703125,
"loss": 0.1203,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.1893191635608673,
"rewards/margins": 0.4274328649044037,
"rewards/rejected": -0.616752028465271,
"step": 1420
},
{
"epoch": 0.76,
"learning_rate": 8.104048264413858e-07,
"logits/chosen": -1.5849692821502686,
"logits/rejected": -0.9879060983657837,
"logps/chosen": -451.66802978515625,
"logps/rejected": -812.3735961914062,
"loss": 0.1162,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.1757660210132599,
"rewards/margins": 0.4036192297935486,
"rewards/rejected": -0.5793852806091309,
"step": 1430
},
{
"epoch": 0.77,
"learning_rate": 7.763857324837321e-07,
"logits/chosen": -1.7880465984344482,
"logits/rejected": -1.1138683557510376,
"logps/chosen": -470.4102478027344,
"logps/rejected": -782.1883544921875,
"loss": 0.1273,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.18462924659252167,
"rewards/margins": 0.37033870816230774,
"rewards/rejected": -0.5549679398536682,
"step": 1440
},
{
"epoch": 0.77,
"learning_rate": 7.429643575928605e-07,
"logits/chosen": -1.688932180404663,
"logits/rejected": -1.1515108346939087,
"logps/chosen": -416.93896484375,
"logps/rejected": -748.1307373046875,
"loss": 0.1284,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.14920882880687714,
"rewards/margins": 0.345571368932724,
"rewards/rejected": -0.4947802424430847,
"step": 1450
},
{
"epoch": 0.78,
"learning_rate": 7.101522917128709e-07,
"logits/chosen": -1.3505184650421143,
"logits/rejected": -0.8502361178398132,
"logps/chosen": -453.3301696777344,
"logps/rejected": -852.1624755859375,
"loss": 0.149,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.18861651420593262,
"rewards/margins": 0.39376121759414673,
"rewards/rejected": -0.5823776721954346,
"step": 1460
},
{
"epoch": 0.78,
"learning_rate": 6.779609134902312e-07,
"logits/chosen": -1.4756485223770142,
"logits/rejected": -0.8883223533630371,
"logps/chosen": -409.55029296875,
"logps/rejected": -707.3751831054688,
"loss": 0.1383,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.17147330939769745,
"rewards/margins": 0.3226475715637207,
"rewards/rejected": -0.49412089586257935,
"step": 1470
},
{
"epoch": 0.79,
"learning_rate": 6.464013863278629e-07,
"logits/chosen": -1.593145728111267,
"logits/rejected": -0.8717827796936035,
"logps/chosen": -429.87725830078125,
"logps/rejected": -856.5046997070312,
"loss": 0.1131,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.12572301924228668,
"rewards/margins": 0.46143823862075806,
"rewards/rejected": -0.5871611833572388,
"step": 1480
},
{
"epoch": 0.79,
"learning_rate": 6.154846545138696e-07,
"logits/chosen": -1.556706190109253,
"logits/rejected": -1.1209014654159546,
"logps/chosen": -434.39813232421875,
"logps/rejected": -873.4528198242188,
"loss": 0.1184,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.151644766330719,
"rewards/margins": 0.44497567415237427,
"rewards/rejected": -0.5966204404830933,
"step": 1490
},
{
"epoch": 0.8,
"learning_rate": 5.852214394262515e-07,
"logits/chosen": -1.5190951824188232,
"logits/rejected": -1.1570379734039307,
"logps/chosen": -394.3932189941406,
"logps/rejected": -781.7257690429688,
"loss": 0.1364,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.16030506789684296,
"rewards/margins": 0.3787681758403778,
"rewards/rejected": -0.5390732884407043,
"step": 1500
},
{
"epoch": 0.81,
"learning_rate": 5.556222358149191e-07,
"logits/chosen": -1.5962765216827393,
"logits/rejected": -0.9932464361190796,
"logps/chosen": -390.4809875488281,
"logps/rejected": -714.4888916015625,
"loss": 0.1652,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.17372211813926697,
"rewards/margins": 0.361335813999176,
"rewards/rejected": -0.5350579023361206,
"step": 1510
},
{
"epoch": 0.81,
"learning_rate": 5.266973081622992e-07,
"logits/chosen": -1.4811457395553589,
"logits/rejected": -1.0426948070526123,
"logps/chosen": -451.40069580078125,
"logps/rejected": -772.6881713867188,
"loss": 0.1611,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.18898162245750427,
"rewards/margins": 0.3498608469963074,
"rewards/rejected": -0.5388425588607788,
"step": 1520
},
{
"epoch": 0.82,
"learning_rate": 4.984566871237942e-07,
"logits/chosen": -1.4943921566009521,
"logits/rejected": -1.000528335571289,
"logps/chosen": -393.3673095703125,
"logps/rejected": -763.3333129882812,
"loss": 0.1387,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15240536630153656,
"rewards/margins": 0.3935711681842804,
"rewards/rejected": -0.5459765195846558,
"step": 1530
},
{
"epoch": 0.82,
"learning_rate": 4.709101660493251e-07,
"logits/chosen": -1.4344061613082886,
"logits/rejected": -0.8900424838066101,
"logps/chosen": -454.6851501464844,
"logps/rejected": -862.0211181640625,
"loss": 0.1148,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.2144501656293869,
"rewards/margins": 0.39736613631248474,
"rewards/rejected": -0.6118162870407104,
"step": 1540
},
{
"epoch": 0.83,
"learning_rate": 4.440672975871743e-07,
"logits/chosen": -1.6005455255508423,
"logits/rejected": -1.2345631122589111,
"logps/chosen": -454.6659240722656,
"logps/rejected": -910.2664794921875,
"loss": 0.0851,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.1945376694202423,
"rewards/margins": 0.4257555603981018,
"rewards/rejected": -0.6202932000160217,
"step": 1550
},
{
"epoch": 0.83,
"learning_rate": 4.1793739037129134e-07,
"logits/chosen": -1.755613923072815,
"logits/rejected": -0.9976798892021179,
"logps/chosen": -426.450927734375,
"logps/rejected": -827.8946533203125,
"loss": 0.1088,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1098506897687912,
"rewards/margins": 0.4619103968143463,
"rewards/rejected": -0.5717611908912659,
"step": 1560
},
{
"epoch": 0.84,
"learning_rate": 3.9252950579322405e-07,
"logits/chosen": -1.7585302591323853,
"logits/rejected": -0.9437012672424316,
"logps/chosen": -617.3839111328125,
"logps/rejected": -886.2396240234375,
"loss": 0.1537,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.2242734432220459,
"rewards/margins": 0.37233808636665344,
"rewards/rejected": -0.5966114401817322,
"step": 1570
},
{
"epoch": 0.84,
"learning_rate": 3.6785245485978864e-07,
"logits/chosen": -1.5823638439178467,
"logits/rejected": -1.016841173171997,
"logps/chosen": -453.40643310546875,
"logps/rejected": -826.7568359375,
"loss": 0.1052,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.14566640555858612,
"rewards/margins": 0.41016706824302673,
"rewards/rejected": -0.5558334589004517,
"step": 1580
},
{
"epoch": 0.85,
"learning_rate": 3.43914795137566e-07,
"logits/chosen": -1.3402397632598877,
"logits/rejected": -0.6611793041229248,
"logps/chosen": -491.9454040527344,
"logps/rejected": -827.7058715820312,
"loss": 0.1243,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1830025464296341,
"rewards/margins": 0.3839171230792999,
"rewards/rejected": -0.5669196844100952,
"step": 1590
},
{
"epoch": 0.85,
"learning_rate": 3.207248277852901e-07,
"logits/chosen": -1.3319523334503174,
"logits/rejected": -1.2467930316925049,
"logps/chosen": -415.2613220214844,
"logps/rejected": -794.6478271484375,
"loss": 0.175,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.18765749037265778,
"rewards/margins": 0.33323392271995544,
"rewards/rejected": -0.5208913683891296,
"step": 1600
},
{
"epoch": 0.86,
"learning_rate": 2.9829059467515074e-07,
"logits/chosen": -1.6862188577651978,
"logits/rejected": -1.0607928037643433,
"logps/chosen": -466.0138244628906,
"logps/rejected": -823.7083129882812,
"loss": 0.1264,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1697189062833786,
"rewards/margins": 0.39212626218795776,
"rewards/rejected": -0.5618451833724976,
"step": 1610
},
{
"epoch": 0.86,
"learning_rate": 2.766198756040153e-07,
"logits/chosen": -1.5529918670654297,
"logits/rejected": -1.1102968454360962,
"logps/chosen": -514.1727294921875,
"logps/rejected": -934.7576293945312,
"loss": 0.0946,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.2095489799976349,
"rewards/margins": 0.39688506722450256,
"rewards/rejected": -0.6064340472221375,
"step": 1620
},
{
"epoch": 0.87,
"learning_rate": 2.5572018559553155e-07,
"logits/chosen": -1.4525808095932007,
"logits/rejected": -1.114332675933838,
"logps/chosen": -429.51336669921875,
"logps/rejected": -814.693115234375,
"loss": 0.1319,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.20347242057323456,
"rewards/margins": 0.3701416254043579,
"rewards/rejected": -0.5736140012741089,
"step": 1630
},
{
"epoch": 0.87,
"learning_rate": 2.3559877229404864e-07,
"logits/chosen": -1.5984094142913818,
"logits/rejected": -1.1003965139389038,
"logps/chosen": -458.2529296875,
"logps/rejected": -795.9619140625,
"loss": 0.1294,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.17800959944725037,
"rewards/margins": 0.363762229681015,
"rewards/rejected": -0.5417717695236206,
"step": 1640
},
{
"epoch": 0.88,
"learning_rate": 2.1626261345126576e-07,
"logits/chosen": -1.4350886344909668,
"logits/rejected": -1.0259506702423096,
"logps/chosen": -415.7510681152344,
"logps/rejected": -919.9736328125,
"loss": 0.076,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.15845449268817902,
"rewards/margins": 0.4928809702396393,
"rewards/rejected": -0.6513354182243347,
"step": 1650
},
{
"epoch": 0.89,
"learning_rate": 1.9771841450646505e-07,
"logits/chosen": -1.6430625915527344,
"logits/rejected": -0.9447630643844604,
"logps/chosen": -506.1864318847656,
"logps/rejected": -795.38134765625,
"loss": 0.1602,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.23687663674354553,
"rewards/margins": 0.33186858892440796,
"rewards/rejected": -0.5687452554702759,
"step": 1660
},
{
"epoch": 0.89,
"learning_rate": 1.7997260626118758e-07,
"logits/chosen": -1.898046851158142,
"logits/rejected": -1.3102858066558838,
"logps/chosen": -514.0572509765625,
"logps/rejected": -825.0703125,
"loss": 0.1456,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.17340177297592163,
"rewards/margins": 0.38971638679504395,
"rewards/rejected": -0.5631181597709656,
"step": 1670
},
{
"epoch": 0.9,
"learning_rate": 1.6303134264914365e-07,
"logits/chosen": -1.6851441860198975,
"logits/rejected": -1.0963430404663086,
"logps/chosen": -480.8072204589844,
"logps/rejected": -728.0396728515625,
"loss": 0.1273,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1836322844028473,
"rewards/margins": 0.3386848270893097,
"rewards/rejected": -0.5223170518875122,
"step": 1680
},
{
"epoch": 0.9,
"learning_rate": 1.469004986021355e-07,
"logits/chosen": -1.414111852645874,
"logits/rejected": -0.8712374567985535,
"logps/chosen": -443.46728515625,
"logps/rejected": -897.1246948242188,
"loss": 0.0899,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.17597445845603943,
"rewards/margins": 0.4499644339084625,
"rewards/rejected": -0.6259388327598572,
"step": 1690
},
{
"epoch": 0.91,
"learning_rate": 1.315856680127367e-07,
"logits/chosen": -1.4355229139328003,
"logits/rejected": -0.8268268704414368,
"logps/chosen": -411.8287658691406,
"logps/rejected": -796.5527954101562,
"loss": 0.1013,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.16030281782150269,
"rewards/margins": 0.4271472990512848,
"rewards/rejected": -0.5874501466751099,
"step": 1700
},
{
"epoch": 0.91,
"learning_rate": 1.1709216179442817e-07,
"logits/chosen": -1.5936983823776245,
"logits/rejected": -0.9012172818183899,
"logps/chosen": -452.53155517578125,
"logps/rejected": -878.1297607421875,
"loss": 0.1108,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.17777523398399353,
"rewards/margins": 0.4133872389793396,
"rewards/rejected": -0.5911625623703003,
"step": 1710
},
{
"epoch": 0.92,
"learning_rate": 1.0342500603986421e-07,
"logits/chosen": -1.441282033920288,
"logits/rejected": -0.9638457298278809,
"logps/chosen": -417.11895751953125,
"logps/rejected": -741.5521850585938,
"loss": 0.1466,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.16872674226760864,
"rewards/margins": 0.3362739682197571,
"rewards/rejected": -0.5050007104873657,
"step": 1720
},
{
"epoch": 0.92,
"learning_rate": 9.058894027791643e-08,
"logits/chosen": -1.4651381969451904,
"logits/rejected": -0.9410767555236816,
"logps/chosen": -497.6310119628906,
"logps/rejected": -866.1295776367188,
"loss": 0.1057,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.23562383651733398,
"rewards/margins": 0.3830471634864807,
"rewards/rejected": -0.6186710596084595,
"step": 1730
},
{
"epoch": 0.93,
"learning_rate": 7.858841583008592e-08,
"logits/chosen": -1.6138349771499634,
"logits/rejected": -1.0234501361846924,
"logps/chosen": -425.06610107421875,
"logps/rejected": -700.060791015625,
"loss": 0.1241,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.16232439875602722,
"rewards/margins": 0.3415161669254303,
"rewards/rejected": -0.5038405656814575,
"step": 1740
},
{
"epoch": 0.93,
"learning_rate": 6.742759426686313e-08,
"logits/chosen": -1.5296719074249268,
"logits/rejected": -1.15841543674469,
"logps/chosen": -541.86083984375,
"logps/rejected": -857.0759887695312,
"loss": 0.1324,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.22835755348205566,
"rewards/margins": 0.37031129002571106,
"rewards/rejected": -0.5986688733100891,
"step": 1750
},
{
"epoch": 0.94,
"learning_rate": 5.7110345964571104e-08,
"logits/chosen": -1.6711105108261108,
"logits/rejected": -1.0233453512191772,
"logps/chosen": -445.75762939453125,
"logps/rejected": -772.8753662109375,
"loss": 0.1172,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.18701913952827454,
"rewards/margins": 0.3782210052013397,
"rewards/rejected": -0.565240204334259,
"step": 1760
},
{
"epoch": 0.94,
"learning_rate": 4.764024876318357e-08,
"logits/chosen": -1.5489776134490967,
"logits/rejected": -0.8348779678344727,
"logps/chosen": -509.6427307128906,
"logps/rejected": -782.4371337890625,
"loss": 0.1146,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.20378637313842773,
"rewards/margins": 0.37221604585647583,
"rewards/rejected": -0.5760024189949036,
"step": 1770
},
{
"epoch": 0.95,
"learning_rate": 3.902058672559633e-08,
"logits/chosen": -1.8395429849624634,
"logits/rejected": -1.2655082941055298,
"logps/chosen": -375.2162780761719,
"logps/rejected": -805.35302734375,
"loss": 0.1244,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.11881232261657715,
"rewards/margins": 0.4330004155635834,
"rewards/rejected": -0.5518127679824829,
"step": 1780
},
{
"epoch": 0.95,
"learning_rate": 3.125434899876933e-08,
"logits/chosen": -1.5633362531661987,
"logits/rejected": -1.1406381130218506,
"logps/chosen": -356.45098876953125,
"logps/rejected": -783.3472900390625,
"loss": 0.1001,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1347019374370575,
"rewards/margins": 0.42905181646347046,
"rewards/rejected": -0.5637537837028503,
"step": 1790
},
{
"epoch": 0.96,
"learning_rate": 2.4344228777145873e-08,
"logits/chosen": -1.6571776866912842,
"logits/rejected": -0.7649690508842468,
"logps/chosen": -587.1907348632812,
"logps/rejected": -933.9886474609375,
"loss": 0.132,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.25137990713119507,
"rewards/margins": 0.42299261689186096,
"rewards/rejected": -0.6743724942207336,
"step": 1800
},
{
"epoch": 0.97,
"learning_rate": 1.829262236869772e-08,
"logits/chosen": -1.541998267173767,
"logits/rejected": -0.8689600229263306,
"logps/chosen": -483.3575134277344,
"logps/rejected": -698.212158203125,
"loss": 0.1768,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.22255222499370575,
"rewards/margins": 0.27523303031921387,
"rewards/rejected": -0.4977852404117584,
"step": 1810
},
{
"epoch": 0.97,
"learning_rate": 1.3101628363929586e-08,
"logits/chosen": -1.5238444805145264,
"logits/rejected": -0.7508775591850281,
"logps/chosen": -520.79296875,
"logps/rejected": -767.9632568359375,
"loss": 0.1203,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1896631270647049,
"rewards/margins": 0.36830946803092957,
"rewards/rejected": -0.5579725503921509,
"step": 1820
},
{
"epoch": 0.98,
"learning_rate": 8.773046908123195e-09,
"logits/chosen": -1.6025253534317017,
"logits/rejected": -1.304527997970581,
"logps/chosen": -375.14874267578125,
"logps/rejected": -767.8821411132812,
"loss": 0.1284,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.1781584918498993,
"rewards/margins": 0.33938026428222656,
"rewards/rejected": -0.5175387263298035,
"step": 1830
},
{
"epoch": 0.98,
"learning_rate": 5.308379077080817e-09,
"logits/chosen": -1.6030333042144775,
"logits/rejected": -1.3066356182098389,
"logps/chosen": -397.94873046875,
"logps/rejected": -825.3441162109375,
"loss": 0.1111,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.20664629340171814,
"rewards/margins": 0.3704259991645813,
"rewards/rejected": -0.5770723819732666,
"step": 1840
},
{
"epoch": 0.99,
"learning_rate": 2.7088263565760996e-09,
"logits/chosen": -1.6151325702667236,
"logits/rejected": -0.9792189598083496,
"logps/chosen": -399.3708801269531,
"logps/rejected": -748.7066650390625,
"loss": 0.1181,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.1316554844379425,
"rewards/margins": 0.42211928963661194,
"rewards/rejected": -0.5537747740745544,
"step": 1850
},
{
"epoch": 0.99,
"learning_rate": 9.752902257023633e-10,
"logits/chosen": -1.6095302104949951,
"logits/rejected": -1.1830781698226929,
"logps/chosen": -393.78350830078125,
"logps/rejected": -796.3955078125,
"loss": 0.0928,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.15096323192119598,
"rewards/margins": 0.4236125349998474,
"rewards/rejected": -0.5745757818222046,
"step": 1860
},
{
"epoch": 1.0,
"learning_rate": 1.083718442532189e-10,
"logits/chosen": -1.4612399339675903,
"logits/rejected": -0.8474820256233215,
"logps/chosen": -456.6351623535156,
"logps/rejected": -785.6075439453125,
"loss": 0.1354,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.18163737654685974,
"rewards/margins": 0.3639640808105469,
"rewards/rejected": -0.545601487159729,
"step": 1870
},
{
"epoch": 1.0,
"step": 1875,
"total_flos": 0.0,
"train_loss": 0.13990657812754312,
"train_runtime": 16010.7596,
"train_samples_per_second": 0.937,
"train_steps_per_second": 0.117
}
],
"logging_steps": 10,
"max_steps": 1875,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}