|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.6595744680851065e-08, |
|
"logits/chosen": -1.7968215942382812, |
|
"logits/rejected": -2.159090995788574, |
|
"logps/chosen": -88.33059692382812, |
|
"logps/rejected": -242.96200561523438, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.6595744680851066e-07, |
|
"logits/chosen": -2.003159999847412, |
|
"logits/rejected": -1.3869916200637817, |
|
"logps/chosen": -240.9772186279297, |
|
"logps/rejected": -195.60606384277344, |
|
"loss": 0.3319, |
|
"rewards/accuracies": 0.3333333432674408, |
|
"rewards/chosen": -3.270954766776413e-05, |
|
"rewards/margins": -8.25071256258525e-05, |
|
"rewards/rejected": 4.979758523404598e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": -2.0388007164001465, |
|
"logits/rejected": -1.5615094900131226, |
|
"logps/chosen": -291.083740234375, |
|
"logps/rejected": -277.5216369628906, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 1.1951732631132472e-05, |
|
"rewards/margins": 0.00027519199647940695, |
|
"rewards/rejected": -0.0002632402756717056, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.97872340425532e-07, |
|
"logits/chosen": -1.860889196395874, |
|
"logits/rejected": -1.5862194299697876, |
|
"logps/chosen": -248.38510131835938, |
|
"logps/rejected": -261.7816467285156, |
|
"loss": 0.324, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.0007015246083028615, |
|
"rewards/margins": 0.004821115639060736, |
|
"rewards/rejected": -0.005522639956325293, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": -1.8764064311981201, |
|
"logits/rejected": -1.2899483442306519, |
|
"logps/chosen": -355.25958251953125, |
|
"logps/rejected": -389.2695007324219, |
|
"loss": 0.3286, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.009422576054930687, |
|
"rewards/margins": 0.022184943780303, |
|
"rewards/rejected": -0.03160751983523369, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.3297872340425533e-06, |
|
"logits/chosen": -1.990142583847046, |
|
"logits/rejected": -1.2961665391921997, |
|
"logps/chosen": -316.00860595703125, |
|
"logps/rejected": -277.88421630859375, |
|
"loss": 0.2629, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05700983479619026, |
|
"rewards/margins": 0.059757936745882034, |
|
"rewards/rejected": -0.1167677640914917, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": -1.748492956161499, |
|
"logits/rejected": -0.8994135856628418, |
|
"logps/chosen": -389.3627624511719, |
|
"logps/rejected": -579.7057495117188, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09011684358119965, |
|
"rewards/margins": 0.22812744975090027, |
|
"rewards/rejected": -0.3182442784309387, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.8617021276595745e-06, |
|
"logits/chosen": -1.6900399923324585, |
|
"logits/rejected": -1.4010140895843506, |
|
"logps/chosen": -420.5406799316406, |
|
"logps/rejected": -859.8084716796875, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20870384573936462, |
|
"rewards/margins": 0.3385527431964874, |
|
"rewards/rejected": -0.547256588935852, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": -1.7609751224517822, |
|
"logits/rejected": -1.0384010076522827, |
|
"logps/chosen": -474.48187255859375, |
|
"logps/rejected": -747.34716796875, |
|
"loss": 0.1309, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19330377876758575, |
|
"rewards/margins": 0.34078216552734375, |
|
"rewards/rejected": -0.5340859293937683, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.393617021276596e-06, |
|
"logits/chosen": -1.7291476726531982, |
|
"logits/rejected": -1.2021540403366089, |
|
"logps/chosen": -454.2134704589844, |
|
"logps/rejected": -764.934326171875, |
|
"loss": 0.16, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1365814059972763, |
|
"rewards/margins": 0.36457785964012146, |
|
"rewards/rejected": -0.5011593103408813, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": -1.5737159252166748, |
|
"logits/rejected": -0.9248941540718079, |
|
"logps/chosen": -482.3492126464844, |
|
"logps/rejected": -792.2481689453125, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19203761219978333, |
|
"rewards/margins": 0.3564862310886383, |
|
"rewards/rejected": -0.5485238432884216, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9255319148936174e-06, |
|
"logits/chosen": -1.7435375452041626, |
|
"logits/rejected": -1.356065034866333, |
|
"logps/chosen": -416.564208984375, |
|
"logps/rejected": -796.4661254882812, |
|
"loss": 0.1253, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1526903361082077, |
|
"rewards/margins": 0.3349696397781372, |
|
"rewards/rejected": -0.4876599907875061, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": -1.6976553201675415, |
|
"logits/rejected": -1.0894078016281128, |
|
"logps/chosen": -409.96258544921875, |
|
"logps/rejected": -617.7588500976562, |
|
"loss": 0.1948, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13733306527137756, |
|
"rewards/margins": 0.293459415435791, |
|
"rewards/rejected": -0.4307924807071686, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.457446808510639e-06, |
|
"logits/chosen": -1.7993590831756592, |
|
"logits/rejected": -1.400632619857788, |
|
"logps/chosen": -370.1565856933594, |
|
"logps/rejected": -709.3056640625, |
|
"loss": 0.2055, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.11207763850688934, |
|
"rewards/margins": 0.340470552444458, |
|
"rewards/rejected": -0.45254817605018616, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": -1.495011806488037, |
|
"logits/rejected": -0.9245948791503906, |
|
"logps/chosen": -388.5771789550781, |
|
"logps/rejected": -792.4680786132812, |
|
"loss": 0.1088, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1575288623571396, |
|
"rewards/margins": 0.3997672200202942, |
|
"rewards/rejected": -0.557296097278595, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.98936170212766e-06, |
|
"logits/chosen": -1.6491578817367554, |
|
"logits/rejected": -1.2172632217407227, |
|
"logps/chosen": -407.8502502441406, |
|
"logps/rejected": -738.5733642578125, |
|
"loss": 0.1397, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12080486118793488, |
|
"rewards/margins": 0.32797589898109436, |
|
"rewards/rejected": -0.44878071546554565, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": -1.6612653732299805, |
|
"logits/rejected": -1.1705405712127686, |
|
"logps/chosen": -353.0194396972656, |
|
"logps/rejected": -689.8749389648438, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09746531397104263, |
|
"rewards/margins": 0.3515530228614807, |
|
"rewards/rejected": -0.44901829957962036, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.521276595744681e-06, |
|
"logits/chosen": -1.54987370967865, |
|
"logits/rejected": -1.1912695169448853, |
|
"logps/chosen": -544.5787963867188, |
|
"logps/rejected": -835.3132934570312, |
|
"loss": 0.1048, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.23248295485973358, |
|
"rewards/margins": 0.3334501087665558, |
|
"rewards/rejected": -0.565933108329773, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": -1.7397425174713135, |
|
"logits/rejected": -0.8725941777229309, |
|
"logps/chosen": -510.69842529296875, |
|
"logps/rejected": -840.5343017578125, |
|
"loss": 0.1531, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2171137034893036, |
|
"rewards/margins": 0.39573976397514343, |
|
"rewards/rejected": -0.6128535270690918, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999982660399688e-06, |
|
"logits/chosen": -1.6966726779937744, |
|
"logits/rejected": -1.09552800655365, |
|
"logps/chosen": -514.5984497070312, |
|
"logps/rejected": -911.4729614257812, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2481248676776886, |
|
"rewards/margins": 0.3679044842720032, |
|
"rewards/rejected": -0.6160293221473694, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.99937579964398e-06, |
|
"logits/chosen": -1.4942667484283447, |
|
"logits/rejected": -1.1419141292572021, |
|
"logps/chosen": -432.5450134277344, |
|
"logps/rejected": -730.1014404296875, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21937580406665802, |
|
"rewards/margins": 0.3185574412345886, |
|
"rewards/rejected": -0.5379332304000854, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.9979021993870645e-06, |
|
"logits/chosen": -1.571395993232727, |
|
"logits/rejected": -0.9183829426765442, |
|
"logps/chosen": -480.79644775390625, |
|
"logps/rejected": -813.7987060546875, |
|
"loss": 0.1624, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18962515890598297, |
|
"rewards/margins": 0.3737575113773346, |
|
"rewards/rejected": -0.563382625579834, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.995562370647553e-06, |
|
"logits/chosen": -1.668015480041504, |
|
"logits/rejected": -1.1087052822113037, |
|
"logps/chosen": -517.7100219726562, |
|
"logps/rejected": -838.1522216796875, |
|
"loss": 0.1372, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22473697364330292, |
|
"rewards/margins": 0.35942238569259644, |
|
"rewards/rejected": -0.5841594338417053, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.992357124836838e-06, |
|
"logits/chosen": -1.3532911539077759, |
|
"logits/rejected": -0.6337820291519165, |
|
"logps/chosen": -458.74462890625, |
|
"logps/rejected": -736.6771240234375, |
|
"loss": 0.1419, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.21437951922416687, |
|
"rewards/margins": 0.35455334186553955, |
|
"rewards/rejected": -0.5689328908920288, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882875734777044e-06, |
|
"logits/chosen": -1.6833770275115967, |
|
"logits/rejected": -1.0865981578826904, |
|
"logps/chosen": -476.49578857421875, |
|
"logps/rejected": -742.6441650390625, |
|
"loss": 0.162, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1685108244419098, |
|
"rewards/margins": 0.33409184217453003, |
|
"rewards/rejected": -0.5026026368141174, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.983355127818882e-06, |
|
"logits/chosen": -1.4850168228149414, |
|
"logits/rejected": -0.9603360295295715, |
|
"logps/chosen": -400.22967529296875, |
|
"logps/rejected": -569.9345703125, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20730257034301758, |
|
"rewards/margins": 0.22820453345775604, |
|
"rewards/rejected": -0.4355071187019348, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977561498345639e-06, |
|
"logits/chosen": -1.544639229774475, |
|
"logits/rejected": -1.1013596057891846, |
|
"logps/chosen": -407.4434509277344, |
|
"logps/rejected": -822.0099487304688, |
|
"loss": 0.0966, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.19301848113536835, |
|
"rewards/margins": 0.41144537925720215, |
|
"rewards/rejected": -0.6044638752937317, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970908694186624e-06, |
|
"logits/chosen": -1.5448771715164185, |
|
"logits/rejected": -0.8540661931037903, |
|
"logps/chosen": -542.1297607421875, |
|
"logps/rejected": -889.5344848632812, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.25042372941970825, |
|
"rewards/margins": 0.39966678619384766, |
|
"rewards/rejected": -0.6500904560089111, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9633990224171305e-06, |
|
"logits/chosen": -1.4944156408309937, |
|
"logits/rejected": -0.8036524057388306, |
|
"logps/chosen": -671.7164306640625, |
|
"logps/rejected": -922.0513916015625, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3747330605983734, |
|
"rewards/margins": 0.3100079596042633, |
|
"rewards/rejected": -0.6847410202026367, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.955035087259046e-06, |
|
"logits/chosen": -1.4477952718734741, |
|
"logits/rejected": -0.8218593597412109, |
|
"logps/chosen": -612.0665893554688, |
|
"logps/rejected": -863.3322143554688, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.3176848888397217, |
|
"rewards/margins": 0.30745354294776917, |
|
"rewards/rejected": -0.6251384019851685, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945819789177756e-06, |
|
"logits/chosen": -1.611026406288147, |
|
"logits/rejected": -1.1793110370635986, |
|
"logps/chosen": -518.3214111328125, |
|
"logps/rejected": -892.6036987304688, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2295423001050949, |
|
"rewards/margins": 0.37014490365982056, |
|
"rewards/rejected": -0.5996872186660767, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.935756323876306e-06, |
|
"logits/chosen": -1.508418083190918, |
|
"logits/rejected": -1.277306318283081, |
|
"logps/chosen": -459.0326232910156, |
|
"logps/rejected": -823.93017578125, |
|
"loss": 0.1518, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.27009934186935425, |
|
"rewards/margins": 0.32507914304733276, |
|
"rewards/rejected": -0.5951785445213318, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924848181187199e-06, |
|
"logits/chosen": -1.6442441940307617, |
|
"logits/rejected": -1.1329659223556519, |
|
"logps/chosen": -494.06097412109375, |
|
"logps/rejected": -854.1019287109375, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.20919394493103027, |
|
"rewards/margins": 0.39555859565734863, |
|
"rewards/rejected": -0.6047526001930237, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.913099143862173e-06, |
|
"logits/chosen": -1.3633651733398438, |
|
"logits/rejected": -0.9123932123184204, |
|
"logps/chosen": -474.5048828125, |
|
"logps/rejected": -829.7545776367188, |
|
"loss": 0.1498, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.28733277320861816, |
|
"rewards/margins": 0.3500373959541321, |
|
"rewards/rejected": -0.6373701095581055, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900513286260416e-06, |
|
"logits/chosen": -1.5975598096847534, |
|
"logits/rejected": -1.2887117862701416, |
|
"logps/chosen": -400.32781982421875, |
|
"logps/rejected": -744.0382080078125, |
|
"loss": 0.1053, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20992426574230194, |
|
"rewards/margins": 0.3338248133659363, |
|
"rewards/rejected": -0.5437491536140442, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.887094972935645e-06, |
|
"logits/chosen": -1.764219045639038, |
|
"logits/rejected": -0.9871004819869995, |
|
"logps/chosen": -573.0086059570312, |
|
"logps/rejected": -937.3956909179688, |
|
"loss": 0.1504, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2734777331352234, |
|
"rewards/margins": 0.37265342473983765, |
|
"rewards/rejected": -0.646131157875061, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.87284885712256e-06, |
|
"logits/chosen": -1.5166432857513428, |
|
"logits/rejected": -0.8717886209487915, |
|
"logps/chosen": -572.190673828125, |
|
"logps/rejected": -876.5632934570312, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.32470396161079407, |
|
"rewards/margins": 0.34118732810020447, |
|
"rewards/rejected": -0.6658912897109985, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.857779879123181e-06, |
|
"logits/chosen": -1.7403156757354736, |
|
"logits/rejected": -0.9518265724182129, |
|
"logps/chosen": -505.53387451171875, |
|
"logps/rejected": -778.5391845703125, |
|
"loss": 0.1207, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.20143766701221466, |
|
"rewards/margins": 0.37924817204475403, |
|
"rewards/rejected": -0.5806857943534851, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.841893264593643e-06, |
|
"logits/chosen": -1.7110675573349, |
|
"logits/rejected": -1.026960849761963, |
|
"logps/chosen": -472.236328125, |
|
"logps/rejected": -761.3172607421875, |
|
"loss": 0.095, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2130139172077179, |
|
"rewards/margins": 0.36098140478134155, |
|
"rewards/rejected": -0.5739952325820923, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.825194522732023e-06, |
|
"logits/chosen": -1.6140925884246826, |
|
"logits/rejected": -1.1293842792510986, |
|
"logps/chosen": -507.7935485839844, |
|
"logps/rejected": -913.9110107421875, |
|
"loss": 0.1277, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.26502326130867004, |
|
"rewards/margins": 0.3756178915500641, |
|
"rewards/rejected": -0.6406410932540894, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807689444367853e-06, |
|
"logits/chosen": -1.7682578563690186, |
|
"logits/rejected": -1.3489004373550415, |
|
"logps/chosen": -495.90869140625, |
|
"logps/rejected": -782.658447265625, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.260581910610199, |
|
"rewards/margins": 0.3089093565940857, |
|
"rewards/rejected": -0.5694912075996399, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.78938409995396e-06, |
|
"logits/chosen": -1.5254316329956055, |
|
"logits/rejected": -1.0945005416870117, |
|
"logps/chosen": -463.646484375, |
|
"logps/rejected": -902.4519653320312, |
|
"loss": 0.1313, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24164071679115295, |
|
"rewards/margins": 0.3996545672416687, |
|
"rewards/rejected": -0.6412952542304993, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770284837461342e-06, |
|
"logits/chosen": -1.5965580940246582, |
|
"logits/rejected": -0.836743175983429, |
|
"logps/chosen": -586.2058715820312, |
|
"logps/rejected": -917.9168090820312, |
|
"loss": 0.1124, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.26902061700820923, |
|
"rewards/margins": 0.3916351795196533, |
|
"rewards/rejected": -0.6606558561325073, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7503982801778015e-06, |
|
"logits/chosen": -1.557950735092163, |
|
"logits/rejected": -1.0112215280532837, |
|
"logps/chosen": -501.4098205566406, |
|
"logps/rejected": -789.2760009765625, |
|
"loss": 0.154, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24787664413452148, |
|
"rewards/margins": 0.3141789734363556, |
|
"rewards/rejected": -0.5620556473731995, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.729731324411104e-06, |
|
"logits/chosen": -1.7676448822021484, |
|
"logits/rejected": -1.1603769063949585, |
|
"logps/chosen": -429.96734619140625, |
|
"logps/rejected": -750.7506103515625, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1744639277458191, |
|
"rewards/margins": 0.35027581453323364, |
|
"rewards/rejected": -0.5247397422790527, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7082911370974645e-06, |
|
"logits/chosen": -1.8625621795654297, |
|
"logits/rejected": -1.36086905002594, |
|
"logps/chosen": -548.0135498046875, |
|
"logps/rejected": -759.1170654296875, |
|
"loss": 0.1707, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.2390761822462082, |
|
"rewards/margins": 0.2892398238182068, |
|
"rewards/rejected": -0.5283160209655762, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68608515331618e-06, |
|
"logits/chosen": -1.7188348770141602, |
|
"logits/rejected": -1.187195062637329, |
|
"logps/chosen": -492.1756286621094, |
|
"logps/rejected": -859.0760498046875, |
|
"loss": 0.1414, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23255252838134766, |
|
"rewards/margins": 0.3831843137741089, |
|
"rewards/rejected": -0.6157368421554565, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.663121073711269e-06, |
|
"logits/chosen": -1.5974490642547607, |
|
"logits/rejected": -1.2564659118652344, |
|
"logps/chosen": -336.80487060546875, |
|
"logps/rejected": -661.6661376953125, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14996656775474548, |
|
"rewards/margins": 0.32413381338119507, |
|
"rewards/rejected": -0.47410035133361816, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.63940686182103e-06, |
|
"logits/chosen": -1.6767423152923584, |
|
"logits/rejected": -1.1938632726669312, |
|
"logps/chosen": -505.0990295410156, |
|
"logps/rejected": -846.8779296875, |
|
"loss": 0.159, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2421807050704956, |
|
"rewards/margins": 0.3599635660648346, |
|
"rewards/rejected": -0.6021442413330078, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.614950741316425e-06, |
|
"logits/chosen": -1.529900312423706, |
|
"logits/rejected": -1.0826785564422607, |
|
"logps/chosen": -421.31707763671875, |
|
"logps/rejected": -685.7420654296875, |
|
"loss": 0.1721, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.22861020267009735, |
|
"rewards/margins": 0.28916865587234497, |
|
"rewards/rejected": -0.5177788734436035, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.589761193149254e-06, |
|
"logits/chosen": -1.6966304779052734, |
|
"logits/rejected": -0.9312071800231934, |
|
"logps/chosen": -535.8978271484375, |
|
"logps/rejected": -940.1627197265625, |
|
"loss": 0.1144, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.24013669788837433, |
|
"rewards/margins": 0.4429057240486145, |
|
"rewards/rejected": -0.6830424070358276, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.563846952611112e-06, |
|
"logits/chosen": -1.6221929788589478, |
|
"logits/rejected": -0.9574362635612488, |
|
"logps/chosen": -428.6238708496094, |
|
"logps/rejected": -704.8244018554688, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.195401132106781, |
|
"rewards/margins": 0.3248489797115326, |
|
"rewards/rejected": -0.520250141620636, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.537217006304141e-06, |
|
"logits/chosen": -1.4427409172058105, |
|
"logits/rejected": -1.003901481628418, |
|
"logps/chosen": -429.660400390625, |
|
"logps/rejected": -772.5963134765625, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15723460912704468, |
|
"rewards/margins": 0.3122255206108093, |
|
"rewards/rejected": -0.4694600999355316, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.50988058902464e-06, |
|
"logits/chosen": -1.2717740535736084, |
|
"logits/rejected": -0.8480876684188843, |
|
"logps/chosen": -367.8202209472656, |
|
"logps/rejected": -776.1926879882812, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.14414677023887634, |
|
"rewards/margins": 0.38212689757347107, |
|
"rewards/rejected": -0.5262737274169922, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481847180560593e-06, |
|
"logits/chosen": -1.5822323560714722, |
|
"logits/rejected": -0.9035153388977051, |
|
"logps/chosen": -439.30816650390625, |
|
"logps/rejected": -708.1422729492188, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.18015776574611664, |
|
"rewards/margins": 0.32809919118881226, |
|
"rewards/rejected": -0.5082569122314453, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.453126502404253e-06, |
|
"logits/chosen": -1.6248279809951782, |
|
"logits/rejected": -0.9642871022224426, |
|
"logps/chosen": -561.463623046875, |
|
"logps/rejected": -740.08935546875, |
|
"loss": 0.1773, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27120378613471985, |
|
"rewards/margins": 0.28620854020118713, |
|
"rewards/rejected": -0.557412326335907, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.423728514380892e-06, |
|
"logits/chosen": -1.4605586528778076, |
|
"logits/rejected": -0.8407928347587585, |
|
"logps/chosen": -514.7965087890625, |
|
"logps/rejected": -831.8440551757812, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24941739439964294, |
|
"rewards/margins": 0.36968275904655457, |
|
"rewards/rejected": -0.6191002130508423, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.393663411194918e-06, |
|
"logits/chosen": -1.4048388004302979, |
|
"logits/rejected": -1.0212897062301636, |
|
"logps/chosen": -490.42431640625, |
|
"logps/rejected": -855.1259765625, |
|
"loss": 0.1416, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24018950760364532, |
|
"rewards/margins": 0.3597009778022766, |
|
"rewards/rejected": -0.5998905301094055, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.362941618894523e-06, |
|
"logits/chosen": -1.3778400421142578, |
|
"logits/rejected": -0.983964741230011, |
|
"logps/chosen": -586.1995849609375, |
|
"logps/rejected": -981.2742309570312, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.30800628662109375, |
|
"rewards/margins": 0.3807603120803833, |
|
"rewards/rejected": -0.6887666583061218, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.331573791256116e-06, |
|
"logits/chosen": -1.4645698070526123, |
|
"logits/rejected": -0.9271195530891418, |
|
"logps/chosen": -621.4105224609375, |
|
"logps/rejected": -899.0559692382812, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2714827358722687, |
|
"rewards/margins": 0.362936794757843, |
|
"rewards/rejected": -0.6344195604324341, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.299570806089786e-06, |
|
"logits/chosen": -1.6326652765274048, |
|
"logits/rejected": -0.9927080273628235, |
|
"logps/chosen": -490.701904296875, |
|
"logps/rejected": -842.052734375, |
|
"loss": 0.1023, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18793320655822754, |
|
"rewards/margins": 0.408639132976532, |
|
"rewards/rejected": -0.5965723395347595, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.266943761467057e-06, |
|
"logits/chosen": -1.2816569805145264, |
|
"logits/rejected": -0.8941723704338074, |
|
"logps/chosen": -367.02191162109375, |
|
"logps/rejected": -760.0553588867188, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.11693791300058365, |
|
"rewards/margins": 0.39976662397384644, |
|
"rewards/rejected": -0.5167044997215271, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.233703971872287e-06, |
|
"logits/chosen": -1.8729069232940674, |
|
"logits/rejected": -1.0977063179016113, |
|
"logps/chosen": -393.6733093261719, |
|
"logps/rejected": -763.2752075195312, |
|
"loss": 0.1335, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.08343084156513214, |
|
"rewards/margins": 0.4291655123233795, |
|
"rewards/rejected": -0.5125963687896729, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1998629642789925e-06, |
|
"logits/chosen": -1.5668641328811646, |
|
"logits/rejected": -1.1349601745605469, |
|
"logps/chosen": -426.9754333496094, |
|
"logps/rejected": -820.5556640625, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1329190582036972, |
|
"rewards/margins": 0.36928990483283997, |
|
"rewards/rejected": -0.5022088885307312, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.165432474152505e-06, |
|
"logits/chosen": -1.5012271404266357, |
|
"logits/rejected": -1.278693675994873, |
|
"logps/chosen": -365.3034973144531, |
|
"logps/rejected": -678.4292602539062, |
|
"loss": 0.1737, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.13991737365722656, |
|
"rewards/margins": 0.3017304837703705, |
|
"rewards/rejected": -0.44164785742759705, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.130424441380308e-06, |
|
"logits/chosen": -1.42804753780365, |
|
"logits/rejected": -0.967817485332489, |
|
"logps/chosen": -411.77801513671875, |
|
"logps/rejected": -691.2272338867188, |
|
"loss": 0.1527, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1383361965417862, |
|
"rewards/margins": 0.33920183777809143, |
|
"rewards/rejected": -0.47753801941871643, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.09485100613151e-06, |
|
"logits/chosen": -1.5555391311645508, |
|
"logits/rejected": -1.1440869569778442, |
|
"logps/chosen": -434.00335693359375, |
|
"logps/rejected": -744.3508911132812, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19056299328804016, |
|
"rewards/margins": 0.30447274446487427, |
|
"rewards/rejected": -0.49503573775291443, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -1.8099536895751953, |
|
"logits/rejected": -1.0837316513061523, |
|
"logps/chosen": -427.7793884277344, |
|
"logps/rejected": -729.915283203125, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1386745572090149, |
|
"rewards/margins": 0.3526052236557007, |
|
"rewards/rejected": -0.4912797808647156, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.022057464960632e-06, |
|
"logits/chosen": -1.6691503524780273, |
|
"logits/rejected": -1.33521568775177, |
|
"logps/chosen": -428.7286071777344, |
|
"logps/rejected": -789.0191040039062, |
|
"loss": 0.1607, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1496300995349884, |
|
"rewards/margins": 0.35229435563087463, |
|
"rewards/rejected": -0.5019243955612183, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.984862602556383e-06, |
|
"logits/chosen": -1.6232519149780273, |
|
"logits/rejected": -1.197933554649353, |
|
"logps/chosen": -460.2228088378906, |
|
"logps/rejected": -696.0914306640625, |
|
"loss": 0.1346, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18895591795444489, |
|
"rewards/margins": 0.28954973816871643, |
|
"rewards/rejected": -0.4785057008266449, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.947152815957187e-06, |
|
"logits/chosen": -1.5458933115005493, |
|
"logits/rejected": -1.116236925125122, |
|
"logps/chosen": -433.87322998046875, |
|
"logps/rejected": -756.8858642578125, |
|
"loss": 0.1492, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.22537223994731903, |
|
"rewards/margins": 0.34959647059440613, |
|
"rewards/rejected": -0.5749687552452087, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.908941182252785e-06, |
|
"logits/chosen": -1.5793603658676147, |
|
"logits/rejected": -0.9729734659194946, |
|
"logps/chosen": -458.96368408203125, |
|
"logps/rejected": -781.1962890625, |
|
"loss": 0.1615, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1985333412885666, |
|
"rewards/margins": 0.3719526529312134, |
|
"rewards/rejected": -0.5704860091209412, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.8702409525646535e-06, |
|
"logits/chosen": -1.6880747079849243, |
|
"logits/rejected": -1.0946999788284302, |
|
"logps/chosen": -550.5426635742188, |
|
"logps/rejected": -861.6978759765625, |
|
"loss": 0.1362, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1908409297466278, |
|
"rewards/margins": 0.3778737485408783, |
|
"rewards/rejected": -0.5687146782875061, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8310655474507495e-06, |
|
"logits/chosen": -1.7694594860076904, |
|
"logits/rejected": -1.1918199062347412, |
|
"logps/chosen": -443.54736328125, |
|
"logps/rejected": -717.8020629882812, |
|
"loss": 0.1418, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.16424255073070526, |
|
"rewards/margins": 0.30146175622940063, |
|
"rewards/rejected": -0.4657043516635895, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7914285522515002e-06, |
|
"logits/chosen": -1.539620280265808, |
|
"logits/rejected": -1.3648602962493896, |
|
"logps/chosen": -468.6385192871094, |
|
"logps/rejected": -892.7566528320312, |
|
"loss": 0.1552, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22199459373950958, |
|
"rewards/margins": 0.36069172620773315, |
|
"rewards/rejected": -0.5826863050460815, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.751343712378639e-06, |
|
"logits/chosen": -1.68185555934906, |
|
"logits/rejected": -1.0438605546951294, |
|
"logps/chosen": -377.9205627441406, |
|
"logps/rejected": -688.3480834960938, |
|
"loss": 0.1417, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15213271975517273, |
|
"rewards/margins": 0.3400834798812866, |
|
"rewards/rejected": -0.49221619963645935, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.710824928548546e-06, |
|
"logits/chosen": -1.7241179943084717, |
|
"logits/rejected": -1.1749062538146973, |
|
"logps/chosen": -398.90521240234375, |
|
"logps/rejected": -792.80078125, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14098913967609406, |
|
"rewards/margins": 0.4070391058921814, |
|
"rewards/rejected": -0.5480281710624695, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.6698862519617225e-06, |
|
"logits/chosen": -1.862091064453125, |
|
"logits/rejected": -1.0774017572402954, |
|
"logps/chosen": -380.6012878417969, |
|
"logps/rejected": -803.9888916015625, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.12483291327953339, |
|
"rewards/margins": 0.46553611755371094, |
|
"rewards/rejected": -0.5903691053390503, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6285418794300793e-06, |
|
"logits/chosen": -1.416322946548462, |
|
"logits/rejected": -0.8399195671081543, |
|
"logps/chosen": -444.05523681640625, |
|
"logps/rejected": -761.3108520507812, |
|
"loss": 0.1571, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.20516355335712433, |
|
"rewards/margins": 0.3872140049934387, |
|
"rewards/rejected": -0.5923775434494019, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.5868061484537365e-06, |
|
"logits/chosen": -1.39794921875, |
|
"logits/rejected": -0.8267971873283386, |
|
"logps/chosen": -507.1766052246094, |
|
"logps/rejected": -875.86962890625, |
|
"loss": 0.1497, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23690223693847656, |
|
"rewards/margins": 0.40496787428855896, |
|
"rewards/rejected": -0.6418701410293579, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.5446935322490285e-06, |
|
"logits/chosen": -1.7719318866729736, |
|
"logits/rejected": -0.9355955123901367, |
|
"logps/chosen": -544.9541015625, |
|
"logps/rejected": -865.5302734375, |
|
"loss": 0.1963, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2105661928653717, |
|
"rewards/margins": 0.3866081237792969, |
|
"rewards/rejected": -0.5971742868423462, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.502218634729447e-06, |
|
"logits/chosen": -1.6639026403427124, |
|
"logits/rejected": -1.067781925201416, |
|
"logps/chosen": -575.9091796875, |
|
"logps/rejected": -838.0983276367188, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.23653562366962433, |
|
"rewards/margins": 0.3239360749721527, |
|
"rewards/rejected": -0.5604716539382935, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.459396185441265e-06, |
|
"logits/chosen": -1.7067358493804932, |
|
"logits/rejected": -1.0498546361923218, |
|
"logps/chosen": -398.35516357421875, |
|
"logps/rejected": -626.8757934570312, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.1356094628572464, |
|
"rewards/margins": 0.3172938823699951, |
|
"rewards/rejected": -0.4529033601284027, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4162410344555834e-06, |
|
"logits/chosen": -1.9210001230239868, |
|
"logits/rejected": -1.1206413507461548, |
|
"logps/chosen": -405.9615783691406, |
|
"logps/rejected": -725.4310913085938, |
|
"loss": 0.125, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.10851490497589111, |
|
"rewards/margins": 0.38589829206466675, |
|
"rewards/rejected": -0.49441319704055786, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3727681472185937e-06, |
|
"logits/chosen": -1.6562303304672241, |
|
"logits/rejected": -1.19851553440094, |
|
"logps/chosen": -486.35107421875, |
|
"logps/rejected": -963.0572509765625, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.22348380088806152, |
|
"rewards/margins": 0.4227561056613922, |
|
"rewards/rejected": -0.6462398767471313, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3289925993618217e-06, |
|
"logits/chosen": -1.5856201648712158, |
|
"logits/rejected": -1.0767395496368408, |
|
"logps/chosen": -526.1747436523438, |
|
"logps/rejected": -797.7916870117188, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2687681019306183, |
|
"rewards/margins": 0.2989320755004883, |
|
"rewards/rejected": -0.567700207233429, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2849295714741643e-06, |
|
"logits/chosen": -1.7678568363189697, |
|
"logits/rejected": -1.2151532173156738, |
|
"logps/chosen": -597.7952880859375, |
|
"logps/rejected": -848.87841796875, |
|
"loss": 0.1308, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.27867773175239563, |
|
"rewards/margins": 0.30206385254859924, |
|
"rewards/rejected": -0.5807415843009949, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2405943438375287e-06, |
|
"logits/chosen": -1.7643588781356812, |
|
"logits/rejected": -1.099827527999878, |
|
"logps/chosen": -423.9742126464844, |
|
"logps/rejected": -774.4637451171875, |
|
"loss": 0.0974, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1482265591621399, |
|
"rewards/margins": 0.410900741815567, |
|
"rewards/rejected": -0.5591272711753845, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1960022911279036e-06, |
|
"logits/chosen": -1.5414252281188965, |
|
"logits/rejected": -1.1484423875808716, |
|
"logps/chosen": -493.69464111328125, |
|
"logps/rejected": -835.8029174804688, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.19744431972503662, |
|
"rewards/margins": 0.36680763959884644, |
|
"rewards/rejected": -0.5642520189285278, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.1511688770836844e-06, |
|
"logits/chosen": -1.511249303817749, |
|
"logits/rejected": -1.3401678800582886, |
|
"logps/chosen": -404.75933837890625, |
|
"logps/rejected": -805.8262939453125, |
|
"loss": 0.1035, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18373355269432068, |
|
"rewards/margins": 0.3318132758140564, |
|
"rewards/rejected": -0.5155468583106995, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.1061096491431307e-06, |
|
"logits/chosen": -1.8831459283828735, |
|
"logits/rejected": -1.2160544395446777, |
|
"logps/chosen": -447.65032958984375, |
|
"logps/rejected": -843.8660278320312, |
|
"loss": 0.1345, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13529552519321442, |
|
"rewards/margins": 0.40966707468032837, |
|
"rewards/rejected": -0.5449625849723816, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0608402330527796e-06, |
|
"logits/chosen": -1.6770378351211548, |
|
"logits/rejected": -0.9972168803215027, |
|
"logps/chosen": -379.8583984375, |
|
"logps/rejected": -719.3693237304688, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15062110126018524, |
|
"rewards/margins": 0.3817873001098633, |
|
"rewards/rejected": -0.5324083566665649, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.0153763274487176e-06, |
|
"logits/chosen": -1.4134846925735474, |
|
"logits/rejected": -0.966874897480011, |
|
"logps/chosen": -441.3450622558594, |
|
"logps/rejected": -707.3884887695312, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.21538302302360535, |
|
"rewards/margins": 0.3049529790878296, |
|
"rewards/rejected": -0.5203360319137573, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9697336984125683e-06, |
|
"logits/chosen": -1.6667283773422241, |
|
"logits/rejected": -1.0133411884307861, |
|
"logps/chosen": -401.2959899902344, |
|
"logps/rejected": -851.93701171875, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15221676230430603, |
|
"rewards/margins": 0.45876413583755493, |
|
"rewards/rejected": -0.6109809279441833, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.923928174004094e-06, |
|
"logits/chosen": -1.8437814712524414, |
|
"logits/rejected": -1.0747764110565186, |
|
"logps/chosen": -470.7169494628906, |
|
"logps/rejected": -732.7559814453125, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.13192041218280792, |
|
"rewards/margins": 0.3781585991382599, |
|
"rewards/rejected": -0.5100789666175842, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.8779756387723036e-06, |
|
"logits/chosen": -1.7663402557373047, |
|
"logits/rejected": -1.3018739223480225, |
|
"logps/chosen": -446.77490234375, |
|
"logps/rejected": -766.7832641601562, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1646779477596283, |
|
"rewards/margins": 0.36457663774490356, |
|
"rewards/rejected": -0.5292545557022095, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.831892028246968e-06, |
|
"logits/chosen": -1.848724603652954, |
|
"logits/rejected": -1.216956377029419, |
|
"logps/chosen": -418.67645263671875, |
|
"logps/rejected": -703.2694702148438, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14982689917087555, |
|
"rewards/margins": 0.36974358558654785, |
|
"rewards/rejected": -0.5195704698562622, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7856933234124617e-06, |
|
"logits/chosen": -1.7911808490753174, |
|
"logits/rejected": -1.0922878980636597, |
|
"logps/chosen": -448.37603759765625, |
|
"logps/rejected": -834.2364501953125, |
|
"loss": 0.1538, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.17878659069538116, |
|
"rewards/margins": 0.3954610228538513, |
|
"rewards/rejected": -0.5742476582527161, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7393955451658387e-06, |
|
"logits/chosen": -1.7210479974746704, |
|
"logits/rejected": -1.2294584512710571, |
|
"logps/chosen": -514.4754028320312, |
|
"logps/rejected": -868.5929565429688, |
|
"loss": 0.1626, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21787652373313904, |
|
"rewards/margins": 0.3894199728965759, |
|
"rewards/rejected": -0.6072965264320374, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6930147487610667e-06, |
|
"logits/chosen": -1.5907623767852783, |
|
"logits/rejected": -0.78331458568573, |
|
"logps/chosen": -462.7984313964844, |
|
"logps/rejected": -805.7174072265625, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17765957117080688, |
|
"rewards/margins": 0.40347957611083984, |
|
"rewards/rejected": -0.581139087677002, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.6465670182413487e-06, |
|
"logits/chosen": -1.6310056447982788, |
|
"logits/rejected": -1.0298982858657837, |
|
"logps/chosen": -411.04937744140625, |
|
"logps/rejected": -758.7462158203125, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1506483405828476, |
|
"rewards/margins": 0.36691543459892273, |
|
"rewards/rejected": -0.5175637602806091, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.6000684608614594e-06, |
|
"logits/chosen": -1.6570842266082764, |
|
"logits/rejected": -0.8277125358581543, |
|
"logps/chosen": -506.580810546875, |
|
"logps/rejected": -801.989990234375, |
|
"loss": 0.1436, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1983393281698227, |
|
"rewards/margins": 0.38503485918045044, |
|
"rewards/rejected": -0.5833742022514343, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5535352015020338e-06, |
|
"logits/chosen": -1.528637170791626, |
|
"logits/rejected": -0.8484199643135071, |
|
"logps/chosen": -470.8020935058594, |
|
"logps/rejected": -820.8448486328125, |
|
"loss": 0.1363, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20915472507476807, |
|
"rewards/margins": 0.38422003388404846, |
|
"rewards/rejected": -0.5933747887611389, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.506983377077741e-06, |
|
"logits/chosen": -1.3463196754455566, |
|
"logits/rejected": -1.018822193145752, |
|
"logps/chosen": -464.81524658203125, |
|
"logps/rejected": -807.076171875, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21939115226268768, |
|
"rewards/margins": 0.3329920172691345, |
|
"rewards/rejected": -0.5523831844329834, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.460429130941289e-06, |
|
"logits/chosen": -1.4068031311035156, |
|
"logits/rejected": -0.9966346621513367, |
|
"logps/chosen": -443.41583251953125, |
|
"logps/rejected": -826.1185302734375, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1990918219089508, |
|
"rewards/margins": 0.39130455255508423, |
|
"rewards/rejected": -0.5903963446617126, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.413888607285192e-06, |
|
"logits/chosen": -1.2919907569885254, |
|
"logits/rejected": -0.9193531274795532, |
|
"logps/chosen": -496.358642578125, |
|
"logps/rejected": -845.7939453125, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.24995890259742737, |
|
"rewards/margins": 0.3503498435020447, |
|
"rewards/rejected": -0.6003087162971497, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.367377945543249e-06, |
|
"logits/chosen": -1.6841480731964111, |
|
"logits/rejected": -0.907370924949646, |
|
"logps/chosen": -446.6328125, |
|
"logps/rejected": -884.2018432617188, |
|
"loss": 0.1068, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15798960626125336, |
|
"rewards/margins": 0.4673282206058502, |
|
"rewards/rejected": -0.6253177523612976, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.320913274793676e-06, |
|
"logits/chosen": -1.7113037109375, |
|
"logits/rejected": -1.1816798448562622, |
|
"logps/chosen": -410.67645263671875, |
|
"logps/rejected": -770.4984741210938, |
|
"loss": 0.1423, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1650415062904358, |
|
"rewards/margins": 0.37022119760513306, |
|
"rewards/rejected": -0.5352627038955688, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.27451070816582e-06, |
|
"logits/chosen": -1.6226348876953125, |
|
"logits/rejected": -0.9200002551078796, |
|
"logps/chosen": -518.1405029296875, |
|
"logps/rejected": -891.6884765625, |
|
"loss": 0.1105, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.16074618697166443, |
|
"rewards/margins": 0.47465044260025024, |
|
"rewards/rejected": -0.6353966593742371, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.228186337252414e-06, |
|
"logits/chosen": -1.7294307947158813, |
|
"logits/rejected": -0.8779215812683105, |
|
"logps/chosen": -516.14013671875, |
|
"logps/rejected": -824.5764770507812, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17551277577877045, |
|
"rewards/margins": 0.40844354033470154, |
|
"rewards/rejected": -0.583956241607666, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1819562265292946e-06, |
|
"logits/chosen": -1.5813789367675781, |
|
"logits/rejected": -1.001509666442871, |
|
"logps/chosen": -429.26593017578125, |
|
"logps/rejected": -789.7249755859375, |
|
"loss": 0.1574, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.21274442970752716, |
|
"rewards/margins": 0.3669392764568329, |
|
"rewards/rejected": -0.5796837210655212, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1358364077845236e-06, |
|
"logits/chosen": -1.533307671546936, |
|
"logits/rejected": -0.9590204954147339, |
|
"logps/chosen": -387.72381591796875, |
|
"logps/rejected": -828.5607299804688, |
|
"loss": 0.1014, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.14084286987781525, |
|
"rewards/margins": 0.4476155638694763, |
|
"rewards/rejected": -0.5884584188461304, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.089842874558849e-06, |
|
"logits/chosen": -1.3846327066421509, |
|
"logits/rejected": -1.0313770771026611, |
|
"logps/chosen": -479.6465759277344, |
|
"logps/rejected": -906.1482543945312, |
|
"loss": 0.1133, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22880907356739044, |
|
"rewards/margins": 0.4139330983161926, |
|
"rewards/rejected": -0.6427421569824219, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0439915765994242e-06, |
|
"logits/chosen": -1.5441999435424805, |
|
"logits/rejected": -0.8765427470207214, |
|
"logps/chosen": -374.98504638671875, |
|
"logps/rejected": -726.3242797851562, |
|
"loss": 0.1078, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1631808578968048, |
|
"rewards/margins": 0.38433948159217834, |
|
"rewards/rejected": -0.5475203394889832, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9982984143287186e-06, |
|
"logits/chosen": -1.7160451412200928, |
|
"logits/rejected": -0.9389771223068237, |
|
"logps/chosen": -419.886962890625, |
|
"logps/rejected": -727.5339965820312, |
|
"loss": 0.1209, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1307600438594818, |
|
"rewards/margins": 0.4173372685909271, |
|
"rewards/rejected": -0.5480973720550537, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.95277923333053e-06, |
|
"logits/chosen": -1.5577538013458252, |
|
"logits/rejected": -0.9766386151313782, |
|
"logps/chosen": -432.010498046875, |
|
"logps/rejected": -772.4149780273438, |
|
"loss": 0.1015, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1316412091255188, |
|
"rewards/margins": 0.40984097123146057, |
|
"rewards/rejected": -0.541482150554657, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9074498188550156e-06, |
|
"logits/chosen": -1.6150667667388916, |
|
"logits/rejected": -1.0481829643249512, |
|
"logps/chosen": -460.06781005859375, |
|
"logps/rejected": -748.8250122070312, |
|
"loss": 0.1577, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18149954080581665, |
|
"rewards/margins": 0.34171923995018005, |
|
"rewards/rejected": -0.5232187509536743, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.862325890344643e-06, |
|
"logits/chosen": -1.3022327423095703, |
|
"logits/rejected": -0.9266065359115601, |
|
"logps/chosen": -367.62823486328125, |
|
"logps/rejected": -806.5985107421875, |
|
"loss": 0.1656, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.17992374300956726, |
|
"rewards/margins": 0.42552104592323303, |
|
"rewards/rejected": -0.6054448485374451, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.817423095982972e-06, |
|
"logits/chosen": -1.3970632553100586, |
|
"logits/rejected": -0.9412476420402527, |
|
"logps/chosen": -451.11883544921875, |
|
"logps/rejected": -783.0731201171875, |
|
"loss": 0.102, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20501062273979187, |
|
"rewards/margins": 0.37199467420578003, |
|
"rewards/rejected": -0.5770053267478943, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.7727570072681293e-06, |
|
"logits/chosen": -1.4293451309204102, |
|
"logits/rejected": -0.8616847991943359, |
|
"logps/chosen": -406.25042724609375, |
|
"logps/rejected": -737.0385131835938, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16194511950016022, |
|
"rewards/margins": 0.3638822138309479, |
|
"rewards/rejected": -0.5258272886276245, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7283431136128961e-06, |
|
"logits/chosen": -1.6002616882324219, |
|
"logits/rejected": -1.116288423538208, |
|
"logps/chosen": -449.384521484375, |
|
"logps/rejected": -805.2763061523438, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20747177302837372, |
|
"rewards/margins": 0.3553561270236969, |
|
"rewards/rejected": -0.5628278851509094, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6841968169732478e-06, |
|
"logits/chosen": -1.5592294931411743, |
|
"logits/rejected": -1.054216742515564, |
|
"logps/chosen": -448.9071350097656, |
|
"logps/rejected": -851.6107177734375, |
|
"loss": 0.1204, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18522223830223083, |
|
"rewards/margins": 0.4141850471496582, |
|
"rewards/rejected": -0.5994073152542114, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6403334265072284e-06, |
|
"logits/chosen": -1.6474437713623047, |
|
"logits/rejected": -0.8614113926887512, |
|
"logps/chosen": -453.735107421875, |
|
"logps/rejected": -801.1546630859375, |
|
"loss": 0.1081, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1977526694536209, |
|
"rewards/margins": 0.38285189867019653, |
|
"rewards/rejected": -0.5806045532226562, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5967681532660066e-06, |
|
"logits/chosen": -1.2708427906036377, |
|
"logits/rejected": -0.9732850790023804, |
|
"logps/chosen": -437.337890625, |
|
"logps/rejected": -822.8092041015625, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20598828792572021, |
|
"rewards/margins": 0.38752201199531555, |
|
"rewards/rejected": -0.5935102701187134, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5535161049189463e-06, |
|
"logits/chosen": -1.5570838451385498, |
|
"logits/rejected": -1.1252386569976807, |
|
"logps/chosen": -500.2212829589844, |
|
"logps/rejected": -786.1821899414062, |
|
"loss": 0.1145, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1828458607196808, |
|
"rewards/margins": 0.3151377737522125, |
|
"rewards/rejected": -0.4979836046695709, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5105922805145356e-06, |
|
"logits/chosen": -1.8010812997817993, |
|
"logits/rejected": -1.2702046632766724, |
|
"logps/chosen": -434.25421142578125, |
|
"logps/rejected": -807.052001953125, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15841101109981537, |
|
"rewards/margins": 0.35736268758773804, |
|
"rewards/rejected": -0.5157736539840698, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4680115652789823e-06, |
|
"logits/chosen": -1.856612205505371, |
|
"logits/rejected": -1.147216558456421, |
|
"logps/chosen": -523.8411865234375, |
|
"logps/rejected": -821.1082763671875, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2023150473833084, |
|
"rewards/margins": 0.35308974981307983, |
|
"rewards/rejected": -0.555404782295227, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4257887254542767e-06, |
|
"logits/chosen": -1.5119379758834839, |
|
"logits/rejected": -1.0702050924301147, |
|
"logps/chosen": -511.7137756347656, |
|
"logps/rejected": -906.3107299804688, |
|
"loss": 0.1025, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.21919742226600647, |
|
"rewards/margins": 0.3765312731266022, |
|
"rewards/rejected": -0.5957286953926086, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.3839384031775227e-06, |
|
"logits/chosen": -1.6945511102676392, |
|
"logits/rejected": -0.8750427961349487, |
|
"logps/chosen": -440.59552001953125, |
|
"logps/rejected": -766.9216918945312, |
|
"loss": 0.1519, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15435707569122314, |
|
"rewards/margins": 0.4113141894340515, |
|
"rewards/rejected": -0.5656712651252747, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.342475111403298e-06, |
|
"logits/chosen": -1.4833415746688843, |
|
"logits/rejected": -1.0713919401168823, |
|
"logps/chosen": -438.8766174316406, |
|
"logps/rejected": -720.0028076171875, |
|
"loss": 0.1574, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.20507605373859406, |
|
"rewards/margins": 0.28728824853897095, |
|
"rewards/rejected": -0.4923642575740814, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.3014132288708209e-06, |
|
"logits/chosen": -1.5766406059265137, |
|
"logits/rejected": -1.0825704336166382, |
|
"logps/chosen": -438.3309020996094, |
|
"logps/rejected": -823.6751708984375, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.19768479466438293, |
|
"rewards/margins": 0.3687485158443451, |
|
"rewards/rejected": -0.566433310508728, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2607669951176549e-06, |
|
"logits/chosen": -1.4940482378005981, |
|
"logits/rejected": -1.2070845365524292, |
|
"logps/chosen": -389.45343017578125, |
|
"logps/rejected": -773.9241333007812, |
|
"loss": 0.1574, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.15667062997817993, |
|
"rewards/margins": 0.3334207832813263, |
|
"rewards/rejected": -0.49009138345718384, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2205505055416891e-06, |
|
"logits/chosen": -1.5122394561767578, |
|
"logits/rejected": -1.3955858945846558, |
|
"logps/chosen": -338.9855651855469, |
|
"logps/rejected": -748.5198364257812, |
|
"loss": 0.1404, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16774006187915802, |
|
"rewards/margins": 0.34086841344833374, |
|
"rewards/rejected": -0.5086084604263306, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1807777065131002e-06, |
|
"logits/chosen": -1.5142749547958374, |
|
"logits/rejected": -1.0132977962493896, |
|
"logps/chosen": -410.44879150390625, |
|
"logps/rejected": -810.9103393554688, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.14515772461891174, |
|
"rewards/margins": 0.36618533730506897, |
|
"rewards/rejected": -0.5113429427146912, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1414623905380012e-06, |
|
"logits/chosen": -1.756066083908081, |
|
"logits/rejected": -1.1571279764175415, |
|
"logps/chosen": -441.978515625, |
|
"logps/rejected": -786.6061401367188, |
|
"loss": 0.1217, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1463043987751007, |
|
"rewards/margins": 0.36096060276031494, |
|
"rewards/rejected": -0.5072649717330933, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.1026181914754388e-06, |
|
"logits/chosen": -1.784054160118103, |
|
"logits/rejected": -1.0276035070419312, |
|
"logps/chosen": -506.1011657714844, |
|
"logps/rejected": -819.1619873046875, |
|
"loss": 0.1352, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1743244081735611, |
|
"rewards/margins": 0.38723859190940857, |
|
"rewards/rejected": -0.5615630149841309, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0642585798094136e-06, |
|
"logits/chosen": -1.5410611629486084, |
|
"logits/rejected": -1.0178577899932861, |
|
"logps/chosen": -377.84197998046875, |
|
"logps/rejected": -720.7569580078125, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.12278805673122406, |
|
"rewards/margins": 0.38729211688041687, |
|
"rewards/rejected": -0.5100802183151245, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0263968579775522e-06, |
|
"logits/chosen": -1.5256543159484863, |
|
"logits/rejected": -0.9656683802604675, |
|
"logps/chosen": -458.48089599609375, |
|
"logps/rejected": -791.9251708984375, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.16513575613498688, |
|
"rewards/margins": 0.38074809312820435, |
|
"rewards/rejected": -0.54588383436203, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.89046155758058e-07, |
|
"logits/chosen": -1.6825745105743408, |
|
"logits/rejected": -0.8826824426651001, |
|
"logps/chosen": -455.65594482421875, |
|
"logps/rejected": -802.0789794921875, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1435212790966034, |
|
"rewards/margins": 0.41453132033348083, |
|
"rewards/rejected": -0.558052659034729, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.52219425716534e-07, |
|
"logits/chosen": -1.4951298236846924, |
|
"logits/rejected": -0.8258262872695923, |
|
"logps/chosen": -515.0365600585938, |
|
"logps/rejected": -771.9305419921875, |
|
"loss": 0.1429, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22707219421863556, |
|
"rewards/margins": 0.3406526446342468, |
|
"rewards/rejected": -0.5677248239517212, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.15929438714262e-07, |
|
"logits/chosen": -1.6602566242218018, |
|
"logits/rejected": -0.9937980771064758, |
|
"logps/chosen": -368.70684814453125, |
|
"logps/rejected": -689.556884765625, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.12013135105371475, |
|
"rewards/margins": 0.37646666169166565, |
|
"rewards/rejected": -0.4965980052947998, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.801887794794911e-07, |
|
"logits/chosen": -1.4943420886993408, |
|
"logits/rejected": -0.9112469553947449, |
|
"logps/chosen": -379.4705810546875, |
|
"logps/rejected": -716.5015258789062, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.13394254446029663, |
|
"rewards/margins": 0.3894422650337219, |
|
"rewards/rejected": -0.5233848690986633, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.450098422432787e-07, |
|
"logits/chosen": -1.7622817754745483, |
|
"logits/rejected": -0.7207467555999756, |
|
"logps/chosen": -537.2728271484375, |
|
"logps/rejected": -854.8095703125, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1893191635608673, |
|
"rewards/margins": 0.4274328649044037, |
|
"rewards/rejected": -0.616752028465271, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.104048264413858e-07, |
|
"logits/chosen": -1.5849692821502686, |
|
"logits/rejected": -0.9879060983657837, |
|
"logps/chosen": -451.66802978515625, |
|
"logps/rejected": -812.3735961914062, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1757660210132599, |
|
"rewards/margins": 0.4036192297935486, |
|
"rewards/rejected": -0.5793852806091309, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.763857324837321e-07, |
|
"logits/chosen": -1.7880465984344482, |
|
"logits/rejected": -1.1138683557510376, |
|
"logps/chosen": -470.4102478027344, |
|
"logps/rejected": -782.1883544921875, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.18462924659252167, |
|
"rewards/margins": 0.37033870816230774, |
|
"rewards/rejected": -0.5549679398536682, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.429643575928605e-07, |
|
"logits/chosen": -1.688932180404663, |
|
"logits/rejected": -1.1515108346939087, |
|
"logps/chosen": -416.93896484375, |
|
"logps/rejected": -748.1307373046875, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.14920882880687714, |
|
"rewards/margins": 0.345571368932724, |
|
"rewards/rejected": -0.4947802424430847, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.101522917128709e-07, |
|
"logits/chosen": -1.3505184650421143, |
|
"logits/rejected": -0.8502361178398132, |
|
"logps/chosen": -453.3301696777344, |
|
"logps/rejected": -852.1624755859375, |
|
"loss": 0.149, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18861651420593262, |
|
"rewards/margins": 0.39376121759414673, |
|
"rewards/rejected": -0.5823776721954346, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.779609134902312e-07, |
|
"logits/chosen": -1.4756485223770142, |
|
"logits/rejected": -0.8883223533630371, |
|
"logps/chosen": -409.55029296875, |
|
"logps/rejected": -707.3751831054688, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17147330939769745, |
|
"rewards/margins": 0.3226475715637207, |
|
"rewards/rejected": -0.49412089586257935, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.464013863278629e-07, |
|
"logits/chosen": -1.593145728111267, |
|
"logits/rejected": -0.8717827796936035, |
|
"logps/chosen": -429.87725830078125, |
|
"logps/rejected": -856.5046997070312, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.12572301924228668, |
|
"rewards/margins": 0.46143823862075806, |
|
"rewards/rejected": -0.5871611833572388, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.154846545138696e-07, |
|
"logits/chosen": -1.556706190109253, |
|
"logits/rejected": -1.1209014654159546, |
|
"logps/chosen": -434.39813232421875, |
|
"logps/rejected": -873.4528198242188, |
|
"loss": 0.1184, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.151644766330719, |
|
"rewards/margins": 0.44497567415237427, |
|
"rewards/rejected": -0.5966204404830933, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.852214394262515e-07, |
|
"logits/chosen": -1.5190951824188232, |
|
"logits/rejected": -1.1570379734039307, |
|
"logps/chosen": -394.3932189941406, |
|
"logps/rejected": -781.7257690429688, |
|
"loss": 0.1364, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.16030506789684296, |
|
"rewards/margins": 0.3787681758403778, |
|
"rewards/rejected": -0.5390732884407043, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.556222358149191e-07, |
|
"logits/chosen": -1.5962765216827393, |
|
"logits/rejected": -0.9932464361190796, |
|
"logps/chosen": -390.4809875488281, |
|
"logps/rejected": -714.4888916015625, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17372211813926697, |
|
"rewards/margins": 0.361335813999176, |
|
"rewards/rejected": -0.5350579023361206, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.266973081622992e-07, |
|
"logits/chosen": -1.4811457395553589, |
|
"logits/rejected": -1.0426948070526123, |
|
"logps/chosen": -451.40069580078125, |
|
"logps/rejected": -772.6881713867188, |
|
"loss": 0.1611, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.18898162245750427, |
|
"rewards/margins": 0.3498608469963074, |
|
"rewards/rejected": -0.5388425588607788, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.984566871237942e-07, |
|
"logits/chosen": -1.4943921566009521, |
|
"logits/rejected": -1.000528335571289, |
|
"logps/chosen": -393.3673095703125, |
|
"logps/rejected": -763.3333129882812, |
|
"loss": 0.1387, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15240536630153656, |
|
"rewards/margins": 0.3935711681842804, |
|
"rewards/rejected": -0.5459765195846558, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.709101660493251e-07, |
|
"logits/chosen": -1.4344061613082886, |
|
"logits/rejected": -0.8900424838066101, |
|
"logps/chosen": -454.6851501464844, |
|
"logps/rejected": -862.0211181640625, |
|
"loss": 0.1148, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2144501656293869, |
|
"rewards/margins": 0.39736613631248474, |
|
"rewards/rejected": -0.6118162870407104, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.440672975871743e-07, |
|
"logits/chosen": -1.6005455255508423, |
|
"logits/rejected": -1.2345631122589111, |
|
"logps/chosen": -454.6659240722656, |
|
"logps/rejected": -910.2664794921875, |
|
"loss": 0.0851, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.1945376694202423, |
|
"rewards/margins": 0.4257555603981018, |
|
"rewards/rejected": -0.6202932000160217, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1793739037129134e-07, |
|
"logits/chosen": -1.755613923072815, |
|
"logits/rejected": -0.9976798892021179, |
|
"logps/chosen": -426.450927734375, |
|
"logps/rejected": -827.8946533203125, |
|
"loss": 0.1088, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1098506897687912, |
|
"rewards/margins": 0.4619103968143463, |
|
"rewards/rejected": -0.5717611908912659, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9252950579322405e-07, |
|
"logits/chosen": -1.7585302591323853, |
|
"logits/rejected": -0.9437012672424316, |
|
"logps/chosen": -617.3839111328125, |
|
"logps/rejected": -886.2396240234375, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.2242734432220459, |
|
"rewards/margins": 0.37233808636665344, |
|
"rewards/rejected": -0.5966114401817322, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6785245485978864e-07, |
|
"logits/chosen": -1.5823638439178467, |
|
"logits/rejected": -1.016841173171997, |
|
"logps/chosen": -453.40643310546875, |
|
"logps/rejected": -826.7568359375, |
|
"loss": 0.1052, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.14566640555858612, |
|
"rewards/margins": 0.41016706824302673, |
|
"rewards/rejected": -0.5558334589004517, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.43914795137566e-07, |
|
"logits/chosen": -1.3402397632598877, |
|
"logits/rejected": -0.6611793041229248, |
|
"logps/chosen": -491.9454040527344, |
|
"logps/rejected": -827.7058715820312, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1830025464296341, |
|
"rewards/margins": 0.3839171230792999, |
|
"rewards/rejected": -0.5669196844100952, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.207248277852901e-07, |
|
"logits/chosen": -1.3319523334503174, |
|
"logits/rejected": -1.2467930316925049, |
|
"logps/chosen": -415.2613220214844, |
|
"logps/rejected": -794.6478271484375, |
|
"loss": 0.175, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18765749037265778, |
|
"rewards/margins": 0.33323392271995544, |
|
"rewards/rejected": -0.5208913683891296, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9829059467515074e-07, |
|
"logits/chosen": -1.6862188577651978, |
|
"logits/rejected": -1.0607928037643433, |
|
"logps/chosen": -466.0138244628906, |
|
"logps/rejected": -823.7083129882812, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1697189062833786, |
|
"rewards/margins": 0.39212626218795776, |
|
"rewards/rejected": -0.5618451833724976, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.766198756040153e-07, |
|
"logits/chosen": -1.5529918670654297, |
|
"logits/rejected": -1.1102968454360962, |
|
"logps/chosen": -514.1727294921875, |
|
"logps/rejected": -934.7576293945312, |
|
"loss": 0.0946, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.2095489799976349, |
|
"rewards/margins": 0.39688506722450256, |
|
"rewards/rejected": -0.6064340472221375, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5572018559553155e-07, |
|
"logits/chosen": -1.4525808095932007, |
|
"logits/rejected": -1.114332675933838, |
|
"logps/chosen": -429.51336669921875, |
|
"logps/rejected": -814.693115234375, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.20347242057323456, |
|
"rewards/margins": 0.3701416254043579, |
|
"rewards/rejected": -0.5736140012741089, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3559877229404864e-07, |
|
"logits/chosen": -1.5984094142913818, |
|
"logits/rejected": -1.1003965139389038, |
|
"logps/chosen": -458.2529296875, |
|
"logps/rejected": -795.9619140625, |
|
"loss": 0.1294, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17800959944725037, |
|
"rewards/margins": 0.363762229681015, |
|
"rewards/rejected": -0.5417717695236206, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.1626261345126576e-07, |
|
"logits/chosen": -1.4350886344909668, |
|
"logits/rejected": -1.0259506702423096, |
|
"logps/chosen": -415.7510681152344, |
|
"logps/rejected": -919.9736328125, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15845449268817902, |
|
"rewards/margins": 0.4928809702396393, |
|
"rewards/rejected": -0.6513354182243347, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.9771841450646505e-07, |
|
"logits/chosen": -1.6430625915527344, |
|
"logits/rejected": -0.9447630643844604, |
|
"logps/chosen": -506.1864318847656, |
|
"logps/rejected": -795.38134765625, |
|
"loss": 0.1602, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.23687663674354553, |
|
"rewards/margins": 0.33186858892440796, |
|
"rewards/rejected": -0.5687452554702759, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.7997260626118758e-07, |
|
"logits/chosen": -1.898046851158142, |
|
"logits/rejected": -1.3102858066558838, |
|
"logps/chosen": -514.0572509765625, |
|
"logps/rejected": -825.0703125, |
|
"loss": 0.1456, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.17340177297592163, |
|
"rewards/margins": 0.38971638679504395, |
|
"rewards/rejected": -0.5631181597709656, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.6303134264914365e-07, |
|
"logits/chosen": -1.6851441860198975, |
|
"logits/rejected": -1.0963430404663086, |
|
"logps/chosen": -480.8072204589844, |
|
"logps/rejected": -728.0396728515625, |
|
"loss": 0.1273, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1836322844028473, |
|
"rewards/margins": 0.3386848270893097, |
|
"rewards/rejected": -0.5223170518875122, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.469004986021355e-07, |
|
"logits/chosen": -1.414111852645874, |
|
"logits/rejected": -0.8712374567985535, |
|
"logps/chosen": -443.46728515625, |
|
"logps/rejected": -897.1246948242188, |
|
"loss": 0.0899, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17597445845603943, |
|
"rewards/margins": 0.4499644339084625, |
|
"rewards/rejected": -0.6259388327598572, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.315856680127367e-07, |
|
"logits/chosen": -1.4355229139328003, |
|
"logits/rejected": -0.8268268704414368, |
|
"logps/chosen": -411.8287658691406, |
|
"logps/rejected": -796.5527954101562, |
|
"loss": 0.1013, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.16030281782150269, |
|
"rewards/margins": 0.4271472990512848, |
|
"rewards/rejected": -0.5874501466751099, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1709216179442817e-07, |
|
"logits/chosen": -1.5936983823776245, |
|
"logits/rejected": -0.9012172818183899, |
|
"logps/chosen": -452.53155517578125, |
|
"logps/rejected": -878.1297607421875, |
|
"loss": 0.1108, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.17777523398399353, |
|
"rewards/margins": 0.4133872389793396, |
|
"rewards/rejected": -0.5911625623703003, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0342500603986421e-07, |
|
"logits/chosen": -1.441282033920288, |
|
"logits/rejected": -0.9638457298278809, |
|
"logps/chosen": -417.11895751953125, |
|
"logps/rejected": -741.5521850585938, |
|
"loss": 0.1466, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.16872674226760864, |
|
"rewards/margins": 0.3362739682197571, |
|
"rewards/rejected": -0.5050007104873657, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.058894027791643e-08, |
|
"logits/chosen": -1.4651381969451904, |
|
"logits/rejected": -0.9410767555236816, |
|
"logps/chosen": -497.6310119628906, |
|
"logps/rejected": -866.1295776367188, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.23562383651733398, |
|
"rewards/margins": 0.3830471634864807, |
|
"rewards/rejected": -0.6186710596084595, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.858841583008592e-08, |
|
"logits/chosen": -1.6138349771499634, |
|
"logits/rejected": -1.0234501361846924, |
|
"logps/chosen": -425.06610107421875, |
|
"logps/rejected": -700.060791015625, |
|
"loss": 0.1241, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.16232439875602722, |
|
"rewards/margins": 0.3415161669254303, |
|
"rewards/rejected": -0.5038405656814575, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.742759426686313e-08, |
|
"logits/chosen": -1.5296719074249268, |
|
"logits/rejected": -1.15841543674469, |
|
"logps/chosen": -541.86083984375, |
|
"logps/rejected": -857.0759887695312, |
|
"loss": 0.1324, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.22835755348205566, |
|
"rewards/margins": 0.37031129002571106, |
|
"rewards/rejected": -0.5986688733100891, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.7110345964571104e-08, |
|
"logits/chosen": -1.6711105108261108, |
|
"logits/rejected": -1.0233453512191772, |
|
"logps/chosen": -445.75762939453125, |
|
"logps/rejected": -772.8753662109375, |
|
"loss": 0.1172, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18701913952827454, |
|
"rewards/margins": 0.3782210052013397, |
|
"rewards/rejected": -0.565240204334259, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.764024876318357e-08, |
|
"logits/chosen": -1.5489776134490967, |
|
"logits/rejected": -0.8348779678344727, |
|
"logps/chosen": -509.6427307128906, |
|
"logps/rejected": -782.4371337890625, |
|
"loss": 0.1146, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.20378637313842773, |
|
"rewards/margins": 0.37221604585647583, |
|
"rewards/rejected": -0.5760024189949036, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.902058672559633e-08, |
|
"logits/chosen": -1.8395429849624634, |
|
"logits/rejected": -1.2655082941055298, |
|
"logps/chosen": -375.2162780761719, |
|
"logps/rejected": -805.35302734375, |
|
"loss": 0.1244, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.11881232261657715, |
|
"rewards/margins": 0.4330004155635834, |
|
"rewards/rejected": -0.5518127679824829, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.125434899876933e-08, |
|
"logits/chosen": -1.5633362531661987, |
|
"logits/rejected": -1.1406381130218506, |
|
"logps/chosen": -356.45098876953125, |
|
"logps/rejected": -783.3472900390625, |
|
"loss": 0.1001, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1347019374370575, |
|
"rewards/margins": 0.42905181646347046, |
|
"rewards/rejected": -0.5637537837028503, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4344228777145873e-08, |
|
"logits/chosen": -1.6571776866912842, |
|
"logits/rejected": -0.7649690508842468, |
|
"logps/chosen": -587.1907348632812, |
|
"logps/rejected": -933.9886474609375, |
|
"loss": 0.132, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.25137990713119507, |
|
"rewards/margins": 0.42299261689186096, |
|
"rewards/rejected": -0.6743724942207336, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.829262236869772e-08, |
|
"logits/chosen": -1.541998267173767, |
|
"logits/rejected": -0.8689600229263306, |
|
"logps/chosen": -483.3575134277344, |
|
"logps/rejected": -698.212158203125, |
|
"loss": 0.1768, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.22255222499370575, |
|
"rewards/margins": 0.27523303031921387, |
|
"rewards/rejected": -0.4977852404117584, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3101628363929586e-08, |
|
"logits/chosen": -1.5238444805145264, |
|
"logits/rejected": -0.7508775591850281, |
|
"logps/chosen": -520.79296875, |
|
"logps/rejected": -767.9632568359375, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1896631270647049, |
|
"rewards/margins": 0.36830946803092957, |
|
"rewards/rejected": -0.5579725503921509, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.773046908123195e-09, |
|
"logits/chosen": -1.6025253534317017, |
|
"logits/rejected": -1.304527997970581, |
|
"logps/chosen": -375.14874267578125, |
|
"logps/rejected": -767.8821411132812, |
|
"loss": 0.1284, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.1781584918498993, |
|
"rewards/margins": 0.33938026428222656, |
|
"rewards/rejected": -0.5175387263298035, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.308379077080817e-09, |
|
"logits/chosen": -1.6030333042144775, |
|
"logits/rejected": -1.3066356182098389, |
|
"logps/chosen": -397.94873046875, |
|
"logps/rejected": -825.3441162109375, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.20664629340171814, |
|
"rewards/margins": 0.3704259991645813, |
|
"rewards/rejected": -0.5770723819732666, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7088263565760996e-09, |
|
"logits/chosen": -1.6151325702667236, |
|
"logits/rejected": -0.9792189598083496, |
|
"logps/chosen": -399.3708801269531, |
|
"logps/rejected": -748.7066650390625, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.1316554844379425, |
|
"rewards/margins": 0.42211928963661194, |
|
"rewards/rejected": -0.5537747740745544, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.752902257023633e-10, |
|
"logits/chosen": -1.6095302104949951, |
|
"logits/rejected": -1.1830781698226929, |
|
"logps/chosen": -393.78350830078125, |
|
"logps/rejected": -796.3955078125, |
|
"loss": 0.0928, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.15096323192119598, |
|
"rewards/margins": 0.4236125349998474, |
|
"rewards/rejected": -0.5745757818222046, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.083718442532189e-10, |
|
"logits/chosen": -1.4612399339675903, |
|
"logits/rejected": -0.8474820256233215, |
|
"logps/chosen": -456.6351623535156, |
|
"logps/rejected": -785.6075439453125, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18163737654685974, |
|
"rewards/margins": 0.3639640808105469, |
|
"rewards/rejected": -0.545601487159729, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1875, |
|
"total_flos": 0.0, |
|
"train_loss": 0.13990657812754312, |
|
"train_runtime": 16010.7596, |
|
"train_samples_per_second": 0.937, |
|
"train_steps_per_second": 0.117 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1875, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|