|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9992254066615027, |
|
"eval_steps": 100, |
|
"global_step": 2904, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.7182130584192438e-09, |
|
"logits/chosen": -2.293531894683838, |
|
"logits/rejected": -2.2362442016601562, |
|
"logps/chosen": -280.74072265625, |
|
"logps/rejected": -204.830322265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.718213058419244e-08, |
|
"logits/chosen": -2.411555290222168, |
|
"logits/rejected": -2.3393168449401855, |
|
"logps/chosen": -294.2322998046875, |
|
"logps/rejected": -213.8911895751953, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.005316631868481636, |
|
"rewards/margins": 0.0028615635819733143, |
|
"rewards/rejected": 0.002455067355185747, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.436426116838488e-08, |
|
"logits/chosen": -2.4150137901306152, |
|
"logits/rejected": -2.3802390098571777, |
|
"logps/chosen": -279.42938232421875, |
|
"logps/rejected": -237.62747192382812, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.0023494327906519175, |
|
"rewards/margins": 0.0011181762674823403, |
|
"rewards/rejected": 0.0012312561739236116, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -2.461092472076416, |
|
"logits/rejected": -2.39383602142334, |
|
"logps/chosen": -301.07952880859375, |
|
"logps/rejected": -215.763427734375, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.0003540778416208923, |
|
"rewards/margins": -0.001285408972762525, |
|
"rewards/rejected": 0.0009313317714259028, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.872852233676976e-08, |
|
"logits/chosen": -2.3856747150421143, |
|
"logits/rejected": -2.3453280925750732, |
|
"logps/chosen": -291.4425354003906, |
|
"logps/rejected": -231.8385772705078, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": 0.0012902533635497093, |
|
"rewards/margins": -0.00038409550325013697, |
|
"rewards/rejected": 0.001674349419772625, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 8.59106529209622e-08, |
|
"logits/chosen": -2.443054437637329, |
|
"logits/rejected": -2.383383274078369, |
|
"logps/chosen": -299.1965026855469, |
|
"logps/rejected": -220.2180938720703, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.005029269959777594, |
|
"rewards/margins": 0.004818198271095753, |
|
"rewards/rejected": 0.0002110706700477749, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -2.435997247695923, |
|
"logits/rejected": -2.4249629974365234, |
|
"logps/chosen": -272.54656982421875, |
|
"logps/rejected": -227.5023193359375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0016036666929721832, |
|
"rewards/margins": 0.0018250759458169341, |
|
"rewards/rejected": -0.00022140909277368337, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.202749140893471e-07, |
|
"logits/chosen": -2.4656193256378174, |
|
"logits/rejected": -2.420733690261841, |
|
"logps/chosen": -292.0702209472656, |
|
"logps/rejected": -206.99124145507812, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.00455916253849864, |
|
"rewards/margins": 0.008069148287177086, |
|
"rewards/rejected": -0.003509984817355871, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.3745704467353952e-07, |
|
"logits/chosen": -2.381108283996582, |
|
"logits/rejected": -2.3896584510803223, |
|
"logps/chosen": -250.19076538085938, |
|
"logps/rejected": -212.47366333007812, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.006951277144253254, |
|
"rewards/margins": 0.008491529151797295, |
|
"rewards/rejected": -0.0015402527060359716, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -2.3331446647644043, |
|
"logits/rejected": -2.254476547241211, |
|
"logps/chosen": -241.5772247314453, |
|
"logps/rejected": -185.46815490722656, |
|
"loss": 0.6898, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.008401724509894848, |
|
"rewards/margins": 0.014180210418999195, |
|
"rewards/rejected": -0.005778484977781773, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.718213058419244e-07, |
|
"logits/chosen": -2.4016473293304443, |
|
"logits/rejected": -2.3915467262268066, |
|
"logps/chosen": -259.73956298828125, |
|
"logps/rejected": -221.9446258544922, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0046272119507193565, |
|
"rewards/margins": 0.00822476390749216, |
|
"rewards/rejected": -0.0035975512582808733, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.8900343642611682e-07, |
|
"logits/chosen": -2.441370725631714, |
|
"logits/rejected": -2.3111448287963867, |
|
"logps/chosen": -252.11367797851562, |
|
"logps/rejected": -210.3745574951172, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0009224863606505096, |
|
"rewards/margins": 0.001503048581071198, |
|
"rewards/rejected": -0.0005805626278743148, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -2.3808603286743164, |
|
"logits/rejected": -2.437734842300415, |
|
"logps/chosen": -258.5278015136719, |
|
"logps/rejected": -210.9561309814453, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.001118434825912118, |
|
"rewards/margins": 0.00486636720597744, |
|
"rewards/rejected": -0.0037479314487427473, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.2336769759450173e-07, |
|
"logits/chosen": -2.447282075881958, |
|
"logits/rejected": -2.431652784347534, |
|
"logps/chosen": -279.3333435058594, |
|
"logps/rejected": -213.950439453125, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.005058329086750746, |
|
"rewards/margins": 0.012270588427782059, |
|
"rewards/rejected": -0.0072122602723538876, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.405498281786942e-07, |
|
"logits/chosen": -2.574840545654297, |
|
"logits/rejected": -2.400458812713623, |
|
"logps/chosen": -267.6883544921875, |
|
"logps/rejected": -203.22642517089844, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.002968291286379099, |
|
"rewards/margins": 0.005260258913040161, |
|
"rewards/rejected": -0.0022919676266610622, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -2.357297420501709, |
|
"logits/rejected": -2.391117811203003, |
|
"logps/chosen": -280.30828857421875, |
|
"logps/rejected": -214.0823974609375, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.013498497195541859, |
|
"rewards/margins": 0.025946879759430885, |
|
"rewards/rejected": -0.012448383495211601, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.7491408934707903e-07, |
|
"logits/chosen": -2.460391044616699, |
|
"logits/rejected": -2.435685873031616, |
|
"logps/chosen": -307.55450439453125, |
|
"logps/rejected": -234.9291534423828, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.007904368452727795, |
|
"rewards/margins": 0.025973070412874222, |
|
"rewards/rejected": -0.01806870475411415, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9209621993127146e-07, |
|
"logits/chosen": -2.417241334915161, |
|
"logits/rejected": -2.4194204807281494, |
|
"logps/chosen": -284.2513732910156, |
|
"logps/rejected": -220.6437530517578, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.006491424981504679, |
|
"rewards/margins": 0.01496223546564579, |
|
"rewards/rejected": -0.008470811881124973, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -2.426492214202881, |
|
"logits/rejected": -2.427013635635376, |
|
"logps/chosen": -261.0791320800781, |
|
"logps/rejected": -236.6595916748047, |
|
"loss": 0.6821, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.007498173974454403, |
|
"rewards/margins": 0.020344991236925125, |
|
"rewards/rejected": -0.012846815399825573, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2646048109965636e-07, |
|
"logits/chosen": -2.479682445526123, |
|
"logits/rejected": -2.3931996822357178, |
|
"logps/chosen": -261.3951721191406, |
|
"logps/rejected": -213.754150390625, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.011733494699001312, |
|
"rewards/margins": 0.023404525592923164, |
|
"rewards/rejected": -0.011671033687889576, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.436426116838488e-07, |
|
"logits/chosen": -2.420584201812744, |
|
"logits/rejected": -2.3466110229492188, |
|
"logps/chosen": -262.15338134765625, |
|
"logps/rejected": -223.8980255126953, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.01480065006762743, |
|
"rewards/margins": 0.030916428193449974, |
|
"rewards/rejected": -0.01611577905714512, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -2.4468910694122314, |
|
"logits/rejected": -2.367849826812744, |
|
"logps/chosen": -276.70526123046875, |
|
"logps/rejected": -203.1634979248047, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0047667198814451694, |
|
"rewards/margins": 0.023584634065628052, |
|
"rewards/rejected": -0.01881791278719902, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.7800687285223364e-07, |
|
"logits/chosen": -2.443112850189209, |
|
"logits/rejected": -2.4011592864990234, |
|
"logps/chosen": -248.66348266601562, |
|
"logps/rejected": -211.2028350830078, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01790793612599373, |
|
"rewards/margins": 0.04252880811691284, |
|
"rewards/rejected": -0.024620870128273964, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.9518900343642607e-07, |
|
"logits/chosen": -2.451524257659912, |
|
"logits/rejected": -2.444117546081543, |
|
"logps/chosen": -261.34912109375, |
|
"logps/rejected": -210.658447265625, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.027729609981179237, |
|
"rewards/margins": 0.043738484382629395, |
|
"rewards/rejected": -0.016008879989385605, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -2.3075928688049316, |
|
"logits/rejected": -2.343151092529297, |
|
"logps/chosen": -280.16119384765625, |
|
"logps/rejected": -234.6321563720703, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.024107476696372032, |
|
"rewards/margins": 0.05809453874826431, |
|
"rewards/rejected": -0.03398705646395683, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.2955326460481097e-07, |
|
"logits/chosen": -2.4088199138641357, |
|
"logits/rejected": -2.380805492401123, |
|
"logps/chosen": -267.1762390136719, |
|
"logps/rejected": -210.53866577148438, |
|
"loss": 0.6682, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02717725932598114, |
|
"rewards/margins": 0.06459168344736099, |
|
"rewards/rejected": -0.03741442412137985, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.4673539518900345e-07, |
|
"logits/chosen": -2.3903017044067383, |
|
"logits/rejected": -2.4178988933563232, |
|
"logps/chosen": -261.8699951171875, |
|
"logps/rejected": -215.27633666992188, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.01536791305989027, |
|
"rewards/margins": 0.03926190733909607, |
|
"rewards/rejected": -0.023893997073173523, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -2.3510518074035645, |
|
"logits/rejected": -2.374760389328003, |
|
"logps/chosen": -221.19140625, |
|
"logps/rejected": -214.09078979492188, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.025187481194734573, |
|
"rewards/margins": 0.06599839776754379, |
|
"rewards/rejected": -0.04081092029809952, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810996563573884e-07, |
|
"logits/chosen": -2.4117298126220703, |
|
"logits/rejected": -2.4171319007873535, |
|
"logps/chosen": -265.48126220703125, |
|
"logps/rejected": -219.87637329101562, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.02810204029083252, |
|
"rewards/margins": 0.07569292932748795, |
|
"rewards/rejected": -0.047590889036655426, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.982817869415807e-07, |
|
"logits/chosen": -2.4645633697509766, |
|
"logits/rejected": -2.3363564014434814, |
|
"logps/chosen": -299.02349853515625, |
|
"logps/rejected": -233.5424346923828, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.042131923139095306, |
|
"rewards/margins": 0.09228460490703583, |
|
"rewards/rejected": -0.05015267804265022, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.982778415614236e-07, |
|
"logits/chosen": -2.388867139816284, |
|
"logits/rejected": -2.3494858741760254, |
|
"logps/chosen": -236.2740020751953, |
|
"logps/rejected": -220.2272186279297, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.020576827228069305, |
|
"rewards/margins": 0.09741847962141037, |
|
"rewards/rejected": -0.07684165239334106, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.963643321852277e-07, |
|
"logits/chosen": -2.4252231121063232, |
|
"logits/rejected": -2.3302061557769775, |
|
"logps/chosen": -285.35650634765625, |
|
"logps/rejected": -231.33602905273438, |
|
"loss": 0.6499, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.015449454076588154, |
|
"rewards/margins": 0.10541415214538574, |
|
"rewards/rejected": -0.08996469527482986, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.944508228090318e-07, |
|
"logits/chosen": -2.437065839767456, |
|
"logits/rejected": -2.4959487915039062, |
|
"logps/chosen": -238.969482421875, |
|
"logps/rejected": -192.4582977294922, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.03995648771524429, |
|
"rewards/margins": 0.13508270680904388, |
|
"rewards/rejected": -0.09512621909379959, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.925373134328357e-07, |
|
"logits/chosen": -2.461618423461914, |
|
"logits/rejected": -2.4392247200012207, |
|
"logps/chosen": -252.608642578125, |
|
"logps/rejected": -222.25125122070312, |
|
"loss": 0.6428, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0467003658413887, |
|
"rewards/margins": 0.106337770819664, |
|
"rewards/rejected": -0.05963738635182381, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.906238040566398e-07, |
|
"logits/chosen": -2.4741828441619873, |
|
"logits/rejected": -2.355389356613159, |
|
"logps/chosen": -271.66387939453125, |
|
"logps/rejected": -231.6305694580078, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.04233894124627113, |
|
"rewards/margins": 0.15629062056541443, |
|
"rewards/rejected": -0.11395169794559479, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.887102946804438e-07, |
|
"logits/chosen": -2.5339910984039307, |
|
"logits/rejected": -2.424262523651123, |
|
"logps/chosen": -289.12408447265625, |
|
"logps/rejected": -223.707275390625, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.05516533926129341, |
|
"rewards/margins": 0.20021691918373108, |
|
"rewards/rejected": -0.14505159854888916, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.867967853042479e-07, |
|
"logits/chosen": -2.440347194671631, |
|
"logits/rejected": -2.469924211502075, |
|
"logps/chosen": -272.43304443359375, |
|
"logps/rejected": -223.846435546875, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.05506114289164543, |
|
"rewards/margins": 0.1845804899930954, |
|
"rewards/rejected": -0.12951937317848206, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.84883275928052e-07, |
|
"logits/chosen": -2.4580020904541016, |
|
"logits/rejected": -2.422905206680298, |
|
"logps/chosen": -274.3728332519531, |
|
"logps/rejected": -228.1702117919922, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.04404681175947189, |
|
"rewards/margins": 0.1968606859445572, |
|
"rewards/rejected": -0.1528138816356659, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.82969766551856e-07, |
|
"logits/chosen": -2.3649065494537354, |
|
"logits/rejected": -2.3759725093841553, |
|
"logps/chosen": -258.05328369140625, |
|
"logps/rejected": -228.05404663085938, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.04060830920934677, |
|
"rewards/margins": 0.17422744631767273, |
|
"rewards/rejected": -0.13361915946006775, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.810562571756601e-07, |
|
"logits/chosen": -2.4177675247192383, |
|
"logits/rejected": -2.406047821044922, |
|
"logps/chosen": -255.5844268798828, |
|
"logps/rejected": -219.80984497070312, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02088143676519394, |
|
"rewards/margins": 0.18797752261161804, |
|
"rewards/rejected": -0.1670960783958435, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.791427477994642e-07, |
|
"logits/chosen": -2.418471574783325, |
|
"logits/rejected": -2.4022955894470215, |
|
"logps/chosen": -306.84869384765625, |
|
"logps/rejected": -239.74612426757812, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.06454652547836304, |
|
"rewards/margins": 0.1981131136417389, |
|
"rewards/rejected": -0.13356655836105347, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.772292384232682e-07, |
|
"logits/chosen": -2.4177165031433105, |
|
"logits/rejected": -2.379561185836792, |
|
"logps/chosen": -246.2447967529297, |
|
"logps/rejected": -245.50753784179688, |
|
"loss": 0.6214, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.025952398777008057, |
|
"rewards/margins": 0.16164085268974304, |
|
"rewards/rejected": -0.1875932663679123, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.753157290470723e-07, |
|
"logits/chosen": -2.488081216812134, |
|
"logits/rejected": -2.4657857418060303, |
|
"logps/chosen": -256.111083984375, |
|
"logps/rejected": -225.01748657226562, |
|
"loss": 0.6292, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.001765048480592668, |
|
"rewards/margins": 0.16318608820438385, |
|
"rewards/rejected": -0.1614210307598114, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.7340221967087635e-07, |
|
"logits/chosen": -2.411403179168701, |
|
"logits/rejected": -2.3677725791931152, |
|
"logps/chosen": -251.43051147460938, |
|
"logps/rejected": -224.96240234375, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.036291979253292084, |
|
"rewards/margins": 0.2256316840648651, |
|
"rewards/rejected": -0.18933971226215363, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.714887102946804e-07, |
|
"logits/chosen": -2.4702084064483643, |
|
"logits/rejected": -2.3358662128448486, |
|
"logps/chosen": -257.2681884765625, |
|
"logps/rejected": -200.87564086914062, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.021172259002923965, |
|
"rewards/margins": 0.22813072800636292, |
|
"rewards/rejected": -0.20695844292640686, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.6957520091848447e-07, |
|
"logits/chosen": -2.426776885986328, |
|
"logits/rejected": -2.3923580646514893, |
|
"logps/chosen": -228.310791015625, |
|
"logps/rejected": -204.06149291992188, |
|
"loss": 0.6346, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.00926109217107296, |
|
"rewards/margins": 0.17081685364246368, |
|
"rewards/rejected": -0.16155575215816498, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6766169154228853e-07, |
|
"logits/chosen": -2.3557937145233154, |
|
"logits/rejected": -2.3138818740844727, |
|
"logps/chosen": -268.3694152832031, |
|
"logps/rejected": -239.6737518310547, |
|
"loss": 0.5994, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.07315589487552643, |
|
"rewards/margins": 0.2832576632499695, |
|
"rewards/rejected": -0.21010179817676544, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.657481821660926e-07, |
|
"logits/chosen": -2.4533188343048096, |
|
"logits/rejected": -2.4328866004943848, |
|
"logps/chosen": -283.4711608886719, |
|
"logps/rejected": -211.56640625, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0002489805337972939, |
|
"rewards/margins": 0.27414873242378235, |
|
"rewards/rejected": -0.27439773082733154, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.6383467278989666e-07, |
|
"logits/chosen": -2.3915047645568848, |
|
"logits/rejected": -2.4537439346313477, |
|
"logps/chosen": -248.0155029296875, |
|
"logps/rejected": -230.64767456054688, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.018264885991811752, |
|
"rewards/margins": 0.20035696029663086, |
|
"rewards/rejected": -0.21862182021141052, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.6192116341370067e-07, |
|
"logits/chosen": -2.5227386951446533, |
|
"logits/rejected": -2.4403090476989746, |
|
"logps/chosen": -277.166748046875, |
|
"logps/rejected": -230.45849609375, |
|
"loss": 0.5927, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04206278175115585, |
|
"rewards/margins": 0.3326976001262665, |
|
"rewards/rejected": -0.29063481092453003, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.6000765403750473e-07, |
|
"logits/chosen": -2.4241671562194824, |
|
"logits/rejected": -2.336174488067627, |
|
"logps/chosen": -255.9370574951172, |
|
"logps/rejected": -211.1270751953125, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.01858004741370678, |
|
"rewards/margins": 0.2676704525947571, |
|
"rewards/rejected": -0.24909043312072754, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.580941446613088e-07, |
|
"logits/chosen": -2.4633803367614746, |
|
"logits/rejected": -2.4187140464782715, |
|
"logps/chosen": -257.75225830078125, |
|
"logps/rejected": -238.34164428710938, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.019993241876363754, |
|
"rewards/margins": 0.240191251039505, |
|
"rewards/rejected": -0.26018446683883667, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.5618063528511285e-07, |
|
"logits/chosen": -2.387589454650879, |
|
"logits/rejected": -2.358363628387451, |
|
"logps/chosen": -274.30670166015625, |
|
"logps/rejected": -232.08969116210938, |
|
"loss": 0.6006, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.05401581525802612, |
|
"rewards/margins": 0.24342355132102966, |
|
"rewards/rejected": -0.29743942618370056, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.542671259089169e-07, |
|
"logits/chosen": -2.4646763801574707, |
|
"logits/rejected": -2.407026767730713, |
|
"logps/chosen": -273.80029296875, |
|
"logps/rejected": -233.99826049804688, |
|
"loss": 0.6071, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.009464024566113949, |
|
"rewards/margins": 0.2972859740257263, |
|
"rewards/rejected": -0.30674999952316284, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.52353616532721e-07, |
|
"logits/chosen": -2.4378743171691895, |
|
"logits/rejected": -2.415499448776245, |
|
"logps/chosen": -266.07171630859375, |
|
"logps/rejected": -235.11093139648438, |
|
"loss": 0.6005, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.009244078770279884, |
|
"rewards/margins": 0.26727497577667236, |
|
"rewards/rejected": -0.2765190303325653, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.5044010715652504e-07, |
|
"logits/chosen": -2.439612627029419, |
|
"logits/rejected": -2.3910512924194336, |
|
"logps/chosen": -249.3505401611328, |
|
"logps/rejected": -228.9892120361328, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.029611006379127502, |
|
"rewards/margins": 0.2741519510746002, |
|
"rewards/rejected": -0.30376294255256653, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.485265977803291e-07, |
|
"logits/chosen": -2.46055269241333, |
|
"logits/rejected": -2.3553805351257324, |
|
"logps/chosen": -293.1697692871094, |
|
"logps/rejected": -232.68115234375, |
|
"loss": 0.5852, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.061341024935245514, |
|
"rewards/margins": 0.43009573221206665, |
|
"rewards/rejected": -0.36875468492507935, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.4661308840413316e-07, |
|
"logits/chosen": -2.509950637817383, |
|
"logits/rejected": -2.377487897872925, |
|
"logps/chosen": -285.7837829589844, |
|
"logps/rejected": -236.22866821289062, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.0002725020167417824, |
|
"rewards/margins": 0.3595966100692749, |
|
"rewards/rejected": -0.3593241274356842, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.446995790279372e-07, |
|
"logits/chosen": -2.4554500579833984, |
|
"logits/rejected": -2.4359169006347656, |
|
"logps/chosen": -283.475341796875, |
|
"logps/rejected": -230.9565887451172, |
|
"loss": 0.5871, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.004635247401893139, |
|
"rewards/margins": 0.32853394746780396, |
|
"rewards/rejected": -0.32389870285987854, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.4278606965174123e-07, |
|
"logits/chosen": -2.472580671310425, |
|
"logits/rejected": -2.470784902572632, |
|
"logps/chosen": -262.70196533203125, |
|
"logps/rejected": -241.69784545898438, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.021802525967359543, |
|
"rewards/margins": 0.33116960525512695, |
|
"rewards/rejected": -0.3529720902442932, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.408725602755453e-07, |
|
"logits/chosen": -2.4604482650756836, |
|
"logits/rejected": -2.4069600105285645, |
|
"logps/chosen": -233.8094482421875, |
|
"logps/rejected": -207.21536254882812, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.03362672030925751, |
|
"rewards/margins": 0.2702116370201111, |
|
"rewards/rejected": -0.3038383424282074, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3895905089934936e-07, |
|
"logits/chosen": -2.476191282272339, |
|
"logits/rejected": -2.3460450172424316, |
|
"logps/chosen": -276.6174621582031, |
|
"logps/rejected": -231.13705444335938, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0008199826115742326, |
|
"rewards/margins": 0.3593784272670746, |
|
"rewards/rejected": -0.360198438167572, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.370455415231534e-07, |
|
"logits/chosen": -2.418025493621826, |
|
"logits/rejected": -2.426182270050049, |
|
"logps/chosen": -276.7029724121094, |
|
"logps/rejected": -260.8800354003906, |
|
"loss": 0.5695, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.050284016877412796, |
|
"rewards/margins": 0.325679212808609, |
|
"rewards/rejected": -0.37596315145492554, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.351320321469575e-07, |
|
"logits/chosen": -2.4381816387176514, |
|
"logits/rejected": -2.4007935523986816, |
|
"logps/chosen": -298.5264587402344, |
|
"logps/rejected": -234.97250366210938, |
|
"loss": 0.5822, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.040602535009384155, |
|
"rewards/margins": 0.45465603470802307, |
|
"rewards/rejected": -0.4140535295009613, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3321852277076154e-07, |
|
"logits/chosen": -2.4025540351867676, |
|
"logits/rejected": -2.3902947902679443, |
|
"logps/chosen": -267.2010192871094, |
|
"logps/rejected": -240.09646606445312, |
|
"loss": 0.5675, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.013172095641493797, |
|
"rewards/margins": 0.4019550383090973, |
|
"rewards/rejected": -0.38878297805786133, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.313050133945656e-07, |
|
"logits/chosen": -2.507800340652466, |
|
"logits/rejected": -2.4536452293395996, |
|
"logps/chosen": -291.55218505859375, |
|
"logps/rejected": -242.00558471679688, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00018588601960800588, |
|
"rewards/margins": 0.46111243963241577, |
|
"rewards/rejected": -0.4609266221523285, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2939150401836967e-07, |
|
"logits/chosen": -2.4589836597442627, |
|
"logits/rejected": -2.4613184928894043, |
|
"logps/chosen": -245.21237182617188, |
|
"logps/rejected": -246.0800323486328, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.06368513405323029, |
|
"rewards/margins": 0.32503411173820496, |
|
"rewards/rejected": -0.38871926069259644, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.2747799464217373e-07, |
|
"logits/chosen": -2.397803783416748, |
|
"logits/rejected": -2.3992929458618164, |
|
"logps/chosen": -262.5749816894531, |
|
"logps/rejected": -231.0945281982422, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10684232413768768, |
|
"rewards/margins": 0.3226371705532074, |
|
"rewards/rejected": -0.4294795095920563, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.255644852659778e-07, |
|
"logits/chosen": -2.4963364601135254, |
|
"logits/rejected": -2.4439988136291504, |
|
"logps/chosen": -287.1022644042969, |
|
"logps/rejected": -249.372802734375, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.08951963484287262, |
|
"rewards/margins": 0.2950531840324402, |
|
"rewards/rejected": -0.384572833776474, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.236509758897818e-07, |
|
"logits/chosen": -2.3793249130249023, |
|
"logits/rejected": -2.3877103328704834, |
|
"logps/chosen": -260.2186584472656, |
|
"logps/rejected": -218.5549774169922, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.07342827320098877, |
|
"rewards/margins": 0.3570996820926666, |
|
"rewards/rejected": -0.4305279850959778, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2173746651358586e-07, |
|
"logits/chosen": -2.4117445945739746, |
|
"logits/rejected": -2.3893179893493652, |
|
"logps/chosen": -260.35223388671875, |
|
"logps/rejected": -246.88528442382812, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.11649465560913086, |
|
"rewards/margins": 0.276920884847641, |
|
"rewards/rejected": -0.39341551065444946, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.198239571373899e-07, |
|
"logits/chosen": -2.4213128089904785, |
|
"logits/rejected": -2.353787660598755, |
|
"logps/chosen": -243.3746795654297, |
|
"logps/rejected": -195.84048461914062, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.056009601801633835, |
|
"rewards/margins": 0.43854936957359314, |
|
"rewards/rejected": -0.4945589601993561, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.17910447761194e-07, |
|
"logits/chosen": -2.46687388420105, |
|
"logits/rejected": -2.3652591705322266, |
|
"logps/chosen": -267.1708679199219, |
|
"logps/rejected": -255.4759521484375, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.08689933270215988, |
|
"rewards/margins": 0.33677542209625244, |
|
"rewards/rejected": -0.42367473244667053, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1599693838499805e-07, |
|
"logits/chosen": -2.4511024951934814, |
|
"logits/rejected": -2.4267566204071045, |
|
"logps/chosen": -295.463134765625, |
|
"logps/rejected": -215.197265625, |
|
"loss": 0.5815, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.06298734992742538, |
|
"rewards/margins": 0.3604966402053833, |
|
"rewards/rejected": -0.4234839975833893, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.140834290088021e-07, |
|
"logits/chosen": -2.4394567012786865, |
|
"logits/rejected": -2.4174628257751465, |
|
"logps/chosen": -277.1340026855469, |
|
"logps/rejected": -221.7968292236328, |
|
"loss": 0.5643, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0872669368982315, |
|
"rewards/margins": 0.4404314458370209, |
|
"rewards/rejected": -0.5276983976364136, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.121699196326062e-07, |
|
"logits/chosen": -2.4356467723846436, |
|
"logits/rejected": -2.309382915496826, |
|
"logps/chosen": -218.226318359375, |
|
"logps/rejected": -185.0907440185547, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.055658143013715744, |
|
"rewards/margins": 0.3867154121398926, |
|
"rewards/rejected": -0.44237351417541504, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logits/chosen": -2.398838520050049, |
|
"logits/rejected": -2.4070441722869873, |
|
"logps/chosen": -259.14996337890625, |
|
"logps/rejected": -243.26882934570312, |
|
"loss": 0.5757, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.06217324733734131, |
|
"rewards/margins": 0.4077116549015045, |
|
"rewards/rejected": -0.4698849320411682, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.083429008802143e-07, |
|
"logits/chosen": -2.5257222652435303, |
|
"logits/rejected": -2.471179485321045, |
|
"logps/chosen": -274.23980712890625, |
|
"logps/rejected": -213.1348876953125, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.041472114622592926, |
|
"rewards/margins": 0.4583619236946106, |
|
"rewards/rejected": -0.4998340606689453, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0642939150401836e-07, |
|
"logits/chosen": -2.488083600997925, |
|
"logits/rejected": -2.3540916442871094, |
|
"logps/chosen": -290.1893310546875, |
|
"logps/rejected": -217.73001098632812, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.0671161487698555, |
|
"rewards/margins": 0.4410739541053772, |
|
"rewards/rejected": -0.5081900954246521, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.0451588212782237e-07, |
|
"logits/chosen": -2.4297478199005127, |
|
"logits/rejected": -2.3958935737609863, |
|
"logps/chosen": -255.67984008789062, |
|
"logps/rejected": -227.6651153564453, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0655713826417923, |
|
"rewards/margins": 0.5016980171203613, |
|
"rewards/rejected": -0.5672693252563477, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.0260237275162643e-07, |
|
"logits/chosen": -2.368698835372925, |
|
"logits/rejected": -2.366753578186035, |
|
"logps/chosen": -232.51876831054688, |
|
"logps/rejected": -227.71176147460938, |
|
"loss": 0.574, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.06221083551645279, |
|
"rewards/margins": 0.45073550939559937, |
|
"rewards/rejected": -0.5129462480545044, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.006888633754305e-07, |
|
"logits/chosen": -2.523768186569214, |
|
"logits/rejected": -2.472125291824341, |
|
"logps/chosen": -277.5516662597656, |
|
"logps/rejected": -229.77294921875, |
|
"loss": 0.5448, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.022703617811203003, |
|
"rewards/margins": 0.5766944289207458, |
|
"rewards/rejected": -0.5993980169296265, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9877535399923456e-07, |
|
"logits/chosen": -2.407522201538086, |
|
"logits/rejected": -2.370136260986328, |
|
"logps/chosen": -267.1480712890625, |
|
"logps/rejected": -224.8208770751953, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.05944644287228584, |
|
"rewards/margins": 0.5258339643478394, |
|
"rewards/rejected": -0.5852803587913513, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.968618446230386e-07, |
|
"logits/chosen": -2.4421579837799072, |
|
"logits/rejected": -2.4140655994415283, |
|
"logps/chosen": -319.13446044921875, |
|
"logps/rejected": -247.4228973388672, |
|
"loss": 0.5424, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0017402932280674577, |
|
"rewards/margins": 0.6056521534919739, |
|
"rewards/rejected": -0.607392430305481, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.949483352468427e-07, |
|
"logits/chosen": -2.4426145553588867, |
|
"logits/rejected": -2.408177614212036, |
|
"logps/chosen": -275.4983215332031, |
|
"logps/rejected": -240.1235809326172, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.188354954123497, |
|
"rewards/margins": 0.3069398105144501, |
|
"rewards/rejected": -0.49529480934143066, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.9303482587064674e-07, |
|
"logits/chosen": -2.4034695625305176, |
|
"logits/rejected": -2.420605421066284, |
|
"logps/chosen": -282.95208740234375, |
|
"logps/rejected": -238.9861297607422, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.07752462476491928, |
|
"rewards/margins": 0.49765148758888245, |
|
"rewards/rejected": -0.5751761198043823, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.911213164944508e-07, |
|
"logits/chosen": -2.448660373687744, |
|
"logits/rejected": -2.3693861961364746, |
|
"logps/chosen": -291.59942626953125, |
|
"logps/rejected": -245.4176483154297, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.07620221376419067, |
|
"rewards/margins": 0.5494655966758728, |
|
"rewards/rejected": -0.6256678700447083, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.8920780711825487e-07, |
|
"logits/chosen": -2.332820177078247, |
|
"logits/rejected": -2.3668315410614014, |
|
"logps/chosen": -270.84954833984375, |
|
"logps/rejected": -226.4775390625, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.06978223472833633, |
|
"rewards/margins": 0.528697669506073, |
|
"rewards/rejected": -0.5984798669815063, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.8729429774205893e-07, |
|
"logits/chosen": -2.414726495742798, |
|
"logits/rejected": -2.3955094814300537, |
|
"logps/chosen": -265.5794982910156, |
|
"logps/rejected": -221.8883056640625, |
|
"loss": 0.5595, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09429865330457687, |
|
"rewards/margins": 0.48285895586013794, |
|
"rewards/rejected": -0.5771576166152954, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.8538078836586294e-07, |
|
"logits/chosen": -2.4266517162323, |
|
"logits/rejected": -2.3529062271118164, |
|
"logps/chosen": -255.7076873779297, |
|
"logps/rejected": -249.5386199951172, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.1304847002029419, |
|
"rewards/margins": 0.40244507789611816, |
|
"rewards/rejected": -0.5329297780990601, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.83467278989667e-07, |
|
"logits/chosen": -2.407705545425415, |
|
"logits/rejected": -2.356353282928467, |
|
"logps/chosen": -281.96405029296875, |
|
"logps/rejected": -222.4244384765625, |
|
"loss": 0.5463, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.0021881351713091135, |
|
"rewards/margins": 0.5727441310882568, |
|
"rewards/rejected": -0.5705560445785522, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.8155376961347106e-07, |
|
"logits/chosen": -2.420719861984253, |
|
"logits/rejected": -2.4088523387908936, |
|
"logps/chosen": -250.53616333007812, |
|
"logps/rejected": -203.44956970214844, |
|
"loss": 0.5655, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.07977491617202759, |
|
"rewards/margins": 0.5309610366821289, |
|
"rewards/rejected": -0.6107359528541565, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.796402602372751e-07, |
|
"logits/chosen": -2.426771640777588, |
|
"logits/rejected": -2.343543291091919, |
|
"logps/chosen": -272.2907409667969, |
|
"logps/rejected": -252.50698852539062, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.0921485498547554, |
|
"rewards/margins": 0.42352181673049927, |
|
"rewards/rejected": -0.5156703591346741, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.777267508610792e-07, |
|
"logits/chosen": -2.493851661682129, |
|
"logits/rejected": -2.429084539413452, |
|
"logps/chosen": -270.8280944824219, |
|
"logps/rejected": -260.43084716796875, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.10896792262792587, |
|
"rewards/margins": 0.5754088163375854, |
|
"rewards/rejected": -0.6843767166137695, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7581324148488325e-07, |
|
"logits/chosen": -2.4216136932373047, |
|
"logits/rejected": -2.37446665763855, |
|
"logps/chosen": -299.2573547363281, |
|
"logps/rejected": -224.32192993164062, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09940418601036072, |
|
"rewards/margins": 0.46459144353866577, |
|
"rewards/rejected": -0.5639955401420593, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.738997321086873e-07, |
|
"logits/chosen": -2.4052574634552, |
|
"logits/rejected": -2.359984874725342, |
|
"logps/chosen": -257.97174072265625, |
|
"logps/rejected": -224.43533325195312, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.055308748036623, |
|
"rewards/margins": 0.41939839720726013, |
|
"rewards/rejected": -0.47470712661743164, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7198622273249137e-07, |
|
"logits/chosen": -2.4320504665374756, |
|
"logits/rejected": -2.4140851497650146, |
|
"logps/chosen": -262.2895202636719, |
|
"logps/rejected": -195.298583984375, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0959898829460144, |
|
"rewards/margins": 0.4007217288017273, |
|
"rewards/rejected": -0.4967115819454193, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.11159610748291, |
|
"eval_logits/rejected": -1.9903388023376465, |
|
"eval_logps/chosen": -265.77178955078125, |
|
"eval_logps/rejected": -225.71365356445312, |
|
"eval_loss": 0.5546568632125854, |
|
"eval_rewards/accuracies": 0.7160000205039978, |
|
"eval_rewards/chosen": -0.11396687477827072, |
|
"eval_rewards/margins": 0.5291071534156799, |
|
"eval_rewards/rejected": -0.6430740356445312, |
|
"eval_runtime": 602.672, |
|
"eval_samples_per_second": 3.319, |
|
"eval_steps_per_second": 0.207, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.7007271335629544e-07, |
|
"logits/chosen": -2.3931021690368652, |
|
"logits/rejected": -2.2940726280212402, |
|
"logps/chosen": -265.32965087890625, |
|
"logps/rejected": -216.5413055419922, |
|
"loss": 0.5557, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06538908183574677, |
|
"rewards/margins": 0.636194109916687, |
|
"rewards/rejected": -0.7015832662582397, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.681592039800995e-07, |
|
"logits/chosen": -2.3853671550750732, |
|
"logits/rejected": -2.3550171852111816, |
|
"logps/chosen": -242.0519561767578, |
|
"logps/rejected": -217.999755859375, |
|
"loss": 0.5501, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.10068665444850922, |
|
"rewards/margins": 0.456498384475708, |
|
"rewards/rejected": -0.5571850538253784, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.662456946039035e-07, |
|
"logits/chosen": -2.339399814605713, |
|
"logits/rejected": -2.3183839321136475, |
|
"logps/chosen": -231.6448974609375, |
|
"logps/rejected": -201.71688842773438, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.19742384552955627, |
|
"rewards/margins": 0.38226670026779175, |
|
"rewards/rejected": -0.5796905159950256, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.6433218522770757e-07, |
|
"logits/chosen": -2.3831546306610107, |
|
"logits/rejected": -2.319021701812744, |
|
"logps/chosen": -295.373291015625, |
|
"logps/rejected": -220.7757110595703, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07577923685312271, |
|
"rewards/margins": 0.4884832501411438, |
|
"rewards/rejected": -0.5642624497413635, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.6241867585151163e-07, |
|
"logits/chosen": -2.4494717121124268, |
|
"logits/rejected": -2.387357711791992, |
|
"logps/chosen": -248.5869140625, |
|
"logps/rejected": -225.55770874023438, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.13781091570854187, |
|
"rewards/margins": 0.47680073976516724, |
|
"rewards/rejected": -0.6146116256713867, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.605051664753157e-07, |
|
"logits/chosen": -2.4245686531066895, |
|
"logits/rejected": -2.430293560028076, |
|
"logps/chosen": -251.4219207763672, |
|
"logps/rejected": -202.93777465820312, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09323601424694061, |
|
"rewards/margins": 0.5475600957870483, |
|
"rewards/rejected": -0.6407961249351501, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5859165709911975e-07, |
|
"logits/chosen": -2.3130688667297363, |
|
"logits/rejected": -2.319304943084717, |
|
"logps/chosen": -292.4891052246094, |
|
"logps/rejected": -256.63336181640625, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.045892782509326935, |
|
"rewards/margins": 0.6422568559646606, |
|
"rewards/rejected": -0.688149631023407, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.566781477229238e-07, |
|
"logits/chosen": -2.3673741817474365, |
|
"logits/rejected": -2.2591471672058105, |
|
"logps/chosen": -259.9045104980469, |
|
"logps/rejected": -235.18899536132812, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.09036435186862946, |
|
"rewards/margins": 0.5122971534729004, |
|
"rewards/rejected": -0.6026615500450134, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.547646383467279e-07, |
|
"logits/chosen": -2.3258612155914307, |
|
"logits/rejected": -2.3310484886169434, |
|
"logps/chosen": -256.3821105957031, |
|
"logps/rejected": -235.92697143554688, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.04464683681726456, |
|
"rewards/margins": 0.6154407858848572, |
|
"rewards/rejected": -0.6600876450538635, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.5285112897053194e-07, |
|
"logits/chosen": -2.401237726211548, |
|
"logits/rejected": -2.2862296104431152, |
|
"logps/chosen": -275.3665466308594, |
|
"logps/rejected": -235.6302032470703, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.1668693572282791, |
|
"rewards/margins": 0.7094846367835999, |
|
"rewards/rejected": -0.8763540387153625, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.50937619594336e-07, |
|
"logits/chosen": -2.427232265472412, |
|
"logits/rejected": -2.3744897842407227, |
|
"logps/chosen": -291.2286682128906, |
|
"logps/rejected": -261.8435363769531, |
|
"loss": 0.5492, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0822446197271347, |
|
"rewards/margins": 0.643202543258667, |
|
"rewards/rejected": -0.7254471778869629, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.4902411021814007e-07, |
|
"logits/chosen": -2.4489409923553467, |
|
"logits/rejected": -2.3586411476135254, |
|
"logps/chosen": -294.7502746582031, |
|
"logps/rejected": -229.1472625732422, |
|
"loss": 0.5273, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.000645542168058455, |
|
"rewards/margins": 0.7959606051445007, |
|
"rewards/rejected": -0.796606183052063, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.4711060084194413e-07, |
|
"logits/chosen": -2.3935751914978027, |
|
"logits/rejected": -2.4032771587371826, |
|
"logps/chosen": -272.8512878417969, |
|
"logps/rejected": -228.17575073242188, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.05014977604150772, |
|
"rewards/margins": 0.5901867747306824, |
|
"rewards/rejected": -0.6403365135192871, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4519709146574814e-07, |
|
"logits/chosen": -2.474759578704834, |
|
"logits/rejected": -2.3402395248413086, |
|
"logps/chosen": -267.2554931640625, |
|
"logps/rejected": -247.7689666748047, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.08641926199197769, |
|
"rewards/margins": 0.5510476231575012, |
|
"rewards/rejected": -0.6374668478965759, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.432835820895522e-07, |
|
"logits/chosen": -2.408102512359619, |
|
"logits/rejected": -2.3299994468688965, |
|
"logps/chosen": -249.98458862304688, |
|
"logps/rejected": -230.2236328125, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.10258053243160248, |
|
"rewards/margins": 0.5458223223686218, |
|
"rewards/rejected": -0.6484029293060303, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.4137007271335626e-07, |
|
"logits/chosen": -2.349371910095215, |
|
"logits/rejected": -2.3972179889678955, |
|
"logps/chosen": -218.875, |
|
"logps/rejected": -212.8376922607422, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.180108904838562, |
|
"rewards/margins": 0.4184727072715759, |
|
"rewards/rejected": -0.5985815525054932, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.394565633371603e-07, |
|
"logits/chosen": -2.358839511871338, |
|
"logits/rejected": -2.299063205718994, |
|
"logps/chosen": -293.2892150878906, |
|
"logps/rejected": -245.3092498779297, |
|
"loss": 0.5364, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14460349082946777, |
|
"rewards/margins": 0.6029571294784546, |
|
"rewards/rejected": -0.7475606203079224, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.375430539609644e-07, |
|
"logits/chosen": -2.2807836532592773, |
|
"logits/rejected": -2.345672130584717, |
|
"logps/chosen": -239.72476196289062, |
|
"logps/rejected": -211.0787353515625, |
|
"loss": 0.5132, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.13182897865772247, |
|
"rewards/margins": 0.6086454391479492, |
|
"rewards/rejected": -0.7404743432998657, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.3562954458476845e-07, |
|
"logits/chosen": -2.441943407058716, |
|
"logits/rejected": -2.4284987449645996, |
|
"logps/chosen": -306.66900634765625, |
|
"logps/rejected": -233.69290161132812, |
|
"loss": 0.5589, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15195252001285553, |
|
"rewards/margins": 0.5843501687049866, |
|
"rewards/rejected": -0.7363026738166809, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.337160352085725e-07, |
|
"logits/chosen": -2.4269309043884277, |
|
"logits/rejected": -2.3844103813171387, |
|
"logps/chosen": -298.20721435546875, |
|
"logps/rejected": -241.6977996826172, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.07097179442644119, |
|
"rewards/margins": 0.7560560703277588, |
|
"rewards/rejected": -0.8270279169082642, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3180252583237657e-07, |
|
"logits/chosen": -2.336698532104492, |
|
"logits/rejected": -2.2902188301086426, |
|
"logps/chosen": -278.00860595703125, |
|
"logps/rejected": -231.4420928955078, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.06676442921161652, |
|
"rewards/margins": 0.5907629132270813, |
|
"rewards/rejected": -0.6575273275375366, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2988901645618063e-07, |
|
"logits/chosen": -2.368316650390625, |
|
"logits/rejected": -2.2780606746673584, |
|
"logps/chosen": -261.25482177734375, |
|
"logps/rejected": -225.92269897460938, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1373300552368164, |
|
"rewards/margins": 0.5785337686538696, |
|
"rewards/rejected": -0.715863823890686, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.279755070799847e-07, |
|
"logits/chosen": -2.402346134185791, |
|
"logits/rejected": -2.3095052242279053, |
|
"logps/chosen": -259.71075439453125, |
|
"logps/rejected": -257.037353515625, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.15283453464508057, |
|
"rewards/margins": 0.526613175868988, |
|
"rewards/rejected": -0.6794477105140686, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.260619977037887e-07, |
|
"logits/chosen": -2.3968770503997803, |
|
"logits/rejected": -2.340967893600464, |
|
"logps/chosen": -251.2769775390625, |
|
"logps/rejected": -214.03146362304688, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.07637427002191544, |
|
"rewards/margins": 0.7686988711357117, |
|
"rewards/rejected": -0.8450730443000793, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.2414848832759277e-07, |
|
"logits/chosen": -2.4229166507720947, |
|
"logits/rejected": -2.2702252864837646, |
|
"logps/chosen": -261.66217041015625, |
|
"logps/rejected": -227.0311279296875, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.14070875942707062, |
|
"rewards/margins": 0.5856814384460449, |
|
"rewards/rejected": -0.7263902425765991, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.2223497895139683e-07, |
|
"logits/chosen": -2.3875441551208496, |
|
"logits/rejected": -2.3234646320343018, |
|
"logps/chosen": -263.7579040527344, |
|
"logps/rejected": -219.34719848632812, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.11774899810552597, |
|
"rewards/margins": 0.5334910154342651, |
|
"rewards/rejected": -0.6512399911880493, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.203214695752009e-07, |
|
"logits/chosen": -2.4645657539367676, |
|
"logits/rejected": -2.385596990585327, |
|
"logps/chosen": -262.8403015136719, |
|
"logps/rejected": -230.195556640625, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15035703778266907, |
|
"rewards/margins": 0.5768887400627136, |
|
"rewards/rejected": -0.7272458076477051, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1840796019900495e-07, |
|
"logits/chosen": -2.426673173904419, |
|
"logits/rejected": -2.3877110481262207, |
|
"logps/chosen": -262.39178466796875, |
|
"logps/rejected": -254.9650115966797, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11363549530506134, |
|
"rewards/margins": 0.6810011267662048, |
|
"rewards/rejected": -0.7946366667747498, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.16494450822809e-07, |
|
"logits/chosen": -2.3528525829315186, |
|
"logits/rejected": -2.3542237281799316, |
|
"logps/chosen": -236.56967163085938, |
|
"logps/rejected": -233.4956512451172, |
|
"loss": 0.5372, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.18625274300575256, |
|
"rewards/margins": 0.48403066396713257, |
|
"rewards/rejected": -0.6702834367752075, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.145809414466131e-07, |
|
"logits/chosen": -2.48598313331604, |
|
"logits/rejected": -2.4767704010009766, |
|
"logps/chosen": -272.78662109375, |
|
"logps/rejected": -266.92413330078125, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14394977688789368, |
|
"rewards/margins": 0.49152618646621704, |
|
"rewards/rejected": -0.6354759335517883, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.1266743207041714e-07, |
|
"logits/chosen": -2.4058761596679688, |
|
"logits/rejected": -2.403879165649414, |
|
"logps/chosen": -247.5891571044922, |
|
"logps/rejected": -226.5963592529297, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.14257661998271942, |
|
"rewards/margins": 0.6169610023498535, |
|
"rewards/rejected": -0.7595376968383789, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.107539226942212e-07, |
|
"logits/chosen": -2.392540216445923, |
|
"logits/rejected": -2.370917558670044, |
|
"logps/chosen": -259.8185729980469, |
|
"logps/rejected": -240.8582000732422, |
|
"loss": 0.5764, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.14314888417720795, |
|
"rewards/margins": 0.5050605535507202, |
|
"rewards/rejected": -0.6482094526290894, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.0884041331802526e-07, |
|
"logits/chosen": -2.3564376831054688, |
|
"logits/rejected": -2.2619478702545166, |
|
"logps/chosen": -243.14151000976562, |
|
"logps/rejected": -216.67678833007812, |
|
"loss": 0.5529, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.17953407764434814, |
|
"rewards/margins": 0.5211489200592041, |
|
"rewards/rejected": -0.7006829977035522, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0692690394182927e-07, |
|
"logits/chosen": -2.435981035232544, |
|
"logits/rejected": -2.3964314460754395, |
|
"logps/chosen": -277.2529296875, |
|
"logps/rejected": -233.1074676513672, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.05609757825732231, |
|
"rewards/margins": 0.6833099722862244, |
|
"rewards/rejected": -0.7394075393676758, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.0501339456563334e-07, |
|
"logits/chosen": -2.414605140686035, |
|
"logits/rejected": -2.270141363143921, |
|
"logps/chosen": -269.33770751953125, |
|
"logps/rejected": -243.461669921875, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1670106053352356, |
|
"rewards/margins": 0.5737408995628357, |
|
"rewards/rejected": -0.7407516241073608, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.030998851894374e-07, |
|
"logits/chosen": -2.3524057865142822, |
|
"logits/rejected": -2.3626418113708496, |
|
"logps/chosen": -229.76596069335938, |
|
"logps/rejected": -221.08889770507812, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.07673561573028564, |
|
"rewards/margins": 0.6764390468597412, |
|
"rewards/rejected": -0.7531746029853821, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.0118637581324146e-07, |
|
"logits/chosen": -2.422010898590088, |
|
"logits/rejected": -2.3673031330108643, |
|
"logps/chosen": -257.2743835449219, |
|
"logps/rejected": -223.86349487304688, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.09269039332866669, |
|
"rewards/margins": 0.7229348421096802, |
|
"rewards/rejected": -0.8156253099441528, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.992728664370455e-07, |
|
"logits/chosen": -2.4492976665496826, |
|
"logits/rejected": -2.3872756958007812, |
|
"logps/chosen": -239.734375, |
|
"logps/rejected": -225.47982788085938, |
|
"loss": 0.5593, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.21187777817249298, |
|
"rewards/margins": 0.5356577634811401, |
|
"rewards/rejected": -0.7475355267524719, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.973593570608496e-07, |
|
"logits/chosen": -2.3429622650146484, |
|
"logits/rejected": -2.2622060775756836, |
|
"logps/chosen": -261.2279052734375, |
|
"logps/rejected": -220.1876983642578, |
|
"loss": 0.552, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.11654462665319443, |
|
"rewards/margins": 0.6566459536552429, |
|
"rewards/rejected": -0.7731907367706299, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9544584768465365e-07, |
|
"logits/chosen": -2.415435314178467, |
|
"logits/rejected": -2.3692467212677, |
|
"logps/chosen": -310.05035400390625, |
|
"logps/rejected": -254.33297729492188, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.1930284947156906, |
|
"rewards/margins": 0.6540157198905945, |
|
"rewards/rejected": -0.8470442891120911, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.935323383084577e-07, |
|
"logits/chosen": -2.382997989654541, |
|
"logits/rejected": -2.331480026245117, |
|
"logps/chosen": -290.0738830566406, |
|
"logps/rejected": -242.3267822265625, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09350194036960602, |
|
"rewards/margins": 0.5290672183036804, |
|
"rewards/rejected": -0.6225691437721252, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.9161882893226177e-07, |
|
"logits/chosen": -2.358276128768921, |
|
"logits/rejected": -2.2521064281463623, |
|
"logps/chosen": -240.8756561279297, |
|
"logps/rejected": -204.3744659423828, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.21055755019187927, |
|
"rewards/margins": 0.43394798040390015, |
|
"rewards/rejected": -0.6445055603981018, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8970531955606583e-07, |
|
"logits/chosen": -2.453087568283081, |
|
"logits/rejected": -2.353877305984497, |
|
"logps/chosen": -270.0442810058594, |
|
"logps/rejected": -231.3137664794922, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.15533334016799927, |
|
"rewards/margins": 0.6584349870681763, |
|
"rewards/rejected": -0.8137682676315308, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.8779181017986984e-07, |
|
"logits/chosen": -2.4245669841766357, |
|
"logits/rejected": -2.402334213256836, |
|
"logps/chosen": -269.0023498535156, |
|
"logps/rejected": -239.24465942382812, |
|
"loss": 0.5534, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.22131021320819855, |
|
"rewards/margins": 0.5528482794761658, |
|
"rewards/rejected": -0.7741583585739136, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.858783008036739e-07, |
|
"logits/chosen": -2.314622402191162, |
|
"logits/rejected": -2.2626185417175293, |
|
"logps/chosen": -278.54620361328125, |
|
"logps/rejected": -220.9359893798828, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1343500018119812, |
|
"rewards/margins": 0.6538098454475403, |
|
"rewards/rejected": -0.7881597280502319, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8396479142747797e-07, |
|
"logits/chosen": -2.3986763954162598, |
|
"logits/rejected": -2.3692593574523926, |
|
"logps/chosen": -230.23330688476562, |
|
"logps/rejected": -201.45968627929688, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16032734513282776, |
|
"rewards/margins": 0.5956254005432129, |
|
"rewards/rejected": -0.7559527158737183, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8205128205128203e-07, |
|
"logits/chosen": -2.3531031608581543, |
|
"logits/rejected": -2.3053078651428223, |
|
"logps/chosen": -260.5692443847656, |
|
"logps/rejected": -229.7131805419922, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.27216142416000366, |
|
"rewards/margins": 0.5033277869224548, |
|
"rewards/rejected": -0.7754892110824585, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.801377726750861e-07, |
|
"logits/chosen": -2.386915683746338, |
|
"logits/rejected": -2.315340757369995, |
|
"logps/chosen": -256.1626281738281, |
|
"logps/rejected": -217.6416473388672, |
|
"loss": 0.5438, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.18697381019592285, |
|
"rewards/margins": 0.49360641837120056, |
|
"rewards/rejected": -0.6805802583694458, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7822426329889015e-07, |
|
"logits/chosen": -2.428358554840088, |
|
"logits/rejected": -2.3230361938476562, |
|
"logps/chosen": -294.2705993652344, |
|
"logps/rejected": -235.54171752929688, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.146778404712677, |
|
"rewards/margins": 0.6038089394569397, |
|
"rewards/rejected": -0.7505873441696167, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.763107539226942e-07, |
|
"logits/chosen": -2.3813319206237793, |
|
"logits/rejected": -2.3310248851776123, |
|
"logps/chosen": -280.82110595703125, |
|
"logps/rejected": -242.4944305419922, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12899862229824066, |
|
"rewards/margins": 0.6421502828598022, |
|
"rewards/rejected": -0.7711488604545593, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.743972445464983e-07, |
|
"logits/chosen": -2.4226574897766113, |
|
"logits/rejected": -2.411165475845337, |
|
"logps/chosen": -254.8177947998047, |
|
"logps/rejected": -232.544921875, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.1896516978740692, |
|
"rewards/margins": 0.4945623278617859, |
|
"rewards/rejected": -0.6842139959335327, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7248373517030234e-07, |
|
"logits/chosen": -2.475400924682617, |
|
"logits/rejected": -2.419384479522705, |
|
"logps/chosen": -274.40484619140625, |
|
"logps/rejected": -237.3146209716797, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16451963782310486, |
|
"rewards/margins": 0.6172757744789124, |
|
"rewards/rejected": -0.7817953824996948, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.705702257941064e-07, |
|
"logits/chosen": -2.433335304260254, |
|
"logits/rejected": -2.435105562210083, |
|
"logps/chosen": -311.65618896484375, |
|
"logps/rejected": -246.72998046875, |
|
"loss": 0.5362, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16845114529132843, |
|
"rewards/margins": 0.6784511804580688, |
|
"rewards/rejected": -0.8469023704528809, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.686567164179104e-07, |
|
"logits/chosen": -2.3490521907806396, |
|
"logits/rejected": -2.275282144546509, |
|
"logps/chosen": -259.00787353515625, |
|
"logps/rejected": -219.07669067382812, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16528849303722382, |
|
"rewards/margins": 0.5990539193153381, |
|
"rewards/rejected": -0.7643424272537231, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6674320704171447e-07, |
|
"logits/chosen": -2.4171411991119385, |
|
"logits/rejected": -2.3591837882995605, |
|
"logps/chosen": -259.59521484375, |
|
"logps/rejected": -206.38058471679688, |
|
"loss": 0.5221, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.10826051235198975, |
|
"rewards/margins": 0.7560392022132874, |
|
"rewards/rejected": -0.8642997741699219, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.6482969766551853e-07, |
|
"logits/chosen": -2.4498002529144287, |
|
"logits/rejected": -2.447680950164795, |
|
"logps/chosen": -274.2942810058594, |
|
"logps/rejected": -237.7410430908203, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.0813172236084938, |
|
"rewards/margins": 0.7396507859230042, |
|
"rewards/rejected": -0.8209678530693054, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.629161882893226e-07, |
|
"logits/chosen": -2.446469783782959, |
|
"logits/rejected": -2.3727550506591797, |
|
"logps/chosen": -280.4594421386719, |
|
"logps/rejected": -214.17269897460938, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1613025665283203, |
|
"rewards/margins": 0.7477348446846008, |
|
"rewards/rejected": -0.9090374708175659, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.6100267891312666e-07, |
|
"logits/chosen": -2.3352808952331543, |
|
"logits/rejected": -2.3035221099853516, |
|
"logps/chosen": -269.15008544921875, |
|
"logps/rejected": -208.6403350830078, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1250370442867279, |
|
"rewards/margins": 0.6833819150924683, |
|
"rewards/rejected": -0.8084190487861633, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.590891695369307e-07, |
|
"logits/chosen": -2.4506659507751465, |
|
"logits/rejected": -2.362159490585327, |
|
"logps/chosen": -256.0645751953125, |
|
"logps/rejected": -210.37814331054688, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.16175204515457153, |
|
"rewards/margins": 0.5650383830070496, |
|
"rewards/rejected": -0.7267904281616211, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.571756601607348e-07, |
|
"logits/chosen": -2.418126106262207, |
|
"logits/rejected": -2.3879923820495605, |
|
"logps/chosen": -294.83013916015625, |
|
"logps/rejected": -217.4688720703125, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.19011719524860382, |
|
"rewards/margins": 0.65594881772995, |
|
"rewards/rejected": -0.8460659980773926, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5526215078453884e-07, |
|
"logits/chosen": -2.4299476146698, |
|
"logits/rejected": -2.3945670127868652, |
|
"logps/chosen": -301.6930847167969, |
|
"logps/rejected": -237.73892211914062, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1024274080991745, |
|
"rewards/margins": 0.7538636326789856, |
|
"rewards/rejected": -0.8562909960746765, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.533486414083429e-07, |
|
"logits/chosen": -2.4256393909454346, |
|
"logits/rejected": -2.3724522590637207, |
|
"logps/chosen": -306.9393615722656, |
|
"logps/rejected": -249.2686004638672, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08279596269130707, |
|
"rewards/margins": 0.7269363403320312, |
|
"rewards/rejected": -0.8097323179244995, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.5143513203214697e-07, |
|
"logits/chosen": -2.3666234016418457, |
|
"logits/rejected": -2.3259263038635254, |
|
"logps/chosen": -275.7200012207031, |
|
"logps/rejected": -225.26425170898438, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.20898135006427765, |
|
"rewards/margins": 0.559633195400238, |
|
"rewards/rejected": -0.7686145305633545, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.49521622655951e-07, |
|
"logits/chosen": -2.4929592609405518, |
|
"logits/rejected": -2.355666160583496, |
|
"logps/chosen": -293.1372375488281, |
|
"logps/rejected": -262.8376159667969, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.04876649007201195, |
|
"rewards/margins": 0.7978218197822571, |
|
"rewards/rejected": -0.8465882539749146, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4760811327975504e-07, |
|
"logits/chosen": -2.4678738117218018, |
|
"logits/rejected": -2.3763632774353027, |
|
"logps/chosen": -273.61614990234375, |
|
"logps/rejected": -253.130126953125, |
|
"loss": 0.5343, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.1364731788635254, |
|
"rewards/margins": 0.5863515734672546, |
|
"rewards/rejected": -0.72282475233078, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.456946039035591e-07, |
|
"logits/chosen": -2.4671170711517334, |
|
"logits/rejected": -2.4366583824157715, |
|
"logps/chosen": -290.646484375, |
|
"logps/rejected": -257.22418212890625, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.10060103982686996, |
|
"rewards/margins": 0.7575327754020691, |
|
"rewards/rejected": -0.8581337928771973, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4378109452736316e-07, |
|
"logits/chosen": -2.3189241886138916, |
|
"logits/rejected": -2.3253917694091797, |
|
"logps/chosen": -272.53802490234375, |
|
"logps/rejected": -198.298095703125, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.10420193523168564, |
|
"rewards/margins": 0.7064443826675415, |
|
"rewards/rejected": -0.8106463551521301, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.418675851511672e-07, |
|
"logits/chosen": -2.3077940940856934, |
|
"logits/rejected": -2.3118600845336914, |
|
"logps/chosen": -281.74365234375, |
|
"logps/rejected": -221.730712890625, |
|
"loss": 0.5622, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.18291965126991272, |
|
"rewards/margins": 0.653192937374115, |
|
"rewards/rejected": -0.8361126184463501, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.399540757749713e-07, |
|
"logits/chosen": -2.373295783996582, |
|
"logits/rejected": -2.2997496128082275, |
|
"logps/chosen": -250.97915649414062, |
|
"logps/rejected": -216.4395751953125, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.07915716618299484, |
|
"rewards/margins": 0.9101131558418274, |
|
"rewards/rejected": -0.9892703294754028, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.3804056639877535e-07, |
|
"logits/chosen": -2.3339505195617676, |
|
"logits/rejected": -2.2506117820739746, |
|
"logps/chosen": -269.00439453125, |
|
"logps/rejected": -230.38687133789062, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.12828503549098969, |
|
"rewards/margins": 0.7284771203994751, |
|
"rewards/rejected": -0.8567621111869812, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.361270570225794e-07, |
|
"logits/chosen": -2.3134007453918457, |
|
"logits/rejected": -2.2540464401245117, |
|
"logps/chosen": -263.0733337402344, |
|
"logps/rejected": -223.71591186523438, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.10775689035654068, |
|
"rewards/margins": 0.7968653440475464, |
|
"rewards/rejected": -0.9046221971511841, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3421354764638345e-07, |
|
"logits/chosen": -2.438647747039795, |
|
"logits/rejected": -2.3648548126220703, |
|
"logps/chosen": -296.474365234375, |
|
"logps/rejected": -244.5324249267578, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.16449935734272003, |
|
"rewards/margins": 0.6564738750457764, |
|
"rewards/rejected": -0.8209732174873352, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.323000382701875e-07, |
|
"logits/chosen": -2.3885536193847656, |
|
"logits/rejected": -2.274456739425659, |
|
"logps/chosen": -289.57586669921875, |
|
"logps/rejected": -226.74734497070312, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11683692783117294, |
|
"rewards/margins": 0.7003597021102905, |
|
"rewards/rejected": -0.8171966671943665, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.3038652889399157e-07, |
|
"logits/chosen": -2.3566040992736816, |
|
"logits/rejected": -2.3693103790283203, |
|
"logps/chosen": -259.44281005859375, |
|
"logps/rejected": -235.6457061767578, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.07110615074634552, |
|
"rewards/margins": 0.7475603818893433, |
|
"rewards/rejected": -0.8186665773391724, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.2847301951779563e-07, |
|
"logits/chosen": -2.3193366527557373, |
|
"logits/rejected": -2.2707600593566895, |
|
"logps/chosen": -230.35177612304688, |
|
"logps/rejected": -218.6141357421875, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2341269999742508, |
|
"rewards/margins": 0.5890001654624939, |
|
"rewards/rejected": -0.8231271505355835, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.265595101415997e-07, |
|
"logits/chosen": -2.303278923034668, |
|
"logits/rejected": -2.260132312774658, |
|
"logps/chosen": -263.609130859375, |
|
"logps/rejected": -207.05221557617188, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16092923283576965, |
|
"rewards/margins": 0.8012853860855103, |
|
"rewards/rejected": -0.9622145891189575, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2464600076540373e-07, |
|
"logits/chosen": -2.3414905071258545, |
|
"logits/rejected": -2.3637521266937256, |
|
"logps/chosen": -272.04595947265625, |
|
"logps/rejected": -233.1014862060547, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15146999061107635, |
|
"rewards/margins": 0.6853641271591187, |
|
"rewards/rejected": -0.8368341326713562, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.227324913892078e-07, |
|
"logits/chosen": -2.45133113861084, |
|
"logits/rejected": -2.3326609134674072, |
|
"logps/chosen": -262.7754821777344, |
|
"logps/rejected": -233.98037719726562, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09686625003814697, |
|
"rewards/margins": 0.6828486919403076, |
|
"rewards/rejected": -0.7797149419784546, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.2081898201301186e-07, |
|
"logits/chosen": -2.4001007080078125, |
|
"logits/rejected": -2.3274292945861816, |
|
"logps/chosen": -251.53952026367188, |
|
"logps/rejected": -219.2271270751953, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.14206108450889587, |
|
"rewards/margins": 0.6470782160758972, |
|
"rewards/rejected": -0.7891392707824707, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.1890547263681592e-07, |
|
"logits/chosen": -2.4295260906219482, |
|
"logits/rejected": -2.36590313911438, |
|
"logps/chosen": -271.3697814941406, |
|
"logps/rejected": -215.4159698486328, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.07891818135976791, |
|
"rewards/margins": 0.7567145824432373, |
|
"rewards/rejected": -0.8356328010559082, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1699196326061998e-07, |
|
"logits/chosen": -2.371452569961548, |
|
"logits/rejected": -2.3672077655792236, |
|
"logps/chosen": -265.51470947265625, |
|
"logps/rejected": -241.41165161132812, |
|
"loss": 0.5586, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1335269808769226, |
|
"rewards/margins": 0.708950400352478, |
|
"rewards/rejected": -0.8424774408340454, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1507845388442402e-07, |
|
"logits/chosen": -2.395923614501953, |
|
"logits/rejected": -2.2845911979675293, |
|
"logps/chosen": -287.7729187011719, |
|
"logps/rejected": -233.2380828857422, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.05316392332315445, |
|
"rewards/margins": 0.7853686809539795, |
|
"rewards/rejected": -0.8385326266288757, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.1316494450822808e-07, |
|
"logits/chosen": -2.331926107406616, |
|
"logits/rejected": -2.3294596672058105, |
|
"logps/chosen": -269.21484375, |
|
"logps/rejected": -242.924072265625, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.12467104196548462, |
|
"rewards/margins": 0.7131480574607849, |
|
"rewards/rejected": -0.8378192186355591, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.1125143513203214e-07, |
|
"logits/chosen": -2.4469873905181885, |
|
"logits/rejected": -2.4214444160461426, |
|
"logps/chosen": -266.3757629394531, |
|
"logps/rejected": -224.87142944335938, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.14371030032634735, |
|
"rewards/margins": 0.739290714263916, |
|
"rewards/rejected": -0.8830010294914246, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.093379257558362e-07, |
|
"logits/chosen": -2.4263453483581543, |
|
"logits/rejected": -2.430192232131958, |
|
"logps/chosen": -274.2438659667969, |
|
"logps/rejected": -237.7062530517578, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.19841806590557098, |
|
"rewards/margins": 0.6394414901733398, |
|
"rewards/rejected": -0.8378594517707825, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.0742441637964026e-07, |
|
"logits/chosen": -2.3289737701416016, |
|
"logits/rejected": -2.3154170513153076, |
|
"logps/chosen": -299.1728820800781, |
|
"logps/rejected": -238.6165771484375, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.08980865776538849, |
|
"rewards/margins": 0.9050415754318237, |
|
"rewards/rejected": -0.9948502779006958, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.055109070034443e-07, |
|
"logits/chosen": -2.4139952659606934, |
|
"logits/rejected": -2.394843101501465, |
|
"logps/chosen": -287.22265625, |
|
"logps/rejected": -256.1412353515625, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1007101982831955, |
|
"rewards/margins": 0.7850233912467957, |
|
"rewards/rejected": -0.8857336044311523, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0359739762724836e-07, |
|
"logits/chosen": -2.415116310119629, |
|
"logits/rejected": -2.3596065044403076, |
|
"logps/chosen": -279.6313781738281, |
|
"logps/rejected": -237.9734649658203, |
|
"loss": 0.531, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.11246969550848007, |
|
"rewards/margins": 0.7976399660110474, |
|
"rewards/rejected": -0.9101096391677856, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.0168388825105242e-07, |
|
"logits/chosen": -2.3417422771453857, |
|
"logits/rejected": -2.2845070362091064, |
|
"logps/chosen": -260.4609680175781, |
|
"logps/rejected": -265.02410888671875, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.14557930827140808, |
|
"rewards/margins": 0.8025070428848267, |
|
"rewards/rejected": -0.9480863809585571, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.997703788748565e-07, |
|
"logits/chosen": -2.347747564315796, |
|
"logits/rejected": -2.301016330718994, |
|
"logps/chosen": -278.42144775390625, |
|
"logps/rejected": -228.9193878173828, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.23046691715717316, |
|
"rewards/margins": 0.6747422218322754, |
|
"rewards/rejected": -0.9052090644836426, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9785686949866055e-07, |
|
"logits/chosen": -2.374584913253784, |
|
"logits/rejected": -2.449509382247925, |
|
"logps/chosen": -275.76531982421875, |
|
"logps/rejected": -233.2628936767578, |
|
"loss": 0.5633, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.12146653980016708, |
|
"rewards/margins": 0.5764524340629578, |
|
"rewards/rejected": -0.6979190111160278, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9594336012246458e-07, |
|
"logits/chosen": -2.4476470947265625, |
|
"logits/rejected": -2.3424344062805176, |
|
"logps/chosen": -270.17877197265625, |
|
"logps/rejected": -235.03125, |
|
"loss": 0.5134, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.15192639827728271, |
|
"rewards/margins": 0.7095075845718384, |
|
"rewards/rejected": -0.8614339828491211, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.9402985074626865e-07, |
|
"logits/chosen": -2.378629446029663, |
|
"logits/rejected": -2.394209861755371, |
|
"logps/chosen": -269.76898193359375, |
|
"logps/rejected": -228.36172485351562, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.18615347146987915, |
|
"rewards/margins": 0.6800569295883179, |
|
"rewards/rejected": -0.8662103414535522, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.921163413700727e-07, |
|
"logits/chosen": -2.3340277671813965, |
|
"logits/rejected": -2.29093861579895, |
|
"logps/chosen": -280.81298828125, |
|
"logps/rejected": -209.24929809570312, |
|
"loss": 0.5775, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2276122123003006, |
|
"rewards/margins": 0.6620305180549622, |
|
"rewards/rejected": -0.8896427154541016, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.9020283199387677e-07, |
|
"logits/chosen": -2.444658041000366, |
|
"logits/rejected": -2.381373405456543, |
|
"logps/chosen": -262.42974853515625, |
|
"logps/rejected": -233.9261474609375, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.06337813287973404, |
|
"rewards/margins": 0.7153123617172241, |
|
"rewards/rejected": -0.7786905169487, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8828932261768083e-07, |
|
"logits/chosen": -2.431870222091675, |
|
"logits/rejected": -2.3378379344940186, |
|
"logps/chosen": -266.4524230957031, |
|
"logps/rejected": -244.3126678466797, |
|
"loss": 0.5322, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16464334726333618, |
|
"rewards/margins": 0.6538031101226807, |
|
"rewards/rejected": -0.8184464573860168, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8637581324148487e-07, |
|
"logits/chosen": -2.4436469078063965, |
|
"logits/rejected": -2.416304349899292, |
|
"logps/chosen": -265.97393798828125, |
|
"logps/rejected": -240.1495361328125, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11401587724685669, |
|
"rewards/margins": 0.8116496801376343, |
|
"rewards/rejected": -0.9256657361984253, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.0740063190460205, |
|
"eval_logits/rejected": -1.9495693445205688, |
|
"eval_logps/chosen": -266.1383361816406, |
|
"eval_logps/rejected": -227.92555236816406, |
|
"eval_loss": 0.530714750289917, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -0.1506224423646927, |
|
"eval_rewards/margins": 0.7136407494544983, |
|
"eval_rewards/rejected": -0.8642632961273193, |
|
"eval_runtime": 601.1247, |
|
"eval_samples_per_second": 3.327, |
|
"eval_steps_per_second": 0.208, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8446230386528893e-07, |
|
"logits/chosen": -2.3547306060791016, |
|
"logits/rejected": -2.309804677963257, |
|
"logps/chosen": -239.15170288085938, |
|
"logps/rejected": -227.03646850585938, |
|
"loss": 0.5441, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20203718543052673, |
|
"rewards/margins": 0.5975244641304016, |
|
"rewards/rejected": -0.799561619758606, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.82548794489093e-07, |
|
"logits/chosen": -2.30757474899292, |
|
"logits/rejected": -2.3320353031158447, |
|
"logps/chosen": -243.75341796875, |
|
"logps/rejected": -234.9235076904297, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.19783169031143188, |
|
"rewards/margins": 0.6387797594070435, |
|
"rewards/rejected": -0.8366113901138306, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8063528511289706e-07, |
|
"logits/chosen": -2.358428478240967, |
|
"logits/rejected": -2.277144432067871, |
|
"logps/chosen": -286.93426513671875, |
|
"logps/rejected": -259.59027099609375, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.09961538016796112, |
|
"rewards/margins": 0.7642472982406616, |
|
"rewards/rejected": -0.8638626337051392, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7872177573670112e-07, |
|
"logits/chosen": -2.4061942100524902, |
|
"logits/rejected": -2.4171481132507324, |
|
"logps/chosen": -252.14535522460938, |
|
"logps/rejected": -230.7240753173828, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09236567467451096, |
|
"rewards/margins": 0.6671853065490723, |
|
"rewards/rejected": -0.7595510482788086, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.7680826636050515e-07, |
|
"logits/chosen": -2.3134634494781494, |
|
"logits/rejected": -2.2795519828796387, |
|
"logps/chosen": -278.4905090332031, |
|
"logps/rejected": -234.19482421875, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.07433702796697617, |
|
"rewards/margins": 0.7174164652824402, |
|
"rewards/rejected": -0.7917534708976746, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7489475698430921e-07, |
|
"logits/chosen": -2.3330302238464355, |
|
"logits/rejected": -2.3669610023498535, |
|
"logps/chosen": -249.8580322265625, |
|
"logps/rejected": -249.130615234375, |
|
"loss": 0.5555, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14169186353683472, |
|
"rewards/margins": 0.6893213391304016, |
|
"rewards/rejected": -0.8310132026672363, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.7298124760811328e-07, |
|
"logits/chosen": -2.404921054840088, |
|
"logits/rejected": -2.3844478130340576, |
|
"logps/chosen": -277.40667724609375, |
|
"logps/rejected": -224.07015991210938, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.13002373278141022, |
|
"rewards/margins": 0.7138842344284058, |
|
"rewards/rejected": -0.8439079523086548, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.7106773823191734e-07, |
|
"logits/chosen": -2.4383153915405273, |
|
"logits/rejected": -2.3728132247924805, |
|
"logps/chosen": -270.46270751953125, |
|
"logps/rejected": -240.8298797607422, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.1426972895860672, |
|
"rewards/margins": 0.8270591497421265, |
|
"rewards/rejected": -0.9697564244270325, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.691542288557214e-07, |
|
"logits/chosen": -2.418792247772217, |
|
"logits/rejected": -2.321077346801758, |
|
"logps/chosen": -297.71197509765625, |
|
"logps/rejected": -230.908935546875, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.18689216673374176, |
|
"rewards/margins": 0.7213765382766724, |
|
"rewards/rejected": -0.9082688093185425, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.6724071947952544e-07, |
|
"logits/chosen": -2.4345123767852783, |
|
"logits/rejected": -2.274202823638916, |
|
"logps/chosen": -240.6513671875, |
|
"logps/rejected": -232.0623779296875, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.16003349423408508, |
|
"rewards/margins": 0.6547808647155762, |
|
"rewards/rejected": -0.8148144483566284, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.653272101033295e-07, |
|
"logits/chosen": -2.4675419330596924, |
|
"logits/rejected": -2.341759204864502, |
|
"logps/chosen": -276.0350036621094, |
|
"logps/rejected": -212.1591339111328, |
|
"loss": 0.514, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.10016350448131561, |
|
"rewards/margins": 0.6785081624984741, |
|
"rewards/rejected": -0.7786716222763062, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6341370072713356e-07, |
|
"logits/chosen": -2.470984935760498, |
|
"logits/rejected": -2.4148213863372803, |
|
"logps/chosen": -302.93951416015625, |
|
"logps/rejected": -248.380859375, |
|
"loss": 0.5384, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08969398587942123, |
|
"rewards/margins": 0.8469365835189819, |
|
"rewards/rejected": -0.9366306066513062, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.6150019135093762e-07, |
|
"logits/chosen": -2.452855348587036, |
|
"logits/rejected": -2.3725485801696777, |
|
"logps/chosen": -304.5735168457031, |
|
"logps/rejected": -232.25656127929688, |
|
"loss": 0.5285, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.132027268409729, |
|
"rewards/margins": 0.7218815088272095, |
|
"rewards/rejected": -0.8539088368415833, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.5958668197474169e-07, |
|
"logits/chosen": -2.461775541305542, |
|
"logits/rejected": -2.3796803951263428, |
|
"logps/chosen": -272.814453125, |
|
"logps/rejected": -222.52627563476562, |
|
"loss": 0.5367, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.105324387550354, |
|
"rewards/margins": 0.8349732160568237, |
|
"rewards/rejected": -0.9402976036071777, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.5767317259854572e-07, |
|
"logits/chosen": -2.4011590480804443, |
|
"logits/rejected": -2.3761496543884277, |
|
"logps/chosen": -262.963623046875, |
|
"logps/rejected": -233.1734619140625, |
|
"loss": 0.5336, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15259674191474915, |
|
"rewards/margins": 0.7275049090385437, |
|
"rewards/rejected": -0.8801015615463257, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.5575966322234978e-07, |
|
"logits/chosen": -2.326183319091797, |
|
"logits/rejected": -2.2559120655059814, |
|
"logps/chosen": -259.02337646484375, |
|
"logps/rejected": -247.8983154296875, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1156080812215805, |
|
"rewards/margins": 0.7456148862838745, |
|
"rewards/rejected": -0.861223042011261, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.5384615384615385e-07, |
|
"logits/chosen": -2.3786349296569824, |
|
"logits/rejected": -2.2938733100891113, |
|
"logps/chosen": -238.00930786132812, |
|
"logps/rejected": -218.44833374023438, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.17429223656654358, |
|
"rewards/margins": 0.7917351126670837, |
|
"rewards/rejected": -0.9660272598266602, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.519326444699579e-07, |
|
"logits/chosen": -2.3865954875946045, |
|
"logits/rejected": -2.3452370166778564, |
|
"logps/chosen": -279.7684631347656, |
|
"logps/rejected": -252.83035278320312, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.11177567392587662, |
|
"rewards/margins": 0.7900134921073914, |
|
"rewards/rejected": -0.9017891883850098, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.5001913509376197e-07, |
|
"logits/chosen": -2.2664945125579834, |
|
"logits/rejected": -2.2997994422912598, |
|
"logps/chosen": -242.1395721435547, |
|
"logps/rejected": -219.6393585205078, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03740229830145836, |
|
"rewards/margins": 0.8150936365127563, |
|
"rewards/rejected": -0.8524959683418274, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4810562571756603e-07, |
|
"logits/chosen": -2.40497088432312, |
|
"logits/rejected": -2.382087230682373, |
|
"logps/chosen": -260.3258972167969, |
|
"logps/rejected": -238.91629028320312, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.19490735232830048, |
|
"rewards/margins": 0.6507551074028015, |
|
"rewards/rejected": -0.8456624746322632, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.4619211634137007e-07, |
|
"logits/chosen": -2.4172415733337402, |
|
"logits/rejected": -2.3459484577178955, |
|
"logps/chosen": -296.9003601074219, |
|
"logps/rejected": -244.6107177734375, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.08288192749023438, |
|
"rewards/margins": 0.7755564451217651, |
|
"rewards/rejected": -0.8584383726119995, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4427860696517413e-07, |
|
"logits/chosen": -2.3107922077178955, |
|
"logits/rejected": -2.272061824798584, |
|
"logps/chosen": -276.7105407714844, |
|
"logps/rejected": -244.1803436279297, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.24030157923698425, |
|
"rewards/margins": 0.5712770819664001, |
|
"rewards/rejected": -0.8115787506103516, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.423650975889782e-07, |
|
"logits/chosen": -2.4137063026428223, |
|
"logits/rejected": -2.317960262298584, |
|
"logps/chosen": -301.14422607421875, |
|
"logps/rejected": -230.6436004638672, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.0714234784245491, |
|
"rewards/margins": 0.8001400232315063, |
|
"rewards/rejected": -0.8715635538101196, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.4045158821278225e-07, |
|
"logits/chosen": -2.437873363494873, |
|
"logits/rejected": -2.3846659660339355, |
|
"logps/chosen": -281.734619140625, |
|
"logps/rejected": -252.9493408203125, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0724717229604721, |
|
"rewards/margins": 0.8193691968917847, |
|
"rewards/rejected": -0.891840934753418, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.3853807883658632e-07, |
|
"logits/chosen": -2.3811519145965576, |
|
"logits/rejected": -2.3631045818328857, |
|
"logps/chosen": -286.5116271972656, |
|
"logps/rejected": -263.0299072265625, |
|
"loss": 0.5568, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.0669230967760086, |
|
"rewards/margins": 0.6814225912094116, |
|
"rewards/rejected": -0.748345673084259, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3662456946039035e-07, |
|
"logits/chosen": -2.3424034118652344, |
|
"logits/rejected": -2.3057944774627686, |
|
"logps/chosen": -244.4705352783203, |
|
"logps/rejected": -206.46615600585938, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1323441118001938, |
|
"rewards/margins": 0.6189785599708557, |
|
"rewards/rejected": -0.7513227462768555, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3471106008419441e-07, |
|
"logits/chosen": -2.3643927574157715, |
|
"logits/rejected": -2.329315662384033, |
|
"logps/chosen": -273.6473083496094, |
|
"logps/rejected": -260.7103576660156, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.10188891738653183, |
|
"rewards/margins": 0.6854437589645386, |
|
"rewards/rejected": -0.7873327732086182, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3279755070799848e-07, |
|
"logits/chosen": -2.328723430633545, |
|
"logits/rejected": -2.2633702754974365, |
|
"logps/chosen": -284.9334411621094, |
|
"logps/rejected": -232.8155517578125, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1077471598982811, |
|
"rewards/margins": 0.7453306913375854, |
|
"rewards/rejected": -0.8530778884887695, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.3088404133180254e-07, |
|
"logits/chosen": -2.299356460571289, |
|
"logits/rejected": -2.2545862197875977, |
|
"logps/chosen": -306.23406982421875, |
|
"logps/rejected": -226.7987823486328, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.07135553658008575, |
|
"rewards/margins": 0.9380094408988953, |
|
"rewards/rejected": -1.0093649625778198, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.289705319556066e-07, |
|
"logits/chosen": -2.314143419265747, |
|
"logits/rejected": -2.2511839866638184, |
|
"logps/chosen": -252.2981719970703, |
|
"logps/rejected": -221.84194946289062, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.14519211649894714, |
|
"rewards/margins": 0.6200018525123596, |
|
"rewards/rejected": -0.7651939988136292, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2705702257941064e-07, |
|
"logits/chosen": -2.3738441467285156, |
|
"logits/rejected": -2.3623602390289307, |
|
"logps/chosen": -257.61328125, |
|
"logps/rejected": -234.92190551757812, |
|
"loss": 0.5131, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.18813610076904297, |
|
"rewards/margins": 0.68475741147995, |
|
"rewards/rejected": -0.8728936314582825, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.251435132032147e-07, |
|
"logits/chosen": -2.346224546432495, |
|
"logits/rejected": -2.337629795074463, |
|
"logps/chosen": -273.5932312011719, |
|
"logps/rejected": -259.9046325683594, |
|
"loss": 0.5298, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.07904721796512604, |
|
"rewards/margins": 0.7541002035140991, |
|
"rewards/rejected": -0.833147406578064, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.2323000382701873e-07, |
|
"logits/chosen": -2.3779919147491455, |
|
"logits/rejected": -2.3776283264160156, |
|
"logps/chosen": -270.1576232910156, |
|
"logps/rejected": -229.17239379882812, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.20323677361011505, |
|
"rewards/margins": 0.6380544900894165, |
|
"rewards/rejected": -0.8412912487983704, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.213164944508228e-07, |
|
"logits/chosen": -2.450532913208008, |
|
"logits/rejected": -2.3503329753875732, |
|
"logps/chosen": -266.23175048828125, |
|
"logps/rejected": -269.7557067871094, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.25909024477005005, |
|
"rewards/margins": 0.6517874002456665, |
|
"rewards/rejected": -0.9108778238296509, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1940298507462686e-07, |
|
"logits/chosen": -2.244485378265381, |
|
"logits/rejected": -2.2757506370544434, |
|
"logps/chosen": -261.5260314941406, |
|
"logps/rejected": -211.95291137695312, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09936892986297607, |
|
"rewards/margins": 0.829565167427063, |
|
"rewards/rejected": -0.9289340972900391, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.1748947569843092e-07, |
|
"logits/chosen": -2.3904809951782227, |
|
"logits/rejected": -2.307392120361328, |
|
"logps/chosen": -263.42333984375, |
|
"logps/rejected": -225.5657196044922, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.11825220286846161, |
|
"rewards/margins": 0.8992505073547363, |
|
"rewards/rejected": -1.017502784729004, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1557596632223497e-07, |
|
"logits/chosen": -2.360848903656006, |
|
"logits/rejected": -2.3750827312469482, |
|
"logps/chosen": -272.0371398925781, |
|
"logps/rejected": -224.0919647216797, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.1476692408323288, |
|
"rewards/margins": 0.835718035697937, |
|
"rewards/rejected": -0.983387291431427, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1366245694603903e-07, |
|
"logits/chosen": -2.4289305210113525, |
|
"logits/rejected": -2.2458267211914062, |
|
"logps/chosen": -265.1494140625, |
|
"logps/rejected": -249.3345489501953, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.24553117156028748, |
|
"rewards/margins": 0.6707764863967896, |
|
"rewards/rejected": -0.9163076281547546, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.1174894756984308e-07, |
|
"logits/chosen": -2.2975571155548096, |
|
"logits/rejected": -2.393068790435791, |
|
"logps/chosen": -254.4977569580078, |
|
"logps/rejected": -224.1728973388672, |
|
"loss": 0.5205, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.18545860052108765, |
|
"rewards/margins": 0.8345575332641602, |
|
"rewards/rejected": -1.020016074180603, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0983543819364714e-07, |
|
"logits/chosen": -2.464052200317383, |
|
"logits/rejected": -2.4497199058532715, |
|
"logps/chosen": -278.8863525390625, |
|
"logps/rejected": -231.97512817382812, |
|
"loss": 0.477, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08197133243083954, |
|
"rewards/margins": 0.8996642231941223, |
|
"rewards/rejected": -0.9816356897354126, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.079219288174512e-07, |
|
"logits/chosen": -2.333491086959839, |
|
"logits/rejected": -2.31835675239563, |
|
"logps/chosen": -247.50698852539062, |
|
"logps/rejected": -209.3056182861328, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.20645038783550262, |
|
"rewards/margins": 0.5899510979652405, |
|
"rewards/rejected": -0.7964013814926147, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0600841944125525e-07, |
|
"logits/chosen": -2.43719744682312, |
|
"logits/rejected": -2.396315574645996, |
|
"logps/chosen": -271.6535949707031, |
|
"logps/rejected": -221.7565155029297, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.2088332623243332, |
|
"rewards/margins": 0.6539155840873718, |
|
"rewards/rejected": -0.8627488017082214, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0409491006505931e-07, |
|
"logits/chosen": -2.400672435760498, |
|
"logits/rejected": -2.2855820655822754, |
|
"logps/chosen": -283.90576171875, |
|
"logps/rejected": -229.03689575195312, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.18632188439369202, |
|
"rewards/margins": 0.7588584423065186, |
|
"rewards/rejected": -0.9451802968978882, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.0218140068886336e-07, |
|
"logits/chosen": -2.3788506984710693, |
|
"logits/rejected": -2.282285690307617, |
|
"logps/chosen": -260.7487487792969, |
|
"logps/rejected": -221.43505859375, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.21385586261749268, |
|
"rewards/margins": 0.5428717732429504, |
|
"rewards/rejected": -0.7567275762557983, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.0026789131266743e-07, |
|
"logits/chosen": -2.296046018600464, |
|
"logits/rejected": -2.3289477825164795, |
|
"logps/chosen": -252.4747772216797, |
|
"logps/rejected": -242.341796875, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.17420583963394165, |
|
"rewards/margins": 0.6258620023727417, |
|
"rewards/rejected": -0.8000679016113281, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.835438193647149e-08, |
|
"logits/chosen": -2.3245911598205566, |
|
"logits/rejected": -2.3727688789367676, |
|
"logps/chosen": -255.92984008789062, |
|
"logps/rejected": -230.47940063476562, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.15971948206424713, |
|
"rewards/margins": 0.6732539534568787, |
|
"rewards/rejected": -0.8329733610153198, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.644087256027554e-08, |
|
"logits/chosen": -2.3938796520233154, |
|
"logits/rejected": -2.388028383255005, |
|
"logps/chosen": -239.3699188232422, |
|
"logps/rejected": -218.7798614501953, |
|
"loss": 0.539, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.14847150444984436, |
|
"rewards/margins": 0.6557341814041138, |
|
"rewards/rejected": -0.804205596446991, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.45273631840796e-08, |
|
"logits/chosen": -2.395660877227783, |
|
"logits/rejected": -2.4043540954589844, |
|
"logps/chosen": -278.68353271484375, |
|
"logps/rejected": -231.53103637695312, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.08287270367145538, |
|
"rewards/margins": 0.7413903474807739, |
|
"rewards/rejected": -0.8242629766464233, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.261385380788366e-08, |
|
"logits/chosen": -2.2760956287384033, |
|
"logits/rejected": -2.2844595909118652, |
|
"logps/chosen": -239.07382202148438, |
|
"logps/rejected": -222.31161499023438, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.2278144657611847, |
|
"rewards/margins": 0.6441665887832642, |
|
"rewards/rejected": -0.871981143951416, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.070034443168771e-08, |
|
"logits/chosen": -2.3876235485076904, |
|
"logits/rejected": -2.3704121112823486, |
|
"logps/chosen": -261.8553161621094, |
|
"logps/rejected": -227.60379028320312, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.24931149184703827, |
|
"rewards/margins": 0.6487377285957336, |
|
"rewards/rejected": -0.8980492353439331, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.878683505549177e-08, |
|
"logits/chosen": -2.2906646728515625, |
|
"logits/rejected": -2.32999587059021, |
|
"logps/chosen": -267.1397399902344, |
|
"logps/rejected": -226.90048217773438, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.10627348721027374, |
|
"rewards/margins": 0.714811384677887, |
|
"rewards/rejected": -0.8210847973823547, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.687332567929582e-08, |
|
"logits/chosen": -2.3202567100524902, |
|
"logits/rejected": -2.349112033843994, |
|
"logps/chosen": -291.82147216796875, |
|
"logps/rejected": -246.85574340820312, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.050464726984500885, |
|
"rewards/margins": 0.8968712091445923, |
|
"rewards/rejected": -0.9473358988761902, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.495981630309988e-08, |
|
"logits/chosen": -2.270942211151123, |
|
"logits/rejected": -2.2897868156433105, |
|
"logps/chosen": -300.76312255859375, |
|
"logps/rejected": -218.22640991210938, |
|
"loss": 0.5467, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.13602502644062042, |
|
"rewards/margins": 0.5781577825546265, |
|
"rewards/rejected": -0.7141829133033752, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.304630692690395e-08, |
|
"logits/chosen": -2.317321300506592, |
|
"logits/rejected": -2.2713263034820557, |
|
"logps/chosen": -262.05743408203125, |
|
"logps/rejected": -205.5304412841797, |
|
"loss": 0.5247, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1445184201002121, |
|
"rewards/margins": 0.7234494090080261, |
|
"rewards/rejected": -0.8679677248001099, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.1132797550708e-08, |
|
"logits/chosen": -2.3970394134521484, |
|
"logits/rejected": -2.3608124256134033, |
|
"logps/chosen": -267.4720458984375, |
|
"logps/rejected": -221.71359252929688, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15024690330028534, |
|
"rewards/margins": 0.7065707445144653, |
|
"rewards/rejected": -0.8568177223205566, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.921928817451206e-08, |
|
"logits/chosen": -2.416393280029297, |
|
"logits/rejected": -2.3220162391662598, |
|
"logps/chosen": -277.0873107910156, |
|
"logps/rejected": -227.70947265625, |
|
"loss": 0.5077, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.17723213136196136, |
|
"rewards/margins": 0.8048456311225891, |
|
"rewards/rejected": -0.9820777177810669, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.73057787983161e-08, |
|
"logits/chosen": -2.395048141479492, |
|
"logits/rejected": -2.3008649349212646, |
|
"logps/chosen": -292.8017883300781, |
|
"logps/rejected": -266.5408020019531, |
|
"loss": 0.516, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.08461178839206696, |
|
"rewards/margins": 0.7476651072502136, |
|
"rewards/rejected": -0.8322768211364746, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.539226942212017e-08, |
|
"logits/chosen": -2.357027053833008, |
|
"logits/rejected": -2.313039541244507, |
|
"logps/chosen": -244.0641326904297, |
|
"logps/rejected": -220.01608276367188, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11702857911586761, |
|
"rewards/margins": 0.8708831071853638, |
|
"rewards/rejected": -0.9879117012023926, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.347876004592423e-08, |
|
"logits/chosen": -2.470712900161743, |
|
"logits/rejected": -2.401108980178833, |
|
"logps/chosen": -280.0924072265625, |
|
"logps/rejected": -232.7683563232422, |
|
"loss": 0.5404, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1525488644838333, |
|
"rewards/margins": 0.7386666536331177, |
|
"rewards/rejected": -0.8912155032157898, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.156525066972828e-08, |
|
"logits/chosen": -2.299323081970215, |
|
"logits/rejected": -2.362274646759033, |
|
"logps/chosen": -242.1494598388672, |
|
"logps/rejected": -227.5293426513672, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.07852064073085785, |
|
"rewards/margins": 0.7071703672409058, |
|
"rewards/rejected": -0.7856910824775696, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.965174129353234e-08, |
|
"logits/chosen": -2.3532567024230957, |
|
"logits/rejected": -2.292245864868164, |
|
"logps/chosen": -302.1033630371094, |
|
"logps/rejected": -258.0881042480469, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.11693718284368515, |
|
"rewards/margins": 0.8197237253189087, |
|
"rewards/rejected": -0.936660885810852, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.773823191733639e-08, |
|
"logits/chosen": -2.285585403442383, |
|
"logits/rejected": -2.2158350944519043, |
|
"logps/chosen": -259.6685485839844, |
|
"logps/rejected": -222.1896514892578, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.23936796188354492, |
|
"rewards/margins": 0.7124180197715759, |
|
"rewards/rejected": -0.9517859220504761, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.582472254114045e-08, |
|
"logits/chosen": -2.305962085723877, |
|
"logits/rejected": -2.3173716068267822, |
|
"logps/chosen": -252.51766967773438, |
|
"logps/rejected": -223.96084594726562, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13089394569396973, |
|
"rewards/margins": 0.6327255368232727, |
|
"rewards/rejected": -0.7636195421218872, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.391121316494451e-08, |
|
"logits/chosen": -2.325913667678833, |
|
"logits/rejected": -2.310243606567383, |
|
"logps/chosen": -294.99847412109375, |
|
"logps/rejected": -239.6224822998047, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.10585550963878632, |
|
"rewards/margins": 0.8481132388114929, |
|
"rewards/rejected": -0.9539687037467957, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.199770378874856e-08, |
|
"logits/chosen": -2.3760478496551514, |
|
"logits/rejected": -2.2878143787384033, |
|
"logps/chosen": -272.9644775390625, |
|
"logps/rejected": -225.4691619873047, |
|
"loss": 0.5297, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.07944142073392868, |
|
"rewards/margins": 0.8911903500556946, |
|
"rewards/rejected": -0.9706317782402039, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.008419441255262e-08, |
|
"logits/chosen": -2.3183536529541016, |
|
"logits/rejected": -2.2597270011901855, |
|
"logps/chosen": -299.25775146484375, |
|
"logps/rejected": -217.822509765625, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.07527098059654236, |
|
"rewards/margins": 0.823529839515686, |
|
"rewards/rejected": -0.8988008499145508, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.817068503635668e-08, |
|
"logits/chosen": -2.4004032611846924, |
|
"logits/rejected": -2.3618216514587402, |
|
"logps/chosen": -288.94622802734375, |
|
"logps/rejected": -222.6877899169922, |
|
"loss": 0.5125, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12513655424118042, |
|
"rewards/margins": 0.836874783039093, |
|
"rewards/rejected": -0.9620113372802734, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.6257175660160735e-08, |
|
"logits/chosen": -2.420538902282715, |
|
"logits/rejected": -2.3739724159240723, |
|
"logps/chosen": -295.19580078125, |
|
"logps/rejected": -238.79727172851562, |
|
"loss": 0.5186, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.18515029549598694, |
|
"rewards/margins": 0.6977055668830872, |
|
"rewards/rejected": -0.8828557729721069, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.4343666283964784e-08, |
|
"logits/chosen": -2.4274239540100098, |
|
"logits/rejected": -2.376018762588501, |
|
"logps/chosen": -263.94525146484375, |
|
"logps/rejected": -249.42178344726562, |
|
"loss": 0.5219, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.0742453932762146, |
|
"rewards/margins": 0.7254621386528015, |
|
"rewards/rejected": -0.7997074127197266, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.243015690776884e-08, |
|
"logits/chosen": -2.4671757221221924, |
|
"logits/rejected": -2.4064314365386963, |
|
"logps/chosen": -284.95330810546875, |
|
"logps/rejected": -235.897705078125, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.15317563712596893, |
|
"rewards/margins": 0.7068864107131958, |
|
"rewards/rejected": -0.8600620031356812, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 5.05166475315729e-08, |
|
"logits/chosen": -2.3765015602111816, |
|
"logits/rejected": -2.3243587017059326, |
|
"logps/chosen": -263.02545166015625, |
|
"logps/rejected": -218.81405639648438, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.28189724683761597, |
|
"rewards/margins": 0.6195310354232788, |
|
"rewards/rejected": -0.9014283418655396, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.860313815537696e-08, |
|
"logits/chosen": -2.406059741973877, |
|
"logits/rejected": -2.3650240898132324, |
|
"logps/chosen": -284.5108337402344, |
|
"logps/rejected": -266.8323059082031, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.12149874866008759, |
|
"rewards/margins": 0.7909864187240601, |
|
"rewards/rejected": -0.9124851226806641, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.668962877918101e-08, |
|
"logits/chosen": -2.3801705837249756, |
|
"logits/rejected": -2.27915620803833, |
|
"logps/chosen": -288.0900573730469, |
|
"logps/rejected": -278.1248474121094, |
|
"loss": 0.5227, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.12331392616033554, |
|
"rewards/margins": 0.8058657646179199, |
|
"rewards/rejected": -0.9291796684265137, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.477611940298507e-08, |
|
"logits/chosen": -2.4003734588623047, |
|
"logits/rejected": -2.3077614307403564, |
|
"logps/chosen": -245.1756591796875, |
|
"logps/rejected": -227.53317260742188, |
|
"loss": 0.4946, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.14258424937725067, |
|
"rewards/margins": 0.8004018068313599, |
|
"rewards/rejected": -0.942986011505127, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.2862610026789124e-08, |
|
"logits/chosen": -2.4123265743255615, |
|
"logits/rejected": -2.295915365219116, |
|
"logps/chosen": -259.74932861328125, |
|
"logps/rejected": -223.12002563476562, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15485641360282898, |
|
"rewards/margins": 0.7069037556648254, |
|
"rewards/rejected": -0.861760139465332, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.0949100650593186e-08, |
|
"logits/chosen": -2.3854644298553467, |
|
"logits/rejected": -2.328233003616333, |
|
"logps/chosen": -256.6706848144531, |
|
"logps/rejected": -243.61880493164062, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.25512591004371643, |
|
"rewards/margins": 0.539734959602356, |
|
"rewards/rejected": -0.7948609590530396, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.903559127439724e-08, |
|
"logits/chosen": -2.2946548461914062, |
|
"logits/rejected": -2.278347969055176, |
|
"logps/chosen": -273.6446838378906, |
|
"logps/rejected": -230.7551727294922, |
|
"loss": 0.5081, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.12195589393377304, |
|
"rewards/margins": 0.7443081140518188, |
|
"rewards/rejected": -0.8662639856338501, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.71220818982013e-08, |
|
"logits/chosen": -2.405937910079956, |
|
"logits/rejected": -2.371584415435791, |
|
"logps/chosen": -273.60845947265625, |
|
"logps/rejected": -218.2377166748047, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.24379794299602509, |
|
"rewards/margins": 0.6154786348342896, |
|
"rewards/rejected": -0.8592765927314758, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.520857252200535e-08, |
|
"logits/chosen": -2.3632619380950928, |
|
"logits/rejected": -2.332373857498169, |
|
"logps/chosen": -300.9534912109375, |
|
"logps/rejected": -242.65316772460938, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.04076363891363144, |
|
"rewards/margins": 0.986183762550354, |
|
"rewards/rejected": -1.0269473791122437, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.3295063145809414e-08, |
|
"logits/chosen": -2.280702829360962, |
|
"logits/rejected": -2.2251949310302734, |
|
"logps/chosen": -267.03338623046875, |
|
"logps/rejected": -244.3408203125, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.13874498009681702, |
|
"rewards/margins": 0.7079328298568726, |
|
"rewards/rejected": -0.8466777801513672, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.138155376961347e-08, |
|
"logits/chosen": -2.2881321907043457, |
|
"logits/rejected": -2.3319568634033203, |
|
"logps/chosen": -234.1023712158203, |
|
"logps/rejected": -226.11300659179688, |
|
"loss": 0.5666, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.13704028725624084, |
|
"rewards/margins": 0.7299059629440308, |
|
"rewards/rejected": -0.8669462203979492, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.9468044393417525e-08, |
|
"logits/chosen": -2.4301745891571045, |
|
"logits/rejected": -2.402632236480713, |
|
"logps/chosen": -273.2855529785156, |
|
"logps/rejected": -262.68792724609375, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.061246536672115326, |
|
"rewards/margins": 0.8221151232719421, |
|
"rewards/rejected": -0.8833616971969604, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.755453501722158e-08, |
|
"logits/chosen": -2.3756215572357178, |
|
"logits/rejected": -2.3666157722473145, |
|
"logps/chosen": -285.3559265136719, |
|
"logps/rejected": -228.5872344970703, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.09171368926763535, |
|
"rewards/margins": 0.8335908055305481, |
|
"rewards/rejected": -0.9253045320510864, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.564102564102564e-08, |
|
"logits/chosen": -2.3984246253967285, |
|
"logits/rejected": -2.3833839893341064, |
|
"logps/chosen": -258.1267395019531, |
|
"logps/rejected": -225.0773468017578, |
|
"loss": 0.524, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1915823221206665, |
|
"rewards/margins": 0.6331661343574524, |
|
"rewards/rejected": -0.8247483968734741, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3727516264829695e-08, |
|
"logits/chosen": -2.320146083831787, |
|
"logits/rejected": -2.2947006225585938, |
|
"logps/chosen": -238.8065643310547, |
|
"logps/rejected": -234.02822875976562, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.17902429401874542, |
|
"rewards/margins": 0.5986486673355103, |
|
"rewards/rejected": -0.7776729464530945, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.1814006888633754e-08, |
|
"logits/chosen": -2.334745168685913, |
|
"logits/rejected": -2.353066921234131, |
|
"logps/chosen": -263.228515625, |
|
"logps/rejected": -233.03515625, |
|
"loss": 0.5472, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.20434530079364777, |
|
"rewards/margins": 0.656082272529602, |
|
"rewards/rejected": -0.8604275584220886, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.990049751243781e-08, |
|
"logits/chosen": -2.396660327911377, |
|
"logits/rejected": -2.3448100090026855, |
|
"logps/chosen": -285.54638671875, |
|
"logps/rejected": -231.98117065429688, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1172541007399559, |
|
"rewards/margins": 0.6794244050979614, |
|
"rewards/rejected": -0.7966784238815308, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7986988136241865e-08, |
|
"logits/chosen": -2.4138553142547607, |
|
"logits/rejected": -2.357382297515869, |
|
"logps/chosen": -262.9327697753906, |
|
"logps/rejected": -225.3331756591797, |
|
"loss": 0.4929, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.031798310577869415, |
|
"rewards/margins": 0.8773431777954102, |
|
"rewards/rejected": -0.9091414213180542, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6073478760045924e-08, |
|
"logits/chosen": -2.2996766567230225, |
|
"logits/rejected": -2.2243258953094482, |
|
"logps/chosen": -285.4356384277344, |
|
"logps/rejected": -201.10208129882812, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2585764229297638, |
|
"rewards/margins": 0.5490394234657288, |
|
"rewards/rejected": -0.8076158761978149, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4159969383849981e-08, |
|
"logits/chosen": -2.4938712120056152, |
|
"logits/rejected": -2.4172348976135254, |
|
"logps/chosen": -312.3806457519531, |
|
"logps/rejected": -248.057373046875, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.1045370101928711, |
|
"rewards/margins": 0.8157347440719604, |
|
"rewards/rejected": -0.9202718734741211, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2246460007654037e-08, |
|
"logits/chosen": -2.3658456802368164, |
|
"logits/rejected": -2.304481029510498, |
|
"logps/chosen": -266.4372863769531, |
|
"logps/rejected": -227.3815460205078, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.23648759722709656, |
|
"rewards/margins": 0.5965025424957275, |
|
"rewards/rejected": -0.8329901695251465, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0332950631458094e-08, |
|
"logits/chosen": -2.307523012161255, |
|
"logits/rejected": -2.3531241416931152, |
|
"logps/chosen": -273.6283874511719, |
|
"logps/rejected": -230.62344360351562, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.150599867105484, |
|
"rewards/margins": 0.7397447228431702, |
|
"rewards/rejected": -0.8903446197509766, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.419441255262151e-09, |
|
"logits/chosen": -2.298656940460205, |
|
"logits/rejected": -2.319462299346924, |
|
"logps/chosen": -267.07867431640625, |
|
"logps/rejected": -219.8665313720703, |
|
"loss": 0.5007, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.09595485031604767, |
|
"rewards/margins": 0.6610188484191895, |
|
"rewards/rejected": -0.7569736838340759, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.505931879066207e-09, |
|
"logits/chosen": -2.4032797813415527, |
|
"logits/rejected": -2.3461287021636963, |
|
"logps/chosen": -235.07882690429688, |
|
"logps/rejected": -236.2791290283203, |
|
"loss": 0.5338, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.25155526399612427, |
|
"rewards/margins": 0.6543334722518921, |
|
"rewards/rejected": -0.9058888554573059, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.592422502870264e-09, |
|
"logits/chosen": -2.41361141204834, |
|
"logits/rejected": -2.317509651184082, |
|
"logps/chosen": -259.431884765625, |
|
"logps/rejected": -227.45394897460938, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.057419806718826294, |
|
"rewards/margins": 0.6843992471694946, |
|
"rewards/rejected": -0.7418190240859985, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6789131266743202e-09, |
|
"logits/chosen": -2.3416519165039062, |
|
"logits/rejected": -2.272921562194824, |
|
"logps/chosen": -233.4044189453125, |
|
"logps/rejected": -225.3540496826172, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.2774786353111267, |
|
"rewards/margins": 0.6129654049873352, |
|
"rewards/rejected": -0.8904439806938171, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 7.654037504783773e-10, |
|
"logits/chosen": -2.313737630844116, |
|
"logits/rejected": -2.3516671657562256, |
|
"logps/chosen": -238.37814331054688, |
|
"logps/rejected": -245.6781005859375, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1316399872303009, |
|
"rewards/margins": 0.7750416994094849, |
|
"rewards/rejected": -0.9066817164421082, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.065433979034424, |
|
"eval_logits/rejected": -1.9405803680419922, |
|
"eval_logps/chosen": -266.1706848144531, |
|
"eval_logps/rejected": -228.30780029296875, |
|
"eval_loss": 0.5255534052848816, |
|
"eval_rewards/accuracies": 0.7419999837875366, |
|
"eval_rewards/chosen": -0.15385985374450684, |
|
"eval_rewards/margins": 0.7486297488212585, |
|
"eval_rewards/rejected": -0.9024895429611206, |
|
"eval_runtime": 601.0805, |
|
"eval_samples_per_second": 3.327, |
|
"eval_steps_per_second": 0.208, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 2904, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5642068754707158, |
|
"train_runtime": 89225.6094, |
|
"train_samples_per_second": 2.083, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2904, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|