|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1389521640091116, |
|
"eval_steps": 500, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0028473804100227792, |
|
"grad_norm": 1.8249249458312988, |
|
"learning_rate": 1.4099732346241459e-05, |
|
"logits/chosen": 1.5533103942871094, |
|
"logits/rejected": 1.544719934463501, |
|
"logps/chosen": -192.5115509033203, |
|
"logps/rejected": -190.71209716796875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.008504397235810757, |
|
"rewards/margins": -0.00039802552782930434, |
|
"rewards/rejected": -0.008106371387839317, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0056947608200455585, |
|
"grad_norm": 1.7557423114776611, |
|
"learning_rate": 1.4099464692482917e-05, |
|
"logits/chosen": 1.467641830444336, |
|
"logits/rejected": 1.4499633312225342, |
|
"logps/chosen": -186.05093383789062, |
|
"logps/rejected": -189.79763793945312, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.46666669845581055, |
|
"rewards/chosen": -0.03809415176510811, |
|
"rewards/margins": -0.001227277098223567, |
|
"rewards/rejected": -0.03686687722802162, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.008542141230068337, |
|
"grad_norm": 1.97469162940979, |
|
"learning_rate": 1.4099197038724375e-05, |
|
"logits/chosen": 1.4973409175872803, |
|
"logits/rejected": 1.481737494468689, |
|
"logps/chosen": -192.17340087890625, |
|
"logps/rejected": -190.8618927001953, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03541722521185875, |
|
"rewards/margins": 0.0023386760149151087, |
|
"rewards/rejected": -0.037755902856588364, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.011389521640091117, |
|
"grad_norm": 1.5228796005249023, |
|
"learning_rate": 1.4098929384965832e-05, |
|
"logits/chosen": 1.6990602016448975, |
|
"logits/rejected": 1.6831896305084229, |
|
"logps/chosen": -187.31143188476562, |
|
"logps/rejected": -194.3468780517578, |
|
"loss": 0.6878, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -0.02589474990963936, |
|
"rewards/margins": 0.010891737416386604, |
|
"rewards/rejected": -0.036786485463380814, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.014236902050113895, |
|
"grad_norm": 2.0425374507904053, |
|
"learning_rate": 1.409866173120729e-05, |
|
"logits/chosen": 1.6863610744476318, |
|
"logits/rejected": 1.6478984355926514, |
|
"logps/chosen": -191.1285400390625, |
|
"logps/rejected": -185.4095916748047, |
|
"loss": 0.6875, |
|
"rewards/accuracies": 0.6666667461395264, |
|
"rewards/chosen": -0.045168228447437286, |
|
"rewards/margins": 0.011603166349232197, |
|
"rewards/rejected": -0.05677139759063721, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.017084282460136675, |
|
"grad_norm": 1.708747386932373, |
|
"learning_rate": 1.4098394077448748e-05, |
|
"logits/chosen": 1.4254977703094482, |
|
"logits/rejected": 1.4232938289642334, |
|
"logps/chosen": -190.47293090820312, |
|
"logps/rejected": -188.8648223876953, |
|
"loss": 0.6886, |
|
"rewards/accuracies": 0.5333333611488342, |
|
"rewards/chosen": -0.05600341409444809, |
|
"rewards/margins": 0.009662959724664688, |
|
"rewards/rejected": -0.06566638499498367, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.019931662870159454, |
|
"grad_norm": 1.7994238138198853, |
|
"learning_rate": 1.4098126423690206e-05, |
|
"logits/chosen": 1.6648858785629272, |
|
"logits/rejected": 1.6290760040283203, |
|
"logps/chosen": -186.66439819335938, |
|
"logps/rejected": -185.37510681152344, |
|
"loss": 0.6807, |
|
"rewards/accuracies": 0.6833332777023315, |
|
"rewards/chosen": -0.062477756291627884, |
|
"rewards/margins": 0.02577758952975273, |
|
"rewards/rejected": -0.08825534582138062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.022779043280182234, |
|
"grad_norm": 1.9590085744857788, |
|
"learning_rate": 1.4097858769931664e-05, |
|
"logits/chosen": 1.5724966526031494, |
|
"logits/rejected": 1.552328109741211, |
|
"logps/chosen": -194.67926025390625, |
|
"logps/rejected": -189.94973754882812, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.05106516554951668, |
|
"rewards/margins": 0.018698066473007202, |
|
"rewards/rejected": -0.06976323574781418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02562642369020501, |
|
"grad_norm": 2.6948983669281006, |
|
"learning_rate": 1.4097591116173122e-05, |
|
"logits/chosen": 1.6500282287597656, |
|
"logits/rejected": 1.6157087087631226, |
|
"logps/chosen": -190.87289428710938, |
|
"logps/rejected": -190.57659912109375, |
|
"loss": 0.6812, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": -0.06399134546518326, |
|
"rewards/margins": 0.0265243761241436, |
|
"rewards/rejected": -0.09051571786403656, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02847380410022779, |
|
"grad_norm": 2.4923946857452393, |
|
"learning_rate": 1.409732346241458e-05, |
|
"logits/chosen": 1.762634515762329, |
|
"logits/rejected": 1.7326021194458008, |
|
"logps/chosen": -191.38101196289062, |
|
"logps/rejected": -190.61634826660156, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05230847746133804, |
|
"rewards/margins": 0.03380978852510452, |
|
"rewards/rejected": -0.08611828088760376, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03132118451025057, |
|
"grad_norm": 2.1932480335235596, |
|
"learning_rate": 1.4097055808656037e-05, |
|
"logits/chosen": 1.9199613332748413, |
|
"logits/rejected": 1.8599956035614014, |
|
"logps/chosen": -191.87869262695312, |
|
"logps/rejected": -191.84231567382812, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05387324094772339, |
|
"rewards/margins": 0.0382312536239624, |
|
"rewards/rejected": -0.09210449457168579, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03416856492027335, |
|
"grad_norm": 1.9617068767547607, |
|
"learning_rate": 1.4096788154897494e-05, |
|
"logits/chosen": 1.4400994777679443, |
|
"logits/rejected": 1.4526021480560303, |
|
"logps/chosen": -187.902587890625, |
|
"logps/rejected": -185.8184814453125, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07060353457927704, |
|
"rewards/margins": 0.03810857608914375, |
|
"rewards/rejected": -0.10871211439371109, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.037015945330296125, |
|
"grad_norm": 2.709176540374756, |
|
"learning_rate": 1.4096520501138952e-05, |
|
"logits/chosen": 1.8948795795440674, |
|
"logits/rejected": 1.934361219406128, |
|
"logps/chosen": -191.3181610107422, |
|
"logps/rejected": -188.64439392089844, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.5500000715255737, |
|
"rewards/chosen": -0.0670461356639862, |
|
"rewards/margins": 0.018613968044519424, |
|
"rewards/rejected": -0.08566009998321533, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03986332574031891, |
|
"grad_norm": 2.8838157653808594, |
|
"learning_rate": 1.409625284738041e-05, |
|
"logits/chosen": 1.595059871673584, |
|
"logits/rejected": 1.593703031539917, |
|
"logps/chosen": -182.03919982910156, |
|
"logps/rejected": -188.1974334716797, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": -0.02041524276137352, |
|
"rewards/margins": 0.03836324065923691, |
|
"rewards/rejected": -0.05877848342061043, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.042710706150341685, |
|
"grad_norm": 2.5347156524658203, |
|
"learning_rate": 1.4095985193621868e-05, |
|
"logits/chosen": 1.4492591619491577, |
|
"logits/rejected": 1.465135097503662, |
|
"logps/chosen": -186.55406188964844, |
|
"logps/rejected": -185.12742614746094, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02626597322523594, |
|
"rewards/margins": 0.04097798839211464, |
|
"rewards/rejected": -0.014712016098201275, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04555808656036447, |
|
"grad_norm": 2.7310001850128174, |
|
"learning_rate": 1.4095717539863326e-05, |
|
"logits/chosen": 1.1774822473526, |
|
"logits/rejected": 1.1877602338790894, |
|
"logps/chosen": -192.26759338378906, |
|
"logps/rejected": -190.67376708984375, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": 0.05477526783943176, |
|
"rewards/margins": 0.0998917669057846, |
|
"rewards/rejected": -0.04511650279164314, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.048405466970387244, |
|
"grad_norm": 2.826904058456421, |
|
"learning_rate": 1.4095449886104784e-05, |
|
"logits/chosen": 1.487045407295227, |
|
"logits/rejected": 1.4947443008422852, |
|
"logps/chosen": -188.93545532226562, |
|
"logps/rejected": -191.91812133789062, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5166666507720947, |
|
"rewards/chosen": 0.029497122392058372, |
|
"rewards/margins": 0.014662249013781548, |
|
"rewards/rejected": 0.014834875240921974, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05125284738041002, |
|
"grad_norm": 3.4622280597686768, |
|
"learning_rate": 1.4095182232346241e-05, |
|
"logits/chosen": 1.340524673461914, |
|
"logits/rejected": 1.3434597253799438, |
|
"logps/chosen": -196.30982971191406, |
|
"logps/rejected": -190.28746032714844, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.07341782003641129, |
|
"rewards/margins": 0.08739937841892242, |
|
"rewards/rejected": -0.013981550931930542, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0541002277904328, |
|
"grad_norm": 2.600072145462036, |
|
"learning_rate": 1.4094914578587699e-05, |
|
"logits/chosen": 1.1103225946426392, |
|
"logits/rejected": 1.1387397050857544, |
|
"logps/chosen": -188.13656616210938, |
|
"logps/rejected": -183.8177947998047, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.09875272214412689, |
|
"rewards/margins": 0.08882729709148407, |
|
"rewards/rejected": 0.009925423189997673, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05694760820045558, |
|
"grad_norm": 2.8922410011291504, |
|
"learning_rate": 1.4094646924829157e-05, |
|
"logits/chosen": 1.510562539100647, |
|
"logits/rejected": 1.513253092765808, |
|
"logps/chosen": -189.52235412597656, |
|
"logps/rejected": -187.32711791992188, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.14362266659736633, |
|
"rewards/margins": 0.09014402329921722, |
|
"rewards/rejected": 0.05347864702343941, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05979498861047836, |
|
"grad_norm": 3.4067959785461426, |
|
"learning_rate": 1.4094379271070615e-05, |
|
"logits/chosen": 1.609612226486206, |
|
"logits/rejected": 1.616186499595642, |
|
"logps/chosen": -184.65316772460938, |
|
"logps/rejected": -185.62258911132812, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 0.21420073509216309, |
|
"rewards/margins": 0.07762883603572845, |
|
"rewards/rejected": 0.13657189905643463, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06264236902050115, |
|
"grad_norm": 3.975015640258789, |
|
"learning_rate": 1.4094111617312074e-05, |
|
"logits/chosen": 1.4019300937652588, |
|
"logits/rejected": 1.4394185543060303, |
|
"logps/chosen": -184.24755859375, |
|
"logps/rejected": -186.11117553710938, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": 0.2533518075942993, |
|
"rewards/margins": 0.11108176410198212, |
|
"rewards/rejected": 0.142270028591156, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06548974943052392, |
|
"grad_norm": 2.7093393802642822, |
|
"learning_rate": 1.4093843963553532e-05, |
|
"logits/chosen": 1.3297879695892334, |
|
"logits/rejected": 1.3092052936553955, |
|
"logps/chosen": -184.32113647460938, |
|
"logps/rejected": -184.41651916503906, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.5833333134651184, |
|
"rewards/chosen": 0.23246827721595764, |
|
"rewards/margins": 0.06593836843967438, |
|
"rewards/rejected": 0.16652987897396088, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.0683371298405467, |
|
"grad_norm": 3.239527702331543, |
|
"learning_rate": 1.409357630979499e-05, |
|
"logits/chosen": 1.6373841762542725, |
|
"logits/rejected": 1.618748426437378, |
|
"logps/chosen": -187.17330932617188, |
|
"logps/rejected": -187.43460083007812, |
|
"loss": 0.6351, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3167666792869568, |
|
"rewards/margins": 0.1365506947040558, |
|
"rewards/rejected": 0.180215984582901, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07118451025056947, |
|
"grad_norm": 3.3547203540802, |
|
"learning_rate": 1.4093308656036446e-05, |
|
"logits/chosen": 1.3807073831558228, |
|
"logits/rejected": 1.3713890314102173, |
|
"logps/chosen": -189.984130859375, |
|
"logps/rejected": -194.66429138183594, |
|
"loss": 0.6498, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.421330988407135, |
|
"rewards/margins": 0.11505619436502457, |
|
"rewards/rejected": 0.3062748312950134, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07403189066059225, |
|
"grad_norm": 3.3612968921661377, |
|
"learning_rate": 1.4093041002277905e-05, |
|
"logits/chosen": 1.3529853820800781, |
|
"logits/rejected": 1.3378267288208008, |
|
"logps/chosen": -182.531494140625, |
|
"logps/rejected": -180.93429565429688, |
|
"loss": 0.6556, |
|
"rewards/accuracies": 0.6166667342185974, |
|
"rewards/chosen": 0.49432888627052307, |
|
"rewards/margins": 0.09916864335536957, |
|
"rewards/rejected": 0.3951602280139923, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07687927107061504, |
|
"grad_norm": 5.357379913330078, |
|
"learning_rate": 1.4092773348519363e-05, |
|
"logits/chosen": 1.3663667440414429, |
|
"logits/rejected": 1.3557411432266235, |
|
"logps/chosen": -180.38389587402344, |
|
"logps/rejected": -180.8890380859375, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": 0.45921817421913147, |
|
"rewards/margins": 0.13937883079051971, |
|
"rewards/rejected": 0.31983932852745056, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07972665148063782, |
|
"grad_norm": 4.233500003814697, |
|
"learning_rate": 1.4092505694760821e-05, |
|
"logits/chosen": 1.3878666162490845, |
|
"logits/rejected": 1.3849413394927979, |
|
"logps/chosen": -187.7335205078125, |
|
"logps/rejected": -187.17578125, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.5678433179855347, |
|
"rewards/margins": 0.13482099771499634, |
|
"rewards/rejected": 0.4330223500728607, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08257403189066059, |
|
"grad_norm": 3.41872501373291, |
|
"learning_rate": 1.4092238041002279e-05, |
|
"logits/chosen": 1.4805208444595337, |
|
"logits/rejected": 1.4516006708145142, |
|
"logps/chosen": -182.12094116210938, |
|
"logps/rejected": -184.3780975341797, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.7841524481773376, |
|
"rewards/margins": 0.16332173347473145, |
|
"rewards/rejected": 0.6208308339118958, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08542141230068337, |
|
"grad_norm": 3.346146821975708, |
|
"learning_rate": 1.4091970387243737e-05, |
|
"logits/chosen": 1.4589000940322876, |
|
"logits/rejected": 1.4556543827056885, |
|
"logps/chosen": -182.80862426757812, |
|
"logps/rejected": -183.60992431640625, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7094445824623108, |
|
"rewards/margins": 0.14436820149421692, |
|
"rewards/rejected": 0.5650763511657715, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08826879271070615, |
|
"grad_norm": 4.612051963806152, |
|
"learning_rate": 1.4091702733485195e-05, |
|
"logits/chosen": 1.5287668704986572, |
|
"logits/rejected": 1.5147442817687988, |
|
"logps/chosen": -182.61416625976562, |
|
"logps/rejected": -183.923828125, |
|
"loss": 0.6683, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": 0.8587290644645691, |
|
"rewards/margins": 0.10906902700662613, |
|
"rewards/rejected": 0.7496601343154907, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09111617312072894, |
|
"grad_norm": 3.749986171722412, |
|
"learning_rate": 1.4091435079726652e-05, |
|
"logits/chosen": 1.4341462850570679, |
|
"logits/rejected": 1.408405065536499, |
|
"logps/chosen": -183.7834014892578, |
|
"logps/rejected": -184.10061645507812, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.9308969378471375, |
|
"rewards/margins": 0.1793207824230194, |
|
"rewards/rejected": 0.75157630443573, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09396355353075171, |
|
"grad_norm": 5.060494899749756, |
|
"learning_rate": 1.409116742596811e-05, |
|
"logits/chosen": 1.3190691471099854, |
|
"logits/rejected": 1.2641618251800537, |
|
"logps/chosen": -176.72817993164062, |
|
"logps/rejected": -177.97030639648438, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 0.9671937823295593, |
|
"rewards/margins": 0.19174639880657196, |
|
"rewards/rejected": 0.7754473686218262, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09681093394077449, |
|
"grad_norm": 3.9803237915039062, |
|
"learning_rate": 1.4090899772209567e-05, |
|
"logits/chosen": 1.8298852443695068, |
|
"logits/rejected": 1.8242895603179932, |
|
"logps/chosen": -179.95370483398438, |
|
"logps/rejected": -179.55535888671875, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 1.0600353479385376, |
|
"rewards/margins": 0.23250079154968262, |
|
"rewards/rejected": 0.827534556388855, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09965831435079726, |
|
"grad_norm": 5.183294773101807, |
|
"learning_rate": 1.4090632118451025e-05, |
|
"logits/chosen": 1.5438239574432373, |
|
"logits/rejected": 1.513770341873169, |
|
"logps/chosen": -180.3792724609375, |
|
"logps/rejected": -184.63238525390625, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": 1.0928313732147217, |
|
"rewards/margins": 0.2398819476366043, |
|
"rewards/rejected": 0.8529494404792786, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10250569476082004, |
|
"grad_norm": 5.333198070526123, |
|
"learning_rate": 1.4090364464692483e-05, |
|
"logits/chosen": 1.2938203811645508, |
|
"logits/rejected": 1.2762978076934814, |
|
"logps/chosen": -182.16403198242188, |
|
"logps/rejected": -181.60708618164062, |
|
"loss": 0.6208, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": 0.9186900854110718, |
|
"rewards/margins": 0.23468203842639923, |
|
"rewards/rejected": 0.6840081214904785, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.10535307517084283, |
|
"grad_norm": 4.603104114532471, |
|
"learning_rate": 1.4090096810933941e-05, |
|
"logits/chosen": 1.5223296880722046, |
|
"logits/rejected": 1.4895663261413574, |
|
"logps/chosen": -179.9161376953125, |
|
"logps/rejected": -187.1411590576172, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.7743527293205261, |
|
"rewards/margins": 0.34860119223594666, |
|
"rewards/rejected": 0.42575159668922424, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1082004555808656, |
|
"grad_norm": 8.414449691772461, |
|
"learning_rate": 1.40898291571754e-05, |
|
"logits/chosen": 1.5139826536178589, |
|
"logits/rejected": 1.4941532611846924, |
|
"logps/chosen": -180.5480499267578, |
|
"logps/rejected": -183.39735412597656, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.7373024225234985, |
|
"rewards/margins": 0.30213838815689087, |
|
"rewards/rejected": 0.4351639747619629, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11104783599088838, |
|
"grad_norm": 4.939911842346191, |
|
"learning_rate": 1.4089561503416856e-05, |
|
"logits/chosen": 1.5266821384429932, |
|
"logits/rejected": 1.5277129411697388, |
|
"logps/chosen": -182.3626251220703, |
|
"logps/rejected": -186.07952880859375, |
|
"loss": 0.608, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.665320098400116, |
|
"rewards/margins": 0.2625730037689209, |
|
"rewards/rejected": 0.40274715423583984, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11389521640091116, |
|
"grad_norm": 4.656453609466553, |
|
"learning_rate": 1.4089293849658314e-05, |
|
"logits/chosen": 1.84712815284729, |
|
"logits/rejected": 1.8255043029785156, |
|
"logps/chosen": -185.7529296875, |
|
"logps/rejected": -191.48007202148438, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": 0.485964298248291, |
|
"rewards/margins": 0.28932809829711914, |
|
"rewards/rejected": 0.19663624465465546, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11674259681093394, |
|
"grad_norm": 6.418772220611572, |
|
"learning_rate": 1.4089026195899772e-05, |
|
"logits/chosen": 1.6944414377212524, |
|
"logits/rejected": 1.6806474924087524, |
|
"logps/chosen": -188.72129821777344, |
|
"logps/rejected": -187.45614624023438, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.6833332777023315, |
|
"rewards/chosen": 0.6671693921089172, |
|
"rewards/margins": 0.26440855860710144, |
|
"rewards/rejected": 0.4027608036994934, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11958997722095673, |
|
"grad_norm": 6.491544723510742, |
|
"learning_rate": 1.408875854214123e-05, |
|
"logits/chosen": 1.5915181636810303, |
|
"logits/rejected": 1.5981369018554688, |
|
"logps/chosen": -183.72654724121094, |
|
"logps/rejected": -189.572021484375, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": 0.8245267868041992, |
|
"rewards/margins": 0.3756122887134552, |
|
"rewards/rejected": 0.44891443848609924, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1224373576309795, |
|
"grad_norm": 4.8121185302734375, |
|
"learning_rate": 1.4088490888382688e-05, |
|
"logits/chosen": 1.659536600112915, |
|
"logits/rejected": 1.6513128280639648, |
|
"logps/chosen": -187.01806640625, |
|
"logps/rejected": -188.09231567382812, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.8251700401306152, |
|
"rewards/margins": 0.31730136275291443, |
|
"rewards/rejected": 0.5078686475753784, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.1252847380410023, |
|
"grad_norm": 4.413024425506592, |
|
"learning_rate": 1.4088223234624147e-05, |
|
"logits/chosen": 1.6399879455566406, |
|
"logits/rejected": 1.6434142589569092, |
|
"logps/chosen": -184.6322021484375, |
|
"logps/rejected": -184.0763397216797, |
|
"loss": 0.6125, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 0.9141266942024231, |
|
"rewards/margins": 0.2925638258457184, |
|
"rewards/rejected": 0.6215628981590271, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12813211845102507, |
|
"grad_norm": 4.256793975830078, |
|
"learning_rate": 1.4087955580865605e-05, |
|
"logits/chosen": 1.6364936828613281, |
|
"logits/rejected": 1.613063097000122, |
|
"logps/chosen": -187.21011352539062, |
|
"logps/rejected": -185.93507385253906, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": 0.6389889121055603, |
|
"rewards/margins": 0.27906617522239685, |
|
"rewards/rejected": 0.35992270708084106, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.13097949886104784, |
|
"grad_norm": 6.61959981918335, |
|
"learning_rate": 1.4087687927107061e-05, |
|
"logits/chosen": 1.768164038658142, |
|
"logits/rejected": 1.7589298486709595, |
|
"logps/chosen": -179.0304718017578, |
|
"logps/rejected": -189.29208374023438, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 0.7509846687316895, |
|
"rewards/margins": 0.4243010878562927, |
|
"rewards/rejected": 0.32668358087539673, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13382687927107062, |
|
"grad_norm": 3.841958522796631, |
|
"learning_rate": 1.408742027334852e-05, |
|
"logits/chosen": 1.8268096446990967, |
|
"logits/rejected": 1.7784650325775146, |
|
"logps/chosen": -180.85340881347656, |
|
"logps/rejected": -188.13333129882812, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": 0.7165217399597168, |
|
"rewards/margins": 0.24613836407661438, |
|
"rewards/rejected": 0.47038334608078003, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1366742596810934, |
|
"grad_norm": 4.740571022033691, |
|
"learning_rate": 1.4087152619589978e-05, |
|
"logits/chosen": 2.0922436714172363, |
|
"logits/rejected": 2.054403066635132, |
|
"logps/chosen": -177.0702667236328, |
|
"logps/rejected": -182.60592651367188, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": 0.6301138997077942, |
|
"rewards/margins": 0.36010658740997314, |
|
"rewards/rejected": 0.27000728249549866, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13952164009111617, |
|
"grad_norm": 4.578260898590088, |
|
"learning_rate": 1.4086884965831436e-05, |
|
"logits/chosen": 1.6760629415512085, |
|
"logits/rejected": 1.6328535079956055, |
|
"logps/chosen": -179.05679321289062, |
|
"logps/rejected": -185.8597412109375, |
|
"loss": 0.5984, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": 0.885299026966095, |
|
"rewards/margins": 0.3471626341342926, |
|
"rewards/rejected": 0.53813636302948, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.14236902050113895, |
|
"grad_norm": 9.571253776550293, |
|
"learning_rate": 1.4086617312072894e-05, |
|
"logits/chosen": 1.8746957778930664, |
|
"logits/rejected": 1.8733335733413696, |
|
"logps/chosen": -181.01669311523438, |
|
"logps/rejected": -188.90940856933594, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.8897438049316406, |
|
"rewards/margins": 0.44727665185928345, |
|
"rewards/rejected": 0.44246721267700195, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14521640091116172, |
|
"grad_norm": 4.612638473510742, |
|
"learning_rate": 1.4086349658314352e-05, |
|
"logits/chosen": 1.926553726196289, |
|
"logits/rejected": 1.9168344736099243, |
|
"logps/chosen": -179.99130249023438, |
|
"logps/rejected": -183.90756225585938, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 0.7969551682472229, |
|
"rewards/margins": 0.5556143522262573, |
|
"rewards/rejected": 0.2413407862186432, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1480637813211845, |
|
"grad_norm": 5.260079860687256, |
|
"learning_rate": 1.408608200455581e-05, |
|
"logits/chosen": 2.0790677070617676, |
|
"logits/rejected": 2.0622916221618652, |
|
"logps/chosen": -191.6678009033203, |
|
"logps/rejected": -195.73336791992188, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -0.02513580396771431, |
|
"rewards/margins": 0.46241217851638794, |
|
"rewards/rejected": -0.48754796385765076, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.15091116173120728, |
|
"grad_norm": 6.061770439147949, |
|
"learning_rate": 1.4085814350797267e-05, |
|
"logits/chosen": 1.8206462860107422, |
|
"logits/rejected": 1.8210747241973877, |
|
"logps/chosen": -189.5788116455078, |
|
"logps/rejected": -192.89532470703125, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": -0.11778483539819717, |
|
"rewards/margins": 0.24859514832496643, |
|
"rewards/rejected": -0.3663800060749054, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.15375854214123008, |
|
"grad_norm": 7.366477012634277, |
|
"learning_rate": 1.4085546697038725e-05, |
|
"logits/chosen": 1.4823158979415894, |
|
"logits/rejected": 1.4424474239349365, |
|
"logps/chosen": -189.91629028320312, |
|
"logps/rejected": -194.8428497314453, |
|
"loss": 0.6584, |
|
"rewards/accuracies": 0.5833333730697632, |
|
"rewards/chosen": 0.1267489641904831, |
|
"rewards/margins": 0.19468553364276886, |
|
"rewards/rejected": -0.06793657690286636, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15660592255125286, |
|
"grad_norm": 6.191103935241699, |
|
"learning_rate": 1.4085279043280183e-05, |
|
"logits/chosen": 1.7889503240585327, |
|
"logits/rejected": 1.7639633417129517, |
|
"logps/chosen": -180.61203002929688, |
|
"logps/rejected": -184.73779296875, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.6674291491508484, |
|
"rewards/margins": 0.39728331565856934, |
|
"rewards/rejected": 0.27014586329460144, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15945330296127563, |
|
"grad_norm": 5.199939250946045, |
|
"learning_rate": 1.408501138952164e-05, |
|
"logits/chosen": 1.800641417503357, |
|
"logits/rejected": 1.7895002365112305, |
|
"logps/chosen": -181.73495483398438, |
|
"logps/rejected": -189.62301635742188, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": 0.6316097974777222, |
|
"rewards/margins": 0.381971538066864, |
|
"rewards/rejected": 0.24963828921318054, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1623006833712984, |
|
"grad_norm": 6.458276271820068, |
|
"learning_rate": 1.4084743735763098e-05, |
|
"logits/chosen": 1.6199853420257568, |
|
"logits/rejected": 1.6275495290756226, |
|
"logps/chosen": -188.76553344726562, |
|
"logps/rejected": -193.72213745117188, |
|
"loss": 0.5953, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.44772014021873474, |
|
"rewards/margins": 0.34694141149520874, |
|
"rewards/rejected": 0.10077869892120361, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.16514806378132119, |
|
"grad_norm": 8.612196922302246, |
|
"learning_rate": 1.4084476082004556e-05, |
|
"logits/chosen": 1.5094552040100098, |
|
"logits/rejected": 1.4994395971298218, |
|
"logps/chosen": -183.19235229492188, |
|
"logps/rejected": -188.16197204589844, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.45164498686790466, |
|
"rewards/margins": 0.4487348198890686, |
|
"rewards/rejected": 0.002910163952037692, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16799544419134396, |
|
"grad_norm": 7.949935436248779, |
|
"learning_rate": 1.4084208428246014e-05, |
|
"logits/chosen": 1.9034900665283203, |
|
"logits/rejected": 1.8692665100097656, |
|
"logps/chosen": -188.60250854492188, |
|
"logps/rejected": -186.42356872558594, |
|
"loss": 0.5381, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6019371747970581, |
|
"rewards/margins": 0.49507027864456177, |
|
"rewards/rejected": 0.10686691105365753, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.17084282460136674, |
|
"grad_norm": 6.373099327087402, |
|
"learning_rate": 1.408394077448747e-05, |
|
"logits/chosen": 1.8054128885269165, |
|
"logits/rejected": 1.7788407802581787, |
|
"logps/chosen": -186.22625732421875, |
|
"logps/rejected": -188.0371856689453, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.30558791756629944, |
|
"rewards/margins": 0.4337770938873291, |
|
"rewards/rejected": -0.12818923592567444, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.17369020501138951, |
|
"grad_norm": 7.086456775665283, |
|
"learning_rate": 1.4083673120728929e-05, |
|
"logits/chosen": 1.990748643875122, |
|
"logits/rejected": 1.954580307006836, |
|
"logps/chosen": -190.6208953857422, |
|
"logps/rejected": -194.99554443359375, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": 0.09909350425004959, |
|
"rewards/margins": 0.45813828706741333, |
|
"rewards/rejected": -0.35904479026794434, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1765375854214123, |
|
"grad_norm": 5.860842227935791, |
|
"learning_rate": 1.4083405466970387e-05, |
|
"logits/chosen": 1.4852467775344849, |
|
"logits/rejected": 1.486603021621704, |
|
"logps/chosen": -189.75148010253906, |
|
"logps/rejected": -189.5113067626953, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.12619808316230774, |
|
"rewards/margins": 0.5254445672035217, |
|
"rewards/rejected": -0.3992464542388916, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17938496583143507, |
|
"grad_norm": 6.520554542541504, |
|
"learning_rate": 1.4083137813211845e-05, |
|
"logits/chosen": 1.6842315196990967, |
|
"logits/rejected": 1.679579734802246, |
|
"logps/chosen": -186.0720672607422, |
|
"logps/rejected": -186.9860076904297, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.6333333849906921, |
|
"rewards/chosen": 0.2344587743282318, |
|
"rewards/margins": 0.41377443075180054, |
|
"rewards/rejected": -0.17931564152240753, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.18223234624145787, |
|
"grad_norm": 5.756778240203857, |
|
"learning_rate": 1.4082870159453303e-05, |
|
"logits/chosen": 1.7839409112930298, |
|
"logits/rejected": 1.758226990699768, |
|
"logps/chosen": -185.8971405029297, |
|
"logps/rejected": -195.45352172851562, |
|
"loss": 0.5255, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": 0.22731363773345947, |
|
"rewards/margins": 0.5177304148674011, |
|
"rewards/rejected": -0.29041680693626404, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.18507972665148065, |
|
"grad_norm": 6.184617519378662, |
|
"learning_rate": 1.4082602505694762e-05, |
|
"logits/chosen": 1.5313549041748047, |
|
"logits/rejected": 1.5081932544708252, |
|
"logps/chosen": -190.99046325683594, |
|
"logps/rejected": -195.67564392089844, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": 0.30462446808815, |
|
"rewards/margins": 0.598679780960083, |
|
"rewards/rejected": -0.294055312871933, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18792710706150342, |
|
"grad_norm": 6.379988670349121, |
|
"learning_rate": 1.408233485193622e-05, |
|
"logits/chosen": 1.446187973022461, |
|
"logits/rejected": 1.4404445886611938, |
|
"logps/chosen": -186.88339233398438, |
|
"logps/rejected": -192.68328857421875, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.21411502361297607, |
|
"rewards/margins": 0.503459632396698, |
|
"rewards/rejected": -0.2893446087837219, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.1907744874715262, |
|
"grad_norm": 6.756308078765869, |
|
"learning_rate": 1.4082067198177676e-05, |
|
"logits/chosen": 1.5809494256973267, |
|
"logits/rejected": 1.5566542148590088, |
|
"logps/chosen": -193.43441772460938, |
|
"logps/rejected": -194.9256134033203, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 0.1698226034641266, |
|
"rewards/margins": 0.48203667998313904, |
|
"rewards/rejected": -0.31221404671669006, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.19362186788154898, |
|
"grad_norm": 8.186720848083496, |
|
"learning_rate": 1.4081799544419134e-05, |
|
"logits/chosen": 1.4054545164108276, |
|
"logits/rejected": 1.389957070350647, |
|
"logps/chosen": -184.97232055664062, |
|
"logps/rejected": -192.24832153320312, |
|
"loss": 0.4763, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": 0.35529452562332153, |
|
"rewards/margins": 0.6863371133804321, |
|
"rewards/rejected": -0.3310425579547882, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.19646924829157175, |
|
"grad_norm": 6.5290937423706055, |
|
"learning_rate": 1.4081531890660593e-05, |
|
"logits/chosen": 1.960597038269043, |
|
"logits/rejected": 1.9493227005004883, |
|
"logps/chosen": -190.00778198242188, |
|
"logps/rejected": -194.88388061523438, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3141848146915436, |
|
"rewards/margins": 0.6257708072662354, |
|
"rewards/rejected": -0.3115859925746918, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19931662870159453, |
|
"grad_norm": 8.158761978149414, |
|
"learning_rate": 1.408126423690205e-05, |
|
"logits/chosen": 1.9396240711212158, |
|
"logits/rejected": 1.901085615158081, |
|
"logps/chosen": -181.5595703125, |
|
"logps/rejected": -188.5738525390625, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": 0.8007356524467468, |
|
"rewards/margins": 0.5970959663391113, |
|
"rewards/rejected": 0.20363974571228027, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2021640091116173, |
|
"grad_norm": 7.908491611480713, |
|
"learning_rate": 1.4080996583143509e-05, |
|
"logits/chosen": 2.1133837699890137, |
|
"logits/rejected": 2.0991783142089844, |
|
"logps/chosen": -186.72463989257812, |
|
"logps/rejected": -191.86631774902344, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.43003392219543457, |
|
"rewards/margins": 0.4883079528808594, |
|
"rewards/rejected": -0.05827409029006958, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.20501138952164008, |
|
"grad_norm": 4.833853721618652, |
|
"learning_rate": 1.4080728929384967e-05, |
|
"logits/chosen": 1.790161371231079, |
|
"logits/rejected": 1.7500584125518799, |
|
"logps/chosen": -184.5858154296875, |
|
"logps/rejected": -193.99765014648438, |
|
"loss": 0.5817, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 0.5971255898475647, |
|
"rewards/margins": 0.5937383770942688, |
|
"rewards/rejected": 0.003387200878933072, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.20785876993166286, |
|
"grad_norm": 6.047511100769043, |
|
"learning_rate": 1.4080461275626425e-05, |
|
"logits/chosen": 1.7548940181732178, |
|
"logits/rejected": 1.7236839532852173, |
|
"logps/chosen": -182.98989868164062, |
|
"logps/rejected": -190.8938446044922, |
|
"loss": 0.5335, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.427555650472641, |
|
"rewards/margins": 0.56451416015625, |
|
"rewards/rejected": -0.136958509683609, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.21070615034168566, |
|
"grad_norm": 13.955177307128906, |
|
"learning_rate": 1.4080193621867883e-05, |
|
"logits/chosen": 1.519672155380249, |
|
"logits/rejected": 1.4972164630889893, |
|
"logps/chosen": -189.01983642578125, |
|
"logps/rejected": -195.07350158691406, |
|
"loss": 0.5435, |
|
"rewards/accuracies": 0.6833332777023315, |
|
"rewards/chosen": -0.3326644003391266, |
|
"rewards/margins": 0.5098401308059692, |
|
"rewards/rejected": -0.8425045013427734, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.21355353075170844, |
|
"grad_norm": 9.393014907836914, |
|
"learning_rate": 1.407992596810934e-05, |
|
"logits/chosen": 1.8645124435424805, |
|
"logits/rejected": 1.8256851434707642, |
|
"logps/chosen": -192.75039672851562, |
|
"logps/rejected": -196.81118774414062, |
|
"loss": 0.5669, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -0.15597638487815857, |
|
"rewards/margins": 0.5917509198188782, |
|
"rewards/rejected": -0.7477271556854248, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2164009111617312, |
|
"grad_norm": 13.501594543457031, |
|
"learning_rate": 1.4079658314350798e-05, |
|
"logits/chosen": 2.1007590293884277, |
|
"logits/rejected": 2.077477216720581, |
|
"logps/chosen": -191.407958984375, |
|
"logps/rejected": -195.40225219726562, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.4909743666648865, |
|
"rewards/margins": 0.5510894060134888, |
|
"rewards/rejected": -1.04206383228302, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.219248291571754, |
|
"grad_norm": 11.767430305480957, |
|
"learning_rate": 1.4079390660592256e-05, |
|
"logits/chosen": 2.3974993228912354, |
|
"logits/rejected": 2.375783681869507, |
|
"logps/chosen": -193.2632293701172, |
|
"logps/rejected": -201.26290893554688, |
|
"loss": 0.4595, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -0.15944749116897583, |
|
"rewards/margins": 0.7945913076400757, |
|
"rewards/rejected": -0.9540387988090515, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.22209567198177677, |
|
"grad_norm": 7.549858570098877, |
|
"learning_rate": 1.4079123006833714e-05, |
|
"logits/chosen": 1.6657108068466187, |
|
"logits/rejected": 1.6228992938995361, |
|
"logps/chosen": -179.9643096923828, |
|
"logps/rejected": -191.71475219726562, |
|
"loss": 0.4792, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": 0.4781853258609772, |
|
"rewards/margins": 0.878165066242218, |
|
"rewards/rejected": -0.39997971057891846, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.22494305239179954, |
|
"grad_norm": 10.577963829040527, |
|
"learning_rate": 1.4078855353075171e-05, |
|
"logits/chosen": 1.9104713201522827, |
|
"logits/rejected": 1.869380235671997, |
|
"logps/chosen": -182.4436492919922, |
|
"logps/rejected": -191.72471618652344, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.591245174407959, |
|
"rewards/margins": 0.5981950163841248, |
|
"rewards/rejected": -0.006949782371520996, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.22779043280182232, |
|
"grad_norm": 10.107033729553223, |
|
"learning_rate": 1.4078587699316629e-05, |
|
"logits/chosen": 1.9709075689315796, |
|
"logits/rejected": 1.941165566444397, |
|
"logps/chosen": -187.01475524902344, |
|
"logps/rejected": -188.1396484375, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": 0.21675971150398254, |
|
"rewards/margins": 0.5140406489372253, |
|
"rewards/rejected": -0.29728102684020996, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2306378132118451, |
|
"grad_norm": 10.437517166137695, |
|
"learning_rate": 1.4078320045558087e-05, |
|
"logits/chosen": 1.768145203590393, |
|
"logits/rejected": 1.756650686264038, |
|
"logps/chosen": -192.96310424804688, |
|
"logps/rejected": -203.16757202148438, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.4098523259162903, |
|
"rewards/margins": 0.6148914694786072, |
|
"rewards/rejected": -1.024743914604187, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.23348519362186787, |
|
"grad_norm": 5.297072410583496, |
|
"learning_rate": 1.4078052391799544e-05, |
|
"logits/chosen": 1.579408884048462, |
|
"logits/rejected": 1.557755708694458, |
|
"logps/chosen": -195.23289489746094, |
|
"logps/rejected": -202.75173950195312, |
|
"loss": 0.5093, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -0.551082193851471, |
|
"rewards/margins": 0.6868919730186462, |
|
"rewards/rejected": -1.2379741668701172, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.23633257403189067, |
|
"grad_norm": 8.658047676086426, |
|
"learning_rate": 1.4077784738041002e-05, |
|
"logits/chosen": 1.7794520854949951, |
|
"logits/rejected": 1.763954520225525, |
|
"logps/chosen": -191.802734375, |
|
"logps/rejected": -201.72024536132812, |
|
"loss": 0.512, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -0.3704865276813507, |
|
"rewards/margins": 0.8075221180915833, |
|
"rewards/rejected": -1.1780085563659668, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.23917995444191345, |
|
"grad_norm": 6.875248432159424, |
|
"learning_rate": 1.407751708428246e-05, |
|
"logits/chosen": 1.9156545400619507, |
|
"logits/rejected": 1.8953126668930054, |
|
"logps/chosen": -187.77313232421875, |
|
"logps/rejected": -195.1903076171875, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.16680260002613068, |
|
"rewards/margins": 0.6953068971633911, |
|
"rewards/rejected": -0.5285042524337769, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.24202733485193623, |
|
"grad_norm": 7.518496513366699, |
|
"learning_rate": 1.4077249430523918e-05, |
|
"logits/chosen": 1.585115671157837, |
|
"logits/rejected": 1.5631306171417236, |
|
"logps/chosen": -185.40744018554688, |
|
"logps/rejected": -194.33627319335938, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": 0.23953166604042053, |
|
"rewards/margins": 0.7418320178985596, |
|
"rewards/rejected": -0.5023003220558167, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.244874715261959, |
|
"grad_norm": 7.626437664031982, |
|
"learning_rate": 1.4076981776765376e-05, |
|
"logits/chosen": 2.024663209915161, |
|
"logits/rejected": 1.9502818584442139, |
|
"logps/chosen": -188.0728302001953, |
|
"logps/rejected": -194.77236938476562, |
|
"loss": 0.4572, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": 0.18363861739635468, |
|
"rewards/margins": 0.8329526782035828, |
|
"rewards/rejected": -0.6493140459060669, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.24772209567198178, |
|
"grad_norm": 6.028398036956787, |
|
"learning_rate": 1.4076714123006835e-05, |
|
"logits/chosen": 2.0717482566833496, |
|
"logits/rejected": 2.015554904937744, |
|
"logps/chosen": -190.80706787109375, |
|
"logps/rejected": -197.00929260253906, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -0.23343093693256378, |
|
"rewards/margins": 0.7314363121986389, |
|
"rewards/rejected": -0.9648672938346863, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2505694760820046, |
|
"grad_norm": 11.498568534851074, |
|
"learning_rate": 1.4076446469248293e-05, |
|
"logits/chosen": 1.7024204730987549, |
|
"logits/rejected": 1.6783952713012695, |
|
"logps/chosen": -189.88160705566406, |
|
"logps/rejected": -200.33108520507812, |
|
"loss": 0.5148, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -0.3500627875328064, |
|
"rewards/margins": 0.6954061388969421, |
|
"rewards/rejected": -1.045469045639038, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.25341685649202733, |
|
"grad_norm": 8.39510440826416, |
|
"learning_rate": 1.407617881548975e-05, |
|
"logits/chosen": 2.072392225265503, |
|
"logits/rejected": 1.9953988790512085, |
|
"logps/chosen": -184.83578491210938, |
|
"logps/rejected": -192.18081665039062, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.27530142664909363, |
|
"rewards/margins": 0.8618084192276001, |
|
"rewards/rejected": -0.5865069627761841, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.25626423690205014, |
|
"grad_norm": 7.497321605682373, |
|
"learning_rate": 1.4075911161731207e-05, |
|
"logits/chosen": 2.0645341873168945, |
|
"logits/rejected": 2.0675666332244873, |
|
"logps/chosen": -191.05245971679688, |
|
"logps/rejected": -191.71282958984375, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.6500000357627869, |
|
"rewards/chosen": 0.19750186800956726, |
|
"rewards/margins": 0.5128003358840942, |
|
"rewards/rejected": -0.3152984380722046, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2591116173120729, |
|
"grad_norm": 7.78532600402832, |
|
"learning_rate": 1.4075643507972666e-05, |
|
"logits/chosen": 1.673762321472168, |
|
"logits/rejected": 1.6521574258804321, |
|
"logps/chosen": -186.23394775390625, |
|
"logps/rejected": -194.6929931640625, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": 0.042056869715452194, |
|
"rewards/margins": 0.629268229007721, |
|
"rewards/rejected": -0.5872113108634949, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2619589977220957, |
|
"grad_norm": 9.374770164489746, |
|
"learning_rate": 1.4075375854214124e-05, |
|
"logits/chosen": 2.255173444747925, |
|
"logits/rejected": 2.217841625213623, |
|
"logps/chosen": -181.05465698242188, |
|
"logps/rejected": -191.16636657714844, |
|
"loss": 0.4878, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": 0.2503889501094818, |
|
"rewards/margins": 0.7921093702316284, |
|
"rewards/rejected": -0.541720449924469, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.26480637813211844, |
|
"grad_norm": 8.376593589782715, |
|
"learning_rate": 1.4075108200455582e-05, |
|
"logits/chosen": 1.7911618947982788, |
|
"logits/rejected": 1.7365095615386963, |
|
"logps/chosen": -197.14320373535156, |
|
"logps/rejected": -204.03855895996094, |
|
"loss": 0.416, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -0.07821293920278549, |
|
"rewards/margins": 1.092116117477417, |
|
"rewards/rejected": -1.1703290939331055, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.26765375854214124, |
|
"grad_norm": 12.098174095153809, |
|
"learning_rate": 1.407484054669704e-05, |
|
"logits/chosen": 1.771426796913147, |
|
"logits/rejected": 1.7347352504730225, |
|
"logps/chosen": -190.84811401367188, |
|
"logps/rejected": -202.90049743652344, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -0.07078223675489426, |
|
"rewards/margins": 0.9916135668754578, |
|
"rewards/rejected": -1.0623959302902222, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.270501138952164, |
|
"grad_norm": 13.370096206665039, |
|
"learning_rate": 1.4074572892938498e-05, |
|
"logits/chosen": 2.4168200492858887, |
|
"logits/rejected": 2.3770642280578613, |
|
"logps/chosen": -197.3751678466797, |
|
"logps/rejected": -205.10140991210938, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -0.508063018321991, |
|
"rewards/margins": 0.8573676943778992, |
|
"rewards/rejected": -1.3654309511184692, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2733485193621868, |
|
"grad_norm": 8.180644989013672, |
|
"learning_rate": 1.4074305239179955e-05, |
|
"logits/chosen": 1.9371917247772217, |
|
"logits/rejected": 1.903172492980957, |
|
"logps/chosen": -203.6707000732422, |
|
"logps/rejected": -210.0899200439453, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -1.0856568813323975, |
|
"rewards/margins": 0.657774806022644, |
|
"rewards/rejected": -1.7434314489364624, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.27619589977220954, |
|
"grad_norm": 7.428178787231445, |
|
"learning_rate": 1.4074037585421413e-05, |
|
"logits/chosen": 1.4970729351043701, |
|
"logits/rejected": 1.4920985698699951, |
|
"logps/chosen": -197.6659698486328, |
|
"logps/rejected": -204.35134887695312, |
|
"loss": 0.4897, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -0.8745123147964478, |
|
"rewards/margins": 0.7961224913597107, |
|
"rewards/rejected": -1.6706346273422241, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.27904328018223234, |
|
"grad_norm": 9.783337593078613, |
|
"learning_rate": 1.4073769931662871e-05, |
|
"logits/chosen": 1.8168764114379883, |
|
"logits/rejected": 1.7757813930511475, |
|
"logps/chosen": -190.90243530273438, |
|
"logps/rejected": -200.79592895507812, |
|
"loss": 0.4804, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -0.17690226435661316, |
|
"rewards/margins": 0.8342889547348022, |
|
"rewards/rejected": -1.0111911296844482, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.28189066059225515, |
|
"grad_norm": 10.380950927734375, |
|
"learning_rate": 1.407350227790433e-05, |
|
"logits/chosen": 1.904358148574829, |
|
"logits/rejected": 1.8814504146575928, |
|
"logps/chosen": -192.22409057617188, |
|
"logps/rejected": -194.5697021484375, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.04954533651471138, |
|
"rewards/margins": 0.7774869799613953, |
|
"rewards/rejected": -0.727941632270813, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2847380410022779, |
|
"grad_norm": 12.717456817626953, |
|
"learning_rate": 1.4073234624145788e-05, |
|
"logits/chosen": 1.9216387271881104, |
|
"logits/rejected": 1.909235954284668, |
|
"logps/chosen": -189.25656127929688, |
|
"logps/rejected": -189.6717987060547, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.6166666746139526, |
|
"rewards/chosen": 0.24547600746154785, |
|
"rewards/margins": 0.46644410490989685, |
|
"rewards/rejected": -0.22096815705299377, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2875854214123007, |
|
"grad_norm": 8.20633316040039, |
|
"learning_rate": 1.4072966970387244e-05, |
|
"logits/chosen": 2.119447708129883, |
|
"logits/rejected": 2.0835766792297363, |
|
"logps/chosen": -185.17373657226562, |
|
"logps/rejected": -197.90789794921875, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": 0.463967889547348, |
|
"rewards/margins": 0.9948002099990845, |
|
"rewards/rejected": -0.5308324098587036, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.29043280182232345, |
|
"grad_norm": 7.67072868347168, |
|
"learning_rate": 1.4072699316628702e-05, |
|
"logits/chosen": 2.101510763168335, |
|
"logits/rejected": 2.0747411251068115, |
|
"logps/chosen": -189.11227416992188, |
|
"logps/rejected": -199.26864624023438, |
|
"loss": 0.4964, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -0.03253510594367981, |
|
"rewards/margins": 0.7407889366149902, |
|
"rewards/rejected": -0.7733240723609924, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.29328018223234625, |
|
"grad_norm": 2.5675323009490967, |
|
"learning_rate": 1.4072431662870159e-05, |
|
"logits/chosen": 1.761235237121582, |
|
"logits/rejected": 1.69741952419281, |
|
"logps/chosen": -192.15017700195312, |
|
"logps/rejected": -196.58164978027344, |
|
"loss": 0.3807, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": 0.30114102363586426, |
|
"rewards/margins": 1.2773263454437256, |
|
"rewards/rejected": -0.9761852025985718, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.296127562642369, |
|
"grad_norm": 6.229589462280273, |
|
"learning_rate": 1.4072164009111617e-05, |
|
"logits/chosen": 1.8937768936157227, |
|
"logits/rejected": 1.8909927606582642, |
|
"logps/chosen": -192.98153686523438, |
|
"logps/rejected": -198.7643585205078, |
|
"loss": 0.4607, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.3074316382408142, |
|
"rewards/margins": 0.925214946269989, |
|
"rewards/rejected": -1.2326464653015137, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2989749430523918, |
|
"grad_norm": 21.918458938598633, |
|
"learning_rate": 1.4071896355353075e-05, |
|
"logits/chosen": 1.5101830959320068, |
|
"logits/rejected": 1.4930336475372314, |
|
"logps/chosen": -187.5404510498047, |
|
"logps/rejected": -200.8413543701172, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": 0.17931926250457764, |
|
"rewards/margins": 0.7964944839477539, |
|
"rewards/rejected": -0.6171752214431763, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.30182232346241455, |
|
"grad_norm": 8.404467582702637, |
|
"learning_rate": 1.4071628701594533e-05, |
|
"logits/chosen": 1.6592843532562256, |
|
"logits/rejected": 1.675798773765564, |
|
"logps/chosen": -191.4521026611328, |
|
"logps/rejected": -199.44200134277344, |
|
"loss": 0.4875, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -0.2935402989387512, |
|
"rewards/margins": 1.0806810855865479, |
|
"rewards/rejected": -1.3742212057113647, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.30466970387243736, |
|
"grad_norm": 8.605794906616211, |
|
"learning_rate": 1.4071361047835991e-05, |
|
"logits/chosen": 2.1089935302734375, |
|
"logits/rejected": 2.0790882110595703, |
|
"logps/chosen": -203.49526977539062, |
|
"logps/rejected": -208.977294921875, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7177016139030457, |
|
"rewards/margins": 0.9588042497634888, |
|
"rewards/rejected": -1.6765056848526, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.30751708428246016, |
|
"grad_norm": 10.008315086364746, |
|
"learning_rate": 1.407109339407745e-05, |
|
"logits/chosen": 1.850354790687561, |
|
"logits/rejected": 1.805641770362854, |
|
"logps/chosen": -195.0923309326172, |
|
"logps/rejected": -202.8920440673828, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -0.3235352039337158, |
|
"rewards/margins": 0.6880014538764954, |
|
"rewards/rejected": -1.011536717414856, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.3103644646924829, |
|
"grad_norm": 13.152917861938477, |
|
"learning_rate": 1.4070825740318908e-05, |
|
"logits/chosen": 1.8469655513763428, |
|
"logits/rejected": 1.8281362056732178, |
|
"logps/chosen": -199.08865356445312, |
|
"logps/rejected": -203.9534912109375, |
|
"loss": 0.5631, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -0.3010219931602478, |
|
"rewards/margins": 0.7448440790176392, |
|
"rewards/rejected": -1.0458661317825317, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3132118451025057, |
|
"grad_norm": 13.376197814941406, |
|
"learning_rate": 1.4070558086560364e-05, |
|
"logits/chosen": 2.043653964996338, |
|
"logits/rejected": 1.949476957321167, |
|
"logps/chosen": -199.87838745117188, |
|
"logps/rejected": -205.461181640625, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.33904480934143066, |
|
"rewards/margins": 1.0151432752609253, |
|
"rewards/rejected": -1.354188084602356, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.31605922551252846, |
|
"grad_norm": 11.804425239562988, |
|
"learning_rate": 1.4070290432801822e-05, |
|
"logits/chosen": 1.7380597591400146, |
|
"logits/rejected": 1.7194397449493408, |
|
"logps/chosen": -196.87367248535156, |
|
"logps/rejected": -205.9291229248047, |
|
"loss": 0.4909, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6721280813217163, |
|
"rewards/margins": 0.8940645456314087, |
|
"rewards/rejected": -1.566192626953125, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.31890660592255127, |
|
"grad_norm": 11.524681091308594, |
|
"learning_rate": 1.407002277904328e-05, |
|
"logits/chosen": 1.8095728158950806, |
|
"logits/rejected": 1.7724698781967163, |
|
"logps/chosen": -194.70455932617188, |
|
"logps/rejected": -205.51736450195312, |
|
"loss": 0.3973, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.4333480894565582, |
|
"rewards/margins": 1.3439867496490479, |
|
"rewards/rejected": -1.7773349285125732, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.321753986332574, |
|
"grad_norm": 10.198122024536133, |
|
"learning_rate": 1.4069755125284739e-05, |
|
"logits/chosen": 1.5907586812973022, |
|
"logits/rejected": 1.554947853088379, |
|
"logps/chosen": -198.55319213867188, |
|
"logps/rejected": -208.1469268798828, |
|
"loss": 0.3429, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -0.379428893327713, |
|
"rewards/margins": 1.4134459495544434, |
|
"rewards/rejected": -1.7928749322891235, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3246013667425968, |
|
"grad_norm": 11.933664321899414, |
|
"learning_rate": 1.4069487471526197e-05, |
|
"logits/chosen": 2.073201894760132, |
|
"logits/rejected": 2.0558342933654785, |
|
"logps/chosen": -196.92529296875, |
|
"logps/rejected": -207.8292694091797, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -0.7831190824508667, |
|
"rewards/margins": 0.9773709177970886, |
|
"rewards/rejected": -1.7604900598526, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.32744874715261957, |
|
"grad_norm": 10.23611831665039, |
|
"learning_rate": 1.4069219817767655e-05, |
|
"logits/chosen": 1.8735500574111938, |
|
"logits/rejected": 1.8228156566619873, |
|
"logps/chosen": -198.6478271484375, |
|
"logps/rejected": -211.98245239257812, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.0398519039154053, |
|
"rewards/margins": 0.9020865559577942, |
|
"rewards/rejected": -1.9419386386871338, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.33029612756264237, |
|
"grad_norm": 8.893653869628906, |
|
"learning_rate": 1.4068952164009113e-05, |
|
"logits/chosen": 1.9107131958007812, |
|
"logits/rejected": 1.891579031944275, |
|
"logps/chosen": -197.96861267089844, |
|
"logps/rejected": -205.613037109375, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -0.6610264182090759, |
|
"rewards/margins": 0.8070106506347656, |
|
"rewards/rejected": -1.4680370092391968, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3331435079726651, |
|
"grad_norm": 6.878332614898682, |
|
"learning_rate": 1.406868451025057e-05, |
|
"logits/chosen": 1.7560676336288452, |
|
"logits/rejected": 1.7299124002456665, |
|
"logps/chosen": -192.83486938476562, |
|
"logps/rejected": -201.4220733642578, |
|
"loss": 0.4402, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.16509667038917542, |
|
"rewards/margins": 1.2705130577087402, |
|
"rewards/rejected": -1.4356096982955933, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3359908883826879, |
|
"grad_norm": 13.861001014709473, |
|
"learning_rate": 1.4068416856492028e-05, |
|
"logits/chosen": 2.2744829654693604, |
|
"logits/rejected": 2.2301721572875977, |
|
"logps/chosen": -191.3073272705078, |
|
"logps/rejected": -203.01943969726562, |
|
"loss": 0.4838, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -0.39433032274246216, |
|
"rewards/margins": 1.16512930393219, |
|
"rewards/rejected": -1.5594595670700073, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.33883826879271073, |
|
"grad_norm": 8.740705490112305, |
|
"learning_rate": 1.4068149202733486e-05, |
|
"logits/chosen": 2.088308095932007, |
|
"logits/rejected": 2.0627994537353516, |
|
"logps/chosen": -197.44985961914062, |
|
"logps/rejected": -206.495361328125, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.6833332777023315, |
|
"rewards/chosen": -0.9009159207344055, |
|
"rewards/margins": 0.9384552836418152, |
|
"rewards/rejected": -1.8393710851669312, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3416856492027335, |
|
"grad_norm": 12.754415512084961, |
|
"learning_rate": 1.4067881548974944e-05, |
|
"logits/chosen": 1.8001388311386108, |
|
"logits/rejected": 1.7345482110977173, |
|
"logps/chosen": -204.52352905273438, |
|
"logps/rejected": -215.9031524658203, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.7500001192092896, |
|
"rewards/chosen": -1.0903289318084717, |
|
"rewards/margins": 1.0529909133911133, |
|
"rewards/rejected": -2.143319606781006, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3445330296127563, |
|
"grad_norm": 12.682507514953613, |
|
"learning_rate": 1.4067613895216402e-05, |
|
"logits/chosen": 1.8131275177001953, |
|
"logits/rejected": 1.7931241989135742, |
|
"logps/chosen": -192.96182250976562, |
|
"logps/rejected": -201.81529235839844, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -0.5940048098564148, |
|
"rewards/margins": 0.8659802675247192, |
|
"rewards/rejected": -1.4599850177764893, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.34738041002277903, |
|
"grad_norm": 5.179973602294922, |
|
"learning_rate": 1.406734624145786e-05, |
|
"logits/chosen": 1.7856050729751587, |
|
"logits/rejected": 1.7136350870132446, |
|
"logps/chosen": -190.21469116210938, |
|
"logps/rejected": -199.30459594726562, |
|
"loss": 0.4641, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.08599545061588287, |
|
"rewards/margins": 1.0723307132720947, |
|
"rewards/rejected": -0.9863353967666626, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.35022779043280183, |
|
"grad_norm": 9.299216270446777, |
|
"learning_rate": 1.4067078587699317e-05, |
|
"logits/chosen": 2.1109414100646973, |
|
"logits/rejected": 2.084815263748169, |
|
"logps/chosen": -198.41253662109375, |
|
"logps/rejected": -201.63990783691406, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -0.5270928740501404, |
|
"rewards/margins": 0.92596435546875, |
|
"rewards/rejected": -1.453057050704956, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3530751708428246, |
|
"grad_norm": 4.547011852264404, |
|
"learning_rate": 1.4066810933940774e-05, |
|
"logits/chosen": 1.6101499795913696, |
|
"logits/rejected": 1.5630186796188354, |
|
"logps/chosen": -199.369384765625, |
|
"logps/rejected": -206.1526336669922, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": -0.8662067651748657, |
|
"rewards/margins": 0.6912031769752502, |
|
"rewards/rejected": -1.5574098825454712, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3559225512528474, |
|
"grad_norm": 8.161728858947754, |
|
"learning_rate": 1.4066543280182232e-05, |
|
"logits/chosen": 1.626274824142456, |
|
"logits/rejected": 1.608764410018921, |
|
"logps/chosen": -199.33700561523438, |
|
"logps/rejected": -207.776123046875, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.0500733852386475, |
|
"rewards/margins": 0.8237847089767456, |
|
"rewards/rejected": -1.873858094215393, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.35876993166287013, |
|
"grad_norm": 9.337955474853516, |
|
"learning_rate": 1.406627562642369e-05, |
|
"logits/chosen": 1.614162802696228, |
|
"logits/rejected": 1.5890023708343506, |
|
"logps/chosen": -191.52984619140625, |
|
"logps/rejected": -204.58090209960938, |
|
"loss": 0.4568, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -0.9015741348266602, |
|
"rewards/margins": 1.1320171356201172, |
|
"rewards/rejected": -2.0335912704467773, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.36161731207289294, |
|
"grad_norm": 12.726293563842773, |
|
"learning_rate": 1.4066007972665148e-05, |
|
"logits/chosen": 1.9878727197647095, |
|
"logits/rejected": 1.9336633682250977, |
|
"logps/chosen": -196.55551147460938, |
|
"logps/rejected": -206.54592895507812, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9223095178604126, |
|
"rewards/margins": 0.6745713949203491, |
|
"rewards/rejected": -1.5968811511993408, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.36446469248291574, |
|
"grad_norm": 5.186792373657227, |
|
"learning_rate": 1.4065740318906606e-05, |
|
"logits/chosen": 2.260460376739502, |
|
"logits/rejected": 2.2322933673858643, |
|
"logps/chosen": -191.57492065429688, |
|
"logps/rejected": -204.7289581298828, |
|
"loss": 0.4348, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.18739666044712067, |
|
"rewards/margins": 1.1786158084869385, |
|
"rewards/rejected": -1.366012454032898, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3673120728929385, |
|
"grad_norm": 24.165205001831055, |
|
"learning_rate": 1.4065472665148064e-05, |
|
"logits/chosen": 1.9065678119659424, |
|
"logits/rejected": 1.8787645101547241, |
|
"logps/chosen": -190.4572296142578, |
|
"logps/rejected": -203.6009521484375, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2891443371772766, |
|
"rewards/margins": 1.2570773363113403, |
|
"rewards/rejected": -0.967933177947998, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3701594533029613, |
|
"grad_norm": 10.15389633178711, |
|
"learning_rate": 1.4065205011389523e-05, |
|
"logits/chosen": 2.21083664894104, |
|
"logits/rejected": 2.1624722480773926, |
|
"logps/chosen": -191.64566040039062, |
|
"logps/rejected": -203.72608947753906, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -0.5001910924911499, |
|
"rewards/margins": 0.7388169765472412, |
|
"rewards/rejected": -1.2390079498291016, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.37300683371298404, |
|
"grad_norm": 8.427837371826172, |
|
"learning_rate": 1.4064937357630979e-05, |
|
"logits/chosen": 1.5343374013900757, |
|
"logits/rejected": 1.5085475444793701, |
|
"logps/chosen": -198.10842895507812, |
|
"logps/rejected": -214.72476196289062, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.6099187135696411, |
|
"rewards/margins": 1.084291696548462, |
|
"rewards/rejected": -1.694210410118103, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.37585421412300685, |
|
"grad_norm": 9.226961135864258, |
|
"learning_rate": 1.4064669703872437e-05, |
|
"logits/chosen": 2.1237213611602783, |
|
"logits/rejected": 2.0830631256103516, |
|
"logps/chosen": -199.69631958007812, |
|
"logps/rejected": -211.4775390625, |
|
"loss": 0.4277, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -0.9815210103988647, |
|
"rewards/margins": 1.2467997074127197, |
|
"rewards/rejected": -2.228320360183716, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3787015945330296, |
|
"grad_norm": 12.941588401794434, |
|
"learning_rate": 1.4064402050113895e-05, |
|
"logits/chosen": 2.2387449741363525, |
|
"logits/rejected": 2.177280902862549, |
|
"logps/chosen": -207.823974609375, |
|
"logps/rejected": -223.4504852294922, |
|
"loss": 0.3913, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -1.2012609243392944, |
|
"rewards/margins": 1.5182263851165771, |
|
"rewards/rejected": -2.719486951828003, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3815489749430524, |
|
"grad_norm": 14.2744722366333, |
|
"learning_rate": 1.4064134396355354e-05, |
|
"logits/chosen": 2.0903189182281494, |
|
"logits/rejected": 2.0274603366851807, |
|
"logps/chosen": -204.43905639648438, |
|
"logps/rejected": -217.285888671875, |
|
"loss": 0.4283, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.1711370944976807, |
|
"rewards/margins": 1.2676665782928467, |
|
"rewards/rejected": -2.4388039112091064, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.38439635535307515, |
|
"grad_norm": 12.081289291381836, |
|
"learning_rate": 1.4063866742596812e-05, |
|
"logits/chosen": 2.0742850303649902, |
|
"logits/rejected": 2.009190797805786, |
|
"logps/chosen": -202.11859130859375, |
|
"logps/rejected": -212.6031494140625, |
|
"loss": 0.5151, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2778351306915283, |
|
"rewards/margins": 1.1755545139312744, |
|
"rewards/rejected": -2.4533896446228027, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.38724373576309795, |
|
"grad_norm": 11.74995231628418, |
|
"learning_rate": 1.406359908883827e-05, |
|
"logits/chosen": 2.0087525844573975, |
|
"logits/rejected": 1.9561760425567627, |
|
"logps/chosen": -203.6864013671875, |
|
"logps/rejected": -213.94332885742188, |
|
"loss": 0.5604, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": -1.6550267934799194, |
|
"rewards/margins": 0.7960943579673767, |
|
"rewards/rejected": -2.4511213302612305, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.39009111617312076, |
|
"grad_norm": 9.296886444091797, |
|
"learning_rate": 1.4063331435079728e-05, |
|
"logits/chosen": 1.7611463069915771, |
|
"logits/rejected": 1.6809743642807007, |
|
"logps/chosen": -209.0948028564453, |
|
"logps/rejected": -217.0623779296875, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.7166667580604553, |
|
"rewards/chosen": -1.9659429788589478, |
|
"rewards/margins": 0.6299344897270203, |
|
"rewards/rejected": -2.5958774089813232, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3929384965831435, |
|
"grad_norm": 4.803467273712158, |
|
"learning_rate": 1.4063063781321185e-05, |
|
"logits/chosen": 1.5451164245605469, |
|
"logits/rejected": 1.5325957536697388, |
|
"logps/chosen": -208.9802703857422, |
|
"logps/rejected": -213.04342651367188, |
|
"loss": 0.5885, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -1.538881540298462, |
|
"rewards/margins": 0.8458682298660278, |
|
"rewards/rejected": -2.3847498893737793, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3957858769931663, |
|
"grad_norm": 10.66770076751709, |
|
"learning_rate": 1.4062796127562643e-05, |
|
"logits/chosen": 1.4009406566619873, |
|
"logits/rejected": 1.37747323513031, |
|
"logps/chosen": -204.7423095703125, |
|
"logps/rejected": -205.3677520751953, |
|
"loss": 0.4312, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.7842095494270325, |
|
"rewards/margins": 1.1969635486602783, |
|
"rewards/rejected": -1.9811729192733765, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.39863325740318906, |
|
"grad_norm": 7.478954315185547, |
|
"learning_rate": 1.4062528473804101e-05, |
|
"logits/chosen": 1.744096040725708, |
|
"logits/rejected": 1.6914621591567993, |
|
"logps/chosen": -203.30039978027344, |
|
"logps/rejected": -209.58816528320312, |
|
"loss": 0.4153, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.6862292885780334, |
|
"rewards/margins": 1.180418848991394, |
|
"rewards/rejected": -1.8666483163833618, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.40148063781321186, |
|
"grad_norm": 7.500680923461914, |
|
"learning_rate": 1.4062260820045559e-05, |
|
"logits/chosen": 1.2981688976287842, |
|
"logits/rejected": 1.2812628746032715, |
|
"logps/chosen": -198.92678833007812, |
|
"logps/rejected": -209.11264038085938, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -0.5509114265441895, |
|
"rewards/margins": 0.9838689565658569, |
|
"rewards/rejected": -1.5347803831100464, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4043280182232346, |
|
"grad_norm": 7.44333553314209, |
|
"learning_rate": 1.4061993166287017e-05, |
|
"logits/chosen": 1.2393553256988525, |
|
"logits/rejected": 1.1910442113876343, |
|
"logps/chosen": -199.60992431640625, |
|
"logps/rejected": -212.3382110595703, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7129297852516174, |
|
"rewards/margins": 1.1817357540130615, |
|
"rewards/rejected": -1.8946659564971924, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4071753986332574, |
|
"grad_norm": 9.176630020141602, |
|
"learning_rate": 1.4061725512528475e-05, |
|
"logits/chosen": 1.7447378635406494, |
|
"logits/rejected": 1.6703119277954102, |
|
"logps/chosen": -198.02667236328125, |
|
"logps/rejected": -206.87356567382812, |
|
"loss": 0.5164, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -0.953807532787323, |
|
"rewards/margins": 0.9193550944328308, |
|
"rewards/rejected": -1.873162865638733, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.41002277904328016, |
|
"grad_norm": 6.571470737457275, |
|
"learning_rate": 1.4061457858769934e-05, |
|
"logits/chosen": 1.5202054977416992, |
|
"logits/rejected": 1.4729554653167725, |
|
"logps/chosen": -204.49246215820312, |
|
"logps/rejected": -215.2471160888672, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.5182138681411743, |
|
"rewards/margins": 1.0777556896209717, |
|
"rewards/rejected": -2.5959696769714355, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.41287015945330297, |
|
"grad_norm": 11.84085750579834, |
|
"learning_rate": 1.406119020501139e-05, |
|
"logits/chosen": 1.6191060543060303, |
|
"logits/rejected": 1.5997555255889893, |
|
"logps/chosen": -212.749755859375, |
|
"logps/rejected": -218.8915252685547, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -1.8460090160369873, |
|
"rewards/margins": 1.0541627407073975, |
|
"rewards/rejected": -2.9001717567443848, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.4157175398633257, |
|
"grad_norm": 5.9276838302612305, |
|
"learning_rate": 1.4060922551252847e-05, |
|
"logits/chosen": 1.4602617025375366, |
|
"logits/rejected": 1.4181015491485596, |
|
"logps/chosen": -205.3323211669922, |
|
"logps/rejected": -209.7090606689453, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.76666659116745, |
|
"rewards/chosen": -1.1707617044448853, |
|
"rewards/margins": 1.0197278261184692, |
|
"rewards/rejected": -2.1904895305633545, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4185649202733485, |
|
"grad_norm": 11.324074745178223, |
|
"learning_rate": 1.4060654897494305e-05, |
|
"logits/chosen": 1.845425009727478, |
|
"logits/rejected": 1.790997862815857, |
|
"logps/chosen": -205.37661743164062, |
|
"logps/rejected": -211.1995086669922, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6029345989227295, |
|
"rewards/margins": 0.7839881777763367, |
|
"rewards/rejected": -2.38692307472229, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4214123006833713, |
|
"grad_norm": 9.8126220703125, |
|
"learning_rate": 1.4060387243735763e-05, |
|
"logits/chosen": 1.5168917179107666, |
|
"logits/rejected": 1.4749294519424438, |
|
"logps/chosen": -200.8393096923828, |
|
"logps/rejected": -207.88693237304688, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.8076359033584595, |
|
"rewards/margins": 1.1594288349151611, |
|
"rewards/rejected": -1.967064619064331, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.42425968109339407, |
|
"grad_norm": 13.349224090576172, |
|
"learning_rate": 1.4060119589977221e-05, |
|
"logits/chosen": 1.6546955108642578, |
|
"logits/rejected": 1.641208291053772, |
|
"logps/chosen": -198.56167602539062, |
|
"logps/rejected": -204.73019409179688, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -0.6402263045310974, |
|
"rewards/margins": 0.8992630243301392, |
|
"rewards/rejected": -1.5394892692565918, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4271070615034169, |
|
"grad_norm": 16.132837295532227, |
|
"learning_rate": 1.405985193621868e-05, |
|
"logits/chosen": 1.2271068096160889, |
|
"logits/rejected": 1.2327044010162354, |
|
"logps/chosen": -201.4064178466797, |
|
"logps/rejected": -205.9292449951172, |
|
"loss": 0.4608, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -1.0444819927215576, |
|
"rewards/margins": 1.0280535221099854, |
|
"rewards/rejected": -2.072535514831543, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4299544419134396, |
|
"grad_norm": 13.677680969238281, |
|
"learning_rate": 1.4059584282460137e-05, |
|
"logits/chosen": 1.3775099515914917, |
|
"logits/rejected": 1.3772714138031006, |
|
"logps/chosen": -201.0505828857422, |
|
"logps/rejected": -210.51806640625, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -1.2761850357055664, |
|
"rewards/margins": 1.0018669366836548, |
|
"rewards/rejected": -2.2780518531799316, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4328018223234624, |
|
"grad_norm": 13.681148529052734, |
|
"learning_rate": 1.4059316628701594e-05, |
|
"logits/chosen": 1.206789255142212, |
|
"logits/rejected": 1.1529737710952759, |
|
"logps/chosen": -199.44151306152344, |
|
"logps/rejected": -211.2483367919922, |
|
"loss": 0.5924, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2694971561431885, |
|
"rewards/margins": 0.7812899351119995, |
|
"rewards/rejected": -2.0507869720458984, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4356492027334852, |
|
"grad_norm": 6.829047203063965, |
|
"learning_rate": 1.4059048974943052e-05, |
|
"logits/chosen": 1.3871477842330933, |
|
"logits/rejected": 1.3141772747039795, |
|
"logps/chosen": -201.22048950195312, |
|
"logps/rejected": -206.67276000976562, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -1.4746379852294922, |
|
"rewards/margins": 1.0388069152832031, |
|
"rewards/rejected": -2.5134449005126953, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.438496583143508, |
|
"grad_norm": 11.46579360961914, |
|
"learning_rate": 1.405878132118451e-05, |
|
"logits/chosen": 1.273644208908081, |
|
"logits/rejected": 1.2350003719329834, |
|
"logps/chosen": -211.4403839111328, |
|
"logps/rejected": -220.148193359375, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": -1.6227350234985352, |
|
"rewards/margins": 0.9658929705619812, |
|
"rewards/rejected": -2.588628053665161, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4413439635535307, |
|
"grad_norm": 14.203997611999512, |
|
"learning_rate": 1.4058513667425969e-05, |
|
"logits/chosen": 1.1952084302902222, |
|
"logits/rejected": 1.1850013732910156, |
|
"logps/chosen": -203.4510498046875, |
|
"logps/rejected": -210.08004760742188, |
|
"loss": 0.5225, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -1.5274189710617065, |
|
"rewards/margins": 0.8954464197158813, |
|
"rewards/rejected": -2.422865390777588, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.44419134396355353, |
|
"grad_norm": 7.090624809265137, |
|
"learning_rate": 1.4058246013667427e-05, |
|
"logits/chosen": 1.4373114109039307, |
|
"logits/rejected": 1.3903038501739502, |
|
"logps/chosen": -201.7006378173828, |
|
"logps/rejected": -211.03781127929688, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -0.874021053314209, |
|
"rewards/margins": 0.9151598811149597, |
|
"rewards/rejected": -1.7891807556152344, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.44703872437357633, |
|
"grad_norm": 10.441471099853516, |
|
"learning_rate": 1.4057978359908885e-05, |
|
"logits/chosen": 1.3745132684707642, |
|
"logits/rejected": 1.2919235229492188, |
|
"logps/chosen": -198.87559509277344, |
|
"logps/rejected": -217.3492431640625, |
|
"loss": 0.5406, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.0884069204330444, |
|
"rewards/margins": 1.036794900894165, |
|
"rewards/rejected": -2.125201940536499, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4498861047835991, |
|
"grad_norm": 5.564964294433594, |
|
"learning_rate": 1.4057710706150343e-05, |
|
"logits/chosen": 1.195963740348816, |
|
"logits/rejected": 1.1703064441680908, |
|
"logps/chosen": -205.21533203125, |
|
"logps/rejected": -211.69650268554688, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -1.2169151306152344, |
|
"rewards/margins": 0.9917265176773071, |
|
"rewards/rejected": -2.208641529083252, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4527334851936219, |
|
"grad_norm": 5.4993696212768555, |
|
"learning_rate": 1.40574430523918e-05, |
|
"logits/chosen": 0.8286596536636353, |
|
"logits/rejected": 0.8261906504631042, |
|
"logps/chosen": -198.970947265625, |
|
"logps/rejected": -216.66171264648438, |
|
"loss": 0.3587, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8958838582038879, |
|
"rewards/margins": 1.5535662174224854, |
|
"rewards/rejected": -2.4494500160217285, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.45558086560364464, |
|
"grad_norm": 7.70150089263916, |
|
"learning_rate": 1.4057175398633258e-05, |
|
"logits/chosen": 1.3162205219268799, |
|
"logits/rejected": 1.2919889688491821, |
|
"logps/chosen": -206.515625, |
|
"logps/rejected": -222.2469482421875, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -1.3980159759521484, |
|
"rewards/margins": 1.3234660625457764, |
|
"rewards/rejected": -2.7214818000793457, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.45842824601366744, |
|
"grad_norm": 11.316675186157227, |
|
"learning_rate": 1.4056907744874716e-05, |
|
"logits/chosen": 1.2483917474746704, |
|
"logits/rejected": 1.2210924625396729, |
|
"logps/chosen": -206.95620727539062, |
|
"logps/rejected": -220.11636352539062, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7000877857208252, |
|
"rewards/margins": 1.2964524030685425, |
|
"rewards/rejected": -2.9965403079986572, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4612756264236902, |
|
"grad_norm": 14.703081130981445, |
|
"learning_rate": 1.4056640091116174e-05, |
|
"logits/chosen": 1.3368771076202393, |
|
"logits/rejected": 1.2918922901153564, |
|
"logps/chosen": -210.63418579101562, |
|
"logps/rejected": -220.60440063476562, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -1.670444130897522, |
|
"rewards/margins": 1.2454063892364502, |
|
"rewards/rejected": -2.9158504009246826, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.464123006833713, |
|
"grad_norm": 8.3635892868042, |
|
"learning_rate": 1.4056372437357632e-05, |
|
"logits/chosen": 1.290725588798523, |
|
"logits/rejected": 1.2314881086349487, |
|
"logps/chosen": -208.1670684814453, |
|
"logps/rejected": -216.7353057861328, |
|
"loss": 0.3892, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -1.7449800968170166, |
|
"rewards/margins": 1.2695982456207275, |
|
"rewards/rejected": -3.014578342437744, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.46697038724373574, |
|
"grad_norm": 20.11248779296875, |
|
"learning_rate": 1.405610478359909e-05, |
|
"logits/chosen": 1.5877583026885986, |
|
"logits/rejected": 1.5239537954330444, |
|
"logps/chosen": -208.3041534423828, |
|
"logps/rejected": -223.48318481445312, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -1.7909959554672241, |
|
"rewards/margins": 1.4336395263671875, |
|
"rewards/rejected": -3.224635362625122, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.46981776765375854, |
|
"grad_norm": 13.5833101272583, |
|
"learning_rate": 1.4055837129840549e-05, |
|
"logits/chosen": 1.738555669784546, |
|
"logits/rejected": 1.6566972732543945, |
|
"logps/chosen": -209.2194061279297, |
|
"logps/rejected": -219.2682647705078, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -1.7458181381225586, |
|
"rewards/margins": 1.2833728790283203, |
|
"rewards/rejected": -3.029191255569458, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.47266514806378135, |
|
"grad_norm": 22.821298599243164, |
|
"learning_rate": 1.4055569476082005e-05, |
|
"logits/chosen": 1.3880656957626343, |
|
"logits/rejected": 1.3096697330474854, |
|
"logps/chosen": -203.7255859375, |
|
"logps/rejected": -220.28024291992188, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0704998970031738, |
|
"rewards/margins": 1.3066881895065308, |
|
"rewards/rejected": -2.377188205718994, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4755125284738041, |
|
"grad_norm": 11.496463775634766, |
|
"learning_rate": 1.4055301822323463e-05, |
|
"logits/chosen": 1.432905912399292, |
|
"logits/rejected": 1.3992332220077515, |
|
"logps/chosen": -208.29953002929688, |
|
"logps/rejected": -224.51248168945312, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -1.9868751764297485, |
|
"rewards/margins": 1.2016279697418213, |
|
"rewards/rejected": -3.1885030269622803, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4783599088838269, |
|
"grad_norm": 8.086127281188965, |
|
"learning_rate": 1.405503416856492e-05, |
|
"logits/chosen": 1.6646270751953125, |
|
"logits/rejected": 1.6253557205200195, |
|
"logps/chosen": -216.95669555664062, |
|
"logps/rejected": -232.74746704101562, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -2.4913864135742188, |
|
"rewards/margins": 1.5564768314361572, |
|
"rewards/rejected": -4.047863483428955, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.48120728929384965, |
|
"grad_norm": 13.472739219665527, |
|
"learning_rate": 1.4054766514806378e-05, |
|
"logits/chosen": 2.18731427192688, |
|
"logits/rejected": 2.1197452545166016, |
|
"logps/chosen": -221.1719970703125, |
|
"logps/rejected": -232.5544891357422, |
|
"loss": 0.4315, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -2.685852289199829, |
|
"rewards/margins": 1.4282853603363037, |
|
"rewards/rejected": -4.114137172698975, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.48405466970387245, |
|
"grad_norm": 12.491531372070312, |
|
"learning_rate": 1.4054498861047836e-05, |
|
"logits/chosen": 1.754373550415039, |
|
"logits/rejected": 1.7022701501846313, |
|
"logps/chosen": -210.3401641845703, |
|
"logps/rejected": -228.8831024169922, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -2.0991439819335938, |
|
"rewards/margins": 1.6580852270126343, |
|
"rewards/rejected": -3.7572293281555176, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4869020501138952, |
|
"grad_norm": 18.713422775268555, |
|
"learning_rate": 1.4054231207289294e-05, |
|
"logits/chosen": 1.9377641677856445, |
|
"logits/rejected": 1.906818151473999, |
|
"logps/chosen": -217.85159301757812, |
|
"logps/rejected": -230.37515258789062, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -2.5920052528381348, |
|
"rewards/margins": 1.5738455057144165, |
|
"rewards/rejected": -4.1658501625061035, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.489749430523918, |
|
"grad_norm": 21.47015380859375, |
|
"learning_rate": 1.4053963553530752e-05, |
|
"logits/chosen": 1.4932907819747925, |
|
"logits/rejected": 1.4301973581314087, |
|
"logps/chosen": -216.68905639648438, |
|
"logps/rejected": -233.25424194335938, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -2.9133524894714355, |
|
"rewards/margins": 1.4387586116790771, |
|
"rewards/rejected": -4.352110862731934, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.49259681093394075, |
|
"grad_norm": 14.534845352172852, |
|
"learning_rate": 1.4053695899772209e-05, |
|
"logits/chosen": 1.4016607999801636, |
|
"logits/rejected": 1.3695752620697021, |
|
"logps/chosen": -219.4730682373047, |
|
"logps/rejected": -229.04714965820312, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -3.115053176879883, |
|
"rewards/margins": 1.099458932876587, |
|
"rewards/rejected": -4.214511871337891, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.49544419134396356, |
|
"grad_norm": 21.40360450744629, |
|
"learning_rate": 1.4053428246013667e-05, |
|
"logits/chosen": 1.6236956119537354, |
|
"logits/rejected": 1.583548903465271, |
|
"logps/chosen": -214.4650421142578, |
|
"logps/rejected": -230.59060668945312, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -2.8126559257507324, |
|
"rewards/margins": 1.1184018850326538, |
|
"rewards/rejected": -3.9310576915740967, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.4982915717539863, |
|
"grad_norm": 19.227012634277344, |
|
"learning_rate": 1.4053160592255125e-05, |
|
"logits/chosen": 1.6187642812728882, |
|
"logits/rejected": 1.6092262268066406, |
|
"logps/chosen": -218.21994018554688, |
|
"logps/rejected": -234.0594024658203, |
|
"loss": 0.3583, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -2.342740535736084, |
|
"rewards/margins": 1.5823628902435303, |
|
"rewards/rejected": -3.925102949142456, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5011389521640092, |
|
"grad_norm": 20.58732795715332, |
|
"learning_rate": 1.4052892938496583e-05, |
|
"logits/chosen": 2.2254767417907715, |
|
"logits/rejected": 2.212104320526123, |
|
"logps/chosen": -214.5471954345703, |
|
"logps/rejected": -223.7714080810547, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -2.4208734035491943, |
|
"rewards/margins": 1.094592809677124, |
|
"rewards/rejected": -3.5154662132263184, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5039863325740319, |
|
"grad_norm": 12.883881568908691, |
|
"learning_rate": 1.4052625284738042e-05, |
|
"logits/chosen": 1.7898054122924805, |
|
"logits/rejected": 1.698301076889038, |
|
"logps/chosen": -219.19509887695312, |
|
"logps/rejected": -229.88052368164062, |
|
"loss": 0.4222, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -2.7343392372131348, |
|
"rewards/margins": 1.2842615842819214, |
|
"rewards/rejected": -4.018601417541504, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5068337129840547, |
|
"grad_norm": 11.711604118347168, |
|
"learning_rate": 1.40523576309795e-05, |
|
"logits/chosen": 1.5762044191360474, |
|
"logits/rejected": 1.5645755529403687, |
|
"logps/chosen": -222.1739959716797, |
|
"logps/rejected": -230.30014038085938, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.6333333253860474, |
|
"rewards/chosen": -2.8136379718780518, |
|
"rewards/margins": 0.9600374102592468, |
|
"rewards/rejected": -3.7736752033233643, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5096810933940774, |
|
"grad_norm": 13.225159645080566, |
|
"learning_rate": 1.4052089977220958e-05, |
|
"logits/chosen": 1.4274075031280518, |
|
"logits/rejected": 1.366379976272583, |
|
"logps/chosen": -209.89151000976562, |
|
"logps/rejected": -229.04910278320312, |
|
"loss": 0.4188, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -2.8537209033966064, |
|
"rewards/margins": 1.4593942165374756, |
|
"rewards/rejected": -4.313115119934082, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5125284738041003, |
|
"grad_norm": 12.145174980163574, |
|
"learning_rate": 1.4051822323462414e-05, |
|
"logits/chosen": 1.1467812061309814, |
|
"logits/rejected": 1.1414228677749634, |
|
"logps/chosen": -217.54013061523438, |
|
"logps/rejected": -235.8665771484375, |
|
"loss": 0.5305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -3.0461976528167725, |
|
"rewards/margins": 1.3629333972930908, |
|
"rewards/rejected": -4.409131050109863, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.515375854214123, |
|
"grad_norm": 12.6741304397583, |
|
"learning_rate": 1.4051554669703873e-05, |
|
"logits/chosen": 1.9150466918945312, |
|
"logits/rejected": 1.8071298599243164, |
|
"logps/chosen": -220.7820281982422, |
|
"logps/rejected": -233.08389282226562, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -2.813331127166748, |
|
"rewards/margins": 1.5690648555755615, |
|
"rewards/rejected": -4.3823957443237305, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5182232346241458, |
|
"grad_norm": 15.344178199768066, |
|
"learning_rate": 1.405128701594533e-05, |
|
"logits/chosen": 1.4123234748840332, |
|
"logits/rejected": 1.3962544202804565, |
|
"logps/chosen": -217.7815399169922, |
|
"logps/rejected": -224.8722381591797, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -3.3274052143096924, |
|
"rewards/margins": 0.9693098068237305, |
|
"rewards/rejected": -4.296715259552002, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5210706150341685, |
|
"grad_norm": 10.50731086730957, |
|
"learning_rate": 1.4051019362186789e-05, |
|
"logits/chosen": 1.4529472589492798, |
|
"logits/rejected": 1.4127845764160156, |
|
"logps/chosen": -215.39364624023438, |
|
"logps/rejected": -225.745361328125, |
|
"loss": 0.4139, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4642977714538574, |
|
"rewards/margins": 1.5033318996429443, |
|
"rewards/rejected": -3.9676296710968018, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5239179954441914, |
|
"grad_norm": 14.13061237335205, |
|
"learning_rate": 1.4050751708428247e-05, |
|
"logits/chosen": 1.4374377727508545, |
|
"logits/rejected": 1.3640098571777344, |
|
"logps/chosen": -216.8865203857422, |
|
"logps/rejected": -234.1781005859375, |
|
"loss": 0.4319, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -2.7498998641967773, |
|
"rewards/margins": 1.7011677026748657, |
|
"rewards/rejected": -4.451067924499512, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5267653758542141, |
|
"grad_norm": 13.41270923614502, |
|
"learning_rate": 1.4050484054669705e-05, |
|
"logits/chosen": 1.7079713344573975, |
|
"logits/rejected": 1.695387840270996, |
|
"logps/chosen": -222.7223663330078, |
|
"logps/rejected": -232.77090454101562, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.7499999403953552, |
|
"rewards/chosen": -3.0100722312927246, |
|
"rewards/margins": 1.1305018663406372, |
|
"rewards/rejected": -4.1405744552612305, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5296127562642369, |
|
"grad_norm": 12.832780838012695, |
|
"learning_rate": 1.4050216400911163e-05, |
|
"logits/chosen": 2.0568480491638184, |
|
"logits/rejected": 1.9985520839691162, |
|
"logps/chosen": -212.0758819580078, |
|
"logps/rejected": -225.4073486328125, |
|
"loss": 0.4213, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0503463745117188, |
|
"rewards/margins": 1.4921596050262451, |
|
"rewards/rejected": -3.542506456375122, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5324601366742597, |
|
"grad_norm": 18.81167984008789, |
|
"learning_rate": 1.404994874715262e-05, |
|
"logits/chosen": 1.6365169286727905, |
|
"logits/rejected": 1.6133226156234741, |
|
"logps/chosen": -204.10067749023438, |
|
"logps/rejected": -214.98257446289062, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -1.8329346179962158, |
|
"rewards/margins": 0.8236812353134155, |
|
"rewards/rejected": -2.656615972518921, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5353075170842825, |
|
"grad_norm": 11.395954132080078, |
|
"learning_rate": 1.4049681093394078e-05, |
|
"logits/chosen": 1.637677788734436, |
|
"logits/rejected": 1.5944675207138062, |
|
"logps/chosen": -202.73739624023438, |
|
"logps/rejected": -214.62820434570312, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -1.8346195220947266, |
|
"rewards/margins": 1.047932505607605, |
|
"rewards/rejected": -2.882551908493042, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5381548974943052, |
|
"grad_norm": 12.5642728805542, |
|
"learning_rate": 1.4049413439635536e-05, |
|
"logits/chosen": 1.4088985919952393, |
|
"logits/rejected": 1.3726544380187988, |
|
"logps/chosen": -211.74282836914062, |
|
"logps/rejected": -227.03305053710938, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.4925270080566406, |
|
"rewards/margins": 1.1374258995056152, |
|
"rewards/rejected": -3.629952907562256, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.541002277904328, |
|
"grad_norm": 18.130271911621094, |
|
"learning_rate": 1.4049145785876993e-05, |
|
"logits/chosen": 1.833918809890747, |
|
"logits/rejected": 1.8095060586929321, |
|
"logps/chosen": -213.5587921142578, |
|
"logps/rejected": -226.3291473388672, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.095309019088745, |
|
"rewards/margins": 1.4185867309570312, |
|
"rewards/rejected": -3.5138957500457764, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5438496583143508, |
|
"grad_norm": 5.79637336730957, |
|
"learning_rate": 1.4048878132118451e-05, |
|
"logits/chosen": 1.5354747772216797, |
|
"logits/rejected": 1.4952408075332642, |
|
"logps/chosen": -218.20327758789062, |
|
"logps/rejected": -226.9008026123047, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -2.4994616508483887, |
|
"rewards/margins": 1.0572245121002197, |
|
"rewards/rejected": -3.5566864013671875, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5466970387243736, |
|
"grad_norm": 8.202295303344727, |
|
"learning_rate": 1.4048610478359909e-05, |
|
"logits/chosen": 1.4175798892974854, |
|
"logits/rejected": 1.3886915445327759, |
|
"logps/chosen": -214.7366485595703, |
|
"logps/rejected": -224.1475372314453, |
|
"loss": 0.5296, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -2.728301525115967, |
|
"rewards/margins": 1.0051238536834717, |
|
"rewards/rejected": -3.7334251403808594, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5495444191343963, |
|
"grad_norm": 11.456938743591309, |
|
"learning_rate": 1.4048342824601367e-05, |
|
"logits/chosen": 1.2038923501968384, |
|
"logits/rejected": 1.2050249576568604, |
|
"logps/chosen": -210.61184692382812, |
|
"logps/rejected": -225.3157958984375, |
|
"loss": 0.4021, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -1.8231241703033447, |
|
"rewards/margins": 1.664263129234314, |
|
"rewards/rejected": -3.487387180328369, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5523917995444191, |
|
"grad_norm": 7.846871852874756, |
|
"learning_rate": 1.4048075170842824e-05, |
|
"logits/chosen": 1.7540676593780518, |
|
"logits/rejected": 1.7196149826049805, |
|
"logps/chosen": -210.4138946533203, |
|
"logps/rejected": -223.84194946289062, |
|
"loss": 0.4072, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.289306163787842, |
|
"rewards/margins": 1.4605536460876465, |
|
"rewards/rejected": -3.7498602867126465, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5552391799544419, |
|
"grad_norm": 10.873760223388672, |
|
"learning_rate": 1.4047807517084282e-05, |
|
"logits/chosen": 1.411709189414978, |
|
"logits/rejected": 1.394074559211731, |
|
"logps/chosen": -211.9527587890625, |
|
"logps/rejected": -222.88851928710938, |
|
"loss": 0.5421, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.567718505859375, |
|
"rewards/margins": 1.1373217105865479, |
|
"rewards/rejected": -3.705040693283081, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5580865603644647, |
|
"grad_norm": 13.660347938537598, |
|
"learning_rate": 1.404753986332574e-05, |
|
"logits/chosen": 2.127122163772583, |
|
"logits/rejected": 2.038539409637451, |
|
"logps/chosen": -208.9009246826172, |
|
"logps/rejected": -228.55947875976562, |
|
"loss": 0.3832, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.088381052017212, |
|
"rewards/margins": 1.6521527767181396, |
|
"rewards/rejected": -3.7405338287353516, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5609339407744874, |
|
"grad_norm": 6.0093488693237305, |
|
"learning_rate": 1.4047272209567198e-05, |
|
"logits/chosen": 1.5527527332305908, |
|
"logits/rejected": 1.4864261150360107, |
|
"logps/chosen": -208.0782928466797, |
|
"logps/rejected": -223.2481689453125, |
|
"loss": 0.3406, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -1.4067806005477905, |
|
"rewards/margins": 1.806133508682251, |
|
"rewards/rejected": -3.2129147052764893, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5637813211845103, |
|
"grad_norm": 21.775585174560547, |
|
"learning_rate": 1.4047004555808656e-05, |
|
"logits/chosen": 1.493690848350525, |
|
"logits/rejected": 1.4828380346298218, |
|
"logps/chosen": -211.7380828857422, |
|
"logps/rejected": -225.0364227294922, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.7666667699813843, |
|
"rewards/chosen": -1.9734036922454834, |
|
"rewards/margins": 1.117187738418579, |
|
"rewards/rejected": -3.0905914306640625, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.566628701594533, |
|
"grad_norm": 15.051896095275879, |
|
"learning_rate": 1.4046736902050115e-05, |
|
"logits/chosen": 1.7279115915298462, |
|
"logits/rejected": 1.6872913837432861, |
|
"logps/chosen": -212.10720825195312, |
|
"logps/rejected": -215.74484252929688, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.833011269569397, |
|
"rewards/margins": 1.0812532901763916, |
|
"rewards/rejected": -2.914264440536499, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"grad_norm": 5.998073101043701, |
|
"learning_rate": 1.4046469248291573e-05, |
|
"logits/chosen": 1.845990777015686, |
|
"logits/rejected": 1.782735824584961, |
|
"logps/chosen": -199.06289672851562, |
|
"logps/rejected": -205.4522705078125, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.7166666388511658, |
|
"rewards/chosen": -0.7677577137947083, |
|
"rewards/margins": 0.9643963575363159, |
|
"rewards/rejected": -1.732154130935669, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5723234624145785, |
|
"grad_norm": 5.928720951080322, |
|
"learning_rate": 1.404620159453303e-05, |
|
"logits/chosen": 1.8845545053482056, |
|
"logits/rejected": 1.8174835443496704, |
|
"logps/chosen": -196.9351043701172, |
|
"logps/rejected": -210.2369842529297, |
|
"loss": 0.4358, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -0.5869276523590088, |
|
"rewards/margins": 1.3779737949371338, |
|
"rewards/rejected": -1.9649015665054321, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5751708428246014, |
|
"grad_norm": 4.601451396942139, |
|
"learning_rate": 1.4045933940774487e-05, |
|
"logits/chosen": 2.0156209468841553, |
|
"logits/rejected": 1.9196068048477173, |
|
"logps/chosen": -204.44776916503906, |
|
"logps/rejected": -219.51742553710938, |
|
"loss": 0.4502, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.471993088722229, |
|
"rewards/margins": 1.4576828479766846, |
|
"rewards/rejected": -2.929675579071045, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5780182232346242, |
|
"grad_norm": 9.150592803955078, |
|
"learning_rate": 1.4045666287015946e-05, |
|
"logits/chosen": 1.4569592475891113, |
|
"logits/rejected": 1.42463219165802, |
|
"logps/chosen": -213.670166015625, |
|
"logps/rejected": -219.941162109375, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.6833333373069763, |
|
"rewards/chosen": -2.2914419174194336, |
|
"rewards/margins": 0.8582653999328613, |
|
"rewards/rejected": -3.149707555770874, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5808656036446469, |
|
"grad_norm": 12.566153526306152, |
|
"learning_rate": 1.4045398633257404e-05, |
|
"logits/chosen": 1.3458335399627686, |
|
"logits/rejected": 1.3180339336395264, |
|
"logps/chosen": -220.4646453857422, |
|
"logps/rejected": -230.4011688232422, |
|
"loss": 0.4585, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -2.5000290870666504, |
|
"rewards/margins": 1.1148213148117065, |
|
"rewards/rejected": -3.614849805831909, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5837129840546698, |
|
"grad_norm": 6.501167297363281, |
|
"learning_rate": 1.4045130979498862e-05, |
|
"logits/chosen": 1.350414514541626, |
|
"logits/rejected": 1.342252254486084, |
|
"logps/chosen": -209.9090118408203, |
|
"logps/rejected": -220.7002716064453, |
|
"loss": 0.4906, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6956933736801147, |
|
"rewards/margins": 1.4066914319992065, |
|
"rewards/rejected": -3.1023848056793213, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5865603644646925, |
|
"grad_norm": 10.026237487792969, |
|
"learning_rate": 1.404486332574032e-05, |
|
"logits/chosen": 1.8001444339752197, |
|
"logits/rejected": 1.7271541357040405, |
|
"logps/chosen": -201.92617797851562, |
|
"logps/rejected": -216.8983917236328, |
|
"loss": 0.4417, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -1.1111762523651123, |
|
"rewards/margins": 1.3895812034606934, |
|
"rewards/rejected": -2.5007576942443848, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5894077448747153, |
|
"grad_norm": 10.341662406921387, |
|
"learning_rate": 1.4044595671981778e-05, |
|
"logits/chosen": 1.6093097925186157, |
|
"logits/rejected": 1.55720853805542, |
|
"logps/chosen": -203.5904083251953, |
|
"logps/rejected": -214.962890625, |
|
"loss": 0.5121, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -1.2161873579025269, |
|
"rewards/margins": 1.0402132272720337, |
|
"rewards/rejected": -2.2564005851745605, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.592255125284738, |
|
"grad_norm": 9.483428001403809, |
|
"learning_rate": 1.4044328018223235e-05, |
|
"logits/chosen": 1.3485796451568604, |
|
"logits/rejected": 1.2952206134796143, |
|
"logps/chosen": -198.46286010742188, |
|
"logps/rejected": -209.2959442138672, |
|
"loss": 0.4198, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -0.9194291830062866, |
|
"rewards/margins": 1.4297075271606445, |
|
"rewards/rejected": -2.3491368293762207, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5951025056947609, |
|
"grad_norm": 6.516709804534912, |
|
"learning_rate": 1.4044060364464693e-05, |
|
"logits/chosen": 1.5634223222732544, |
|
"logits/rejected": 1.5188058614730835, |
|
"logps/chosen": -199.25697326660156, |
|
"logps/rejected": -212.009033203125, |
|
"loss": 0.5602, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -1.1728389263153076, |
|
"rewards/margins": 1.0664641857147217, |
|
"rewards/rejected": -2.2393031120300293, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5979498861047836, |
|
"grad_norm": 11.579174995422363, |
|
"learning_rate": 1.4043792710706151e-05, |
|
"logits/chosen": 1.5612658262252808, |
|
"logits/rejected": 1.540191650390625, |
|
"logps/chosen": -202.50173950195312, |
|
"logps/rejected": -211.82626342773438, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.6833333969116211, |
|
"rewards/chosen": -1.4498227834701538, |
|
"rewards/margins": 0.8200041055679321, |
|
"rewards/rejected": -2.269826650619507, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6007972665148064, |
|
"grad_norm": 8.937296867370605, |
|
"learning_rate": 1.404352505694761e-05, |
|
"logits/chosen": 1.3049190044403076, |
|
"logits/rejected": 1.3015944957733154, |
|
"logps/chosen": -205.02505493164062, |
|
"logps/rejected": -214.50637817382812, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -1.48309326171875, |
|
"rewards/margins": 1.045582890510559, |
|
"rewards/rejected": -2.5286762714385986, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6036446469248291, |
|
"grad_norm": 12.235701560974121, |
|
"learning_rate": 1.4043257403189068e-05, |
|
"logits/chosen": 1.2076904773712158, |
|
"logits/rejected": 1.176274061203003, |
|
"logps/chosen": -212.11544799804688, |
|
"logps/rejected": -223.24087524414062, |
|
"loss": 0.4785, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -1.900011420249939, |
|
"rewards/margins": 1.1970151662826538, |
|
"rewards/rejected": -3.0970263481140137, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.606492027334852, |
|
"grad_norm": 4.460309982299805, |
|
"learning_rate": 1.4042989749430524e-05, |
|
"logits/chosen": 1.0031187534332275, |
|
"logits/rejected": 1.0076453685760498, |
|
"logps/chosen": -210.66738891601562, |
|
"logps/rejected": -221.1210479736328, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1503262519836426, |
|
"rewards/margins": 1.0407392978668213, |
|
"rewards/rejected": -3.191065549850464, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6093394077448747, |
|
"grad_norm": 10.836349487304688, |
|
"learning_rate": 1.4042722095671982e-05, |
|
"logits/chosen": 1.7120803594589233, |
|
"logits/rejected": 1.6473640203475952, |
|
"logps/chosen": -205.09329223632812, |
|
"logps/rejected": -219.45925903320312, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.839080572128296, |
|
"rewards/margins": 1.4085099697113037, |
|
"rewards/rejected": -3.2475905418395996, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6121867881548975, |
|
"grad_norm": 9.348235130310059, |
|
"learning_rate": 1.404245444191344e-05, |
|
"logits/chosen": 1.4184033870697021, |
|
"logits/rejected": 1.4061453342437744, |
|
"logps/chosen": -201.64305114746094, |
|
"logps/rejected": -215.83334350585938, |
|
"loss": 0.4861, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -1.3850140571594238, |
|
"rewards/margins": 1.0105946063995361, |
|
"rewards/rejected": -2.395608901977539, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6150341685649203, |
|
"grad_norm": 7.500626564025879, |
|
"learning_rate": 1.4042186788154897e-05, |
|
"logits/chosen": 1.6477140188217163, |
|
"logits/rejected": 1.5993112325668335, |
|
"logps/chosen": -207.88192749023438, |
|
"logps/rejected": -220.2103729248047, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -1.7979981899261475, |
|
"rewards/margins": 0.9870179295539856, |
|
"rewards/rejected": -2.7850160598754883, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6178815489749431, |
|
"grad_norm": 7.5613226890563965, |
|
"learning_rate": 1.4041919134396355e-05, |
|
"logits/chosen": 1.591292381286621, |
|
"logits/rejected": 1.5465342998504639, |
|
"logps/chosen": -211.07601928710938, |
|
"logps/rejected": -226.35107421875, |
|
"loss": 0.3141, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -2.4218368530273438, |
|
"rewards/margins": 1.6328353881835938, |
|
"rewards/rejected": -4.0546722412109375, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6207289293849658, |
|
"grad_norm": 7.95067834854126, |
|
"learning_rate": 1.4041651480637813e-05, |
|
"logits/chosen": 1.7995331287384033, |
|
"logits/rejected": 1.7834396362304688, |
|
"logps/chosen": -212.1343536376953, |
|
"logps/rejected": -230.96517944335938, |
|
"loss": 0.3429, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -1.9310157299041748, |
|
"rewards/margins": 1.5808441638946533, |
|
"rewards/rejected": -3.5118603706359863, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6235763097949886, |
|
"grad_norm": 11.615964889526367, |
|
"learning_rate": 1.4041383826879271e-05, |
|
"logits/chosen": 1.692636251449585, |
|
"logits/rejected": 1.6496816873550415, |
|
"logps/chosen": -216.45529174804688, |
|
"logps/rejected": -233.55355834960938, |
|
"loss": 0.4677, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -2.4663166999816895, |
|
"rewards/margins": 1.7424099445343018, |
|
"rewards/rejected": -4.208726406097412, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6264236902050114, |
|
"grad_norm": 16.468753814697266, |
|
"learning_rate": 1.404111617312073e-05, |
|
"logits/chosen": 1.7807762622833252, |
|
"logits/rejected": 1.729928970336914, |
|
"logps/chosen": -222.51431274414062, |
|
"logps/rejected": -239.35311889648438, |
|
"loss": 0.4872, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.9725635051727295, |
|
"rewards/margins": 1.4294450283050537, |
|
"rewards/rejected": -4.402008533477783, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6292710706150342, |
|
"grad_norm": 10.072558403015137, |
|
"learning_rate": 1.4040848519362188e-05, |
|
"logits/chosen": 1.8452990055084229, |
|
"logits/rejected": 1.7833513021469116, |
|
"logps/chosen": -233.5411834716797, |
|
"logps/rejected": -248.5119171142578, |
|
"loss": 0.4603, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -3.804687976837158, |
|
"rewards/margins": 1.5763683319091797, |
|
"rewards/rejected": -5.381056785583496, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6321184510250569, |
|
"grad_norm": 19.554895401000977, |
|
"learning_rate": 1.4040580865603646e-05, |
|
"logits/chosen": 1.4583690166473389, |
|
"logits/rejected": 1.3847358226776123, |
|
"logps/chosen": -231.95474243164062, |
|
"logps/rejected": -237.9390106201172, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -4.024319648742676, |
|
"rewards/margins": 1.3094077110290527, |
|
"rewards/rejected": -5.3337273597717285, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6349658314350797, |
|
"grad_norm": 16.957067489624023, |
|
"learning_rate": 1.4040313211845102e-05, |
|
"logits/chosen": 1.5189851522445679, |
|
"logits/rejected": 1.4547855854034424, |
|
"logps/chosen": -220.9446563720703, |
|
"logps/rejected": -236.49050903320312, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -3.6130733489990234, |
|
"rewards/margins": 1.352766990661621, |
|
"rewards/rejected": -4.9658403396606445, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6378132118451025, |
|
"grad_norm": 14.116981506347656, |
|
"learning_rate": 1.404004555808656e-05, |
|
"logits/chosen": 1.5582940578460693, |
|
"logits/rejected": 1.4913814067840576, |
|
"logps/chosen": -220.11062622070312, |
|
"logps/rejected": -236.986083984375, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -2.6138153076171875, |
|
"rewards/margins": 1.7570054531097412, |
|
"rewards/rejected": -4.370820045471191, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6406605922551253, |
|
"grad_norm": 5.499735355377197, |
|
"learning_rate": 1.4039777904328019e-05, |
|
"logits/chosen": 1.5779650211334229, |
|
"logits/rejected": 1.5341997146606445, |
|
"logps/chosen": -215.645263671875, |
|
"logps/rejected": -229.6122283935547, |
|
"loss": 0.3973, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -2.5405592918395996, |
|
"rewards/margins": 1.463417649269104, |
|
"rewards/rejected": -4.003976821899414, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.643507972665148, |
|
"grad_norm": 6.563097953796387, |
|
"learning_rate": 1.4039510250569477e-05, |
|
"logits/chosen": 1.6726646423339844, |
|
"logits/rejected": 1.6540085077285767, |
|
"logps/chosen": -224.57510375976562, |
|
"logps/rejected": -235.57284545898438, |
|
"loss": 0.4268, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -2.6444642543792725, |
|
"rewards/margins": 1.399488091468811, |
|
"rewards/rejected": -4.043952465057373, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6463553530751709, |
|
"grad_norm": 17.193113327026367, |
|
"learning_rate": 1.4039242596810935e-05, |
|
"logits/chosen": 1.5830856561660767, |
|
"logits/rejected": 1.5117225646972656, |
|
"logps/chosen": -223.4087677001953, |
|
"logps/rejected": -238.30709838867188, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.8500000834465027, |
|
"rewards/chosen": -3.0339622497558594, |
|
"rewards/margins": 1.8273839950561523, |
|
"rewards/rejected": -4.861346244812012, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6492027334851936, |
|
"grad_norm": 22.565580368041992, |
|
"learning_rate": 1.4038974943052393e-05, |
|
"logits/chosen": 1.512303113937378, |
|
"logits/rejected": 1.47734534740448, |
|
"logps/chosen": -235.7754364013672, |
|
"logps/rejected": -248.6204833984375, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.7000000476837158, |
|
"rewards/chosen": -4.013455390930176, |
|
"rewards/margins": 1.3606445789337158, |
|
"rewards/rejected": -5.3740997314453125, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6520501138952164, |
|
"grad_norm": 6.0489349365234375, |
|
"learning_rate": 1.4038707289293851e-05, |
|
"logits/chosen": 1.7461280822753906, |
|
"logits/rejected": 1.7028367519378662, |
|
"logps/chosen": -232.1387939453125, |
|
"logps/rejected": -248.25048828125, |
|
"loss": 0.3073, |
|
"rewards/accuracies": 0.8999999165534973, |
|
"rewards/chosen": -4.349621772766113, |
|
"rewards/margins": 1.8438152074813843, |
|
"rewards/rejected": -6.193437099456787, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6548974943052391, |
|
"grad_norm": 15.137632369995117, |
|
"learning_rate": 1.4038439635535308e-05, |
|
"logits/chosen": 1.4312952756881714, |
|
"logits/rejected": 1.3589736223220825, |
|
"logps/chosen": -237.01358032226562, |
|
"logps/rejected": -246.2064208984375, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.6999999284744263, |
|
"rewards/chosen": -4.380759239196777, |
|
"rewards/margins": 1.162846565246582, |
|
"rewards/rejected": -5.543606281280518, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.657744874715262, |
|
"grad_norm": 5.352278709411621, |
|
"learning_rate": 1.4038171981776766e-05, |
|
"logits/chosen": 1.5901074409484863, |
|
"logits/rejected": 1.547982931137085, |
|
"logps/chosen": -228.4188232421875, |
|
"logps/rejected": -245.8745574951172, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -4.198389053344727, |
|
"rewards/margins": 1.272357702255249, |
|
"rewards/rejected": -5.470747470855713, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6605922551252847, |
|
"grad_norm": 13.036628723144531, |
|
"learning_rate": 1.4037904328018224e-05, |
|
"logits/chosen": 1.4683014154434204, |
|
"logits/rejected": 1.424804449081421, |
|
"logps/chosen": -234.803955078125, |
|
"logps/rejected": -243.31240844726562, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -4.2275567054748535, |
|
"rewards/margins": 1.2187873125076294, |
|
"rewards/rejected": -5.446343898773193, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6634396355353075, |
|
"grad_norm": 6.533375263214111, |
|
"learning_rate": 1.4037636674259682e-05, |
|
"logits/chosen": 0.9792621731758118, |
|
"logits/rejected": 0.9765647053718567, |
|
"logps/chosen": -230.74819946289062, |
|
"logps/rejected": -242.41702270507812, |
|
"loss": 0.5086, |
|
"rewards/accuracies": 0.7499999403953552, |
|
"rewards/chosen": -4.1636061668396, |
|
"rewards/margins": 1.1499837636947632, |
|
"rewards/rejected": -5.313590049743652, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6662870159453302, |
|
"grad_norm": 18.044649124145508, |
|
"learning_rate": 1.403736902050114e-05, |
|
"logits/chosen": 1.584518551826477, |
|
"logits/rejected": 1.54861319065094, |
|
"logps/chosen": -239.1269073486328, |
|
"logps/rejected": -250.5943145751953, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -4.921685218811035, |
|
"rewards/margins": 1.0676127672195435, |
|
"rewards/rejected": -5.989298343658447, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.6691343963553531, |
|
"grad_norm": 19.3489990234375, |
|
"learning_rate": 1.4037101366742597e-05, |
|
"logits/chosen": 1.6986758708953857, |
|
"logits/rejected": 1.633707046508789, |
|
"logps/chosen": -240.86245727539062, |
|
"logps/rejected": -250.063232421875, |
|
"loss": 0.4793, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -4.555212020874023, |
|
"rewards/margins": 1.461682915687561, |
|
"rewards/rejected": -6.016894817352295, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6719817767653758, |
|
"grad_norm": 19.8951358795166, |
|
"learning_rate": 1.4036833712984055e-05, |
|
"logits/chosen": 1.5498108863830566, |
|
"logits/rejected": 1.521090030670166, |
|
"logps/chosen": -231.66934204101562, |
|
"logps/rejected": -248.7901611328125, |
|
"loss": 0.3812, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -4.384482383728027, |
|
"rewards/margins": 1.5079156160354614, |
|
"rewards/rejected": -5.892397880554199, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6748291571753986, |
|
"grad_norm": 5.044203758239746, |
|
"learning_rate": 1.4036566059225512e-05, |
|
"logits/chosen": 1.6914507150650024, |
|
"logits/rejected": 1.6152887344360352, |
|
"logps/chosen": -239.70755004882812, |
|
"logps/rejected": -258.3238220214844, |
|
"loss": 0.4626, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -5.004892826080322, |
|
"rewards/margins": 1.4128177165985107, |
|
"rewards/rejected": -6.417710781097412, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6776765375854215, |
|
"grad_norm": 8.82512092590332, |
|
"learning_rate": 1.403629840546697e-05, |
|
"logits/chosen": 1.7223188877105713, |
|
"logits/rejected": 1.6093647480010986, |
|
"logps/chosen": -240.8391876220703, |
|
"logps/rejected": -253.4154815673828, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -4.948991298675537, |
|
"rewards/margins": 1.9588031768798828, |
|
"rewards/rejected": -6.907794952392578, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6805239179954442, |
|
"grad_norm": 9.408480644226074, |
|
"learning_rate": 1.4036030751708428e-05, |
|
"logits/chosen": 1.3756048679351807, |
|
"logits/rejected": 1.3040671348571777, |
|
"logps/chosen": -231.7460479736328, |
|
"logps/rejected": -252.6634063720703, |
|
"loss": 0.4124, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -4.464608669281006, |
|
"rewards/margins": 1.6818110942840576, |
|
"rewards/rejected": -6.146419525146484, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.683371298405467, |
|
"grad_norm": 14.945960998535156, |
|
"learning_rate": 1.4035763097949886e-05, |
|
"logits/chosen": 1.5611943006515503, |
|
"logits/rejected": 1.513925552368164, |
|
"logps/chosen": -234.7378387451172, |
|
"logps/rejected": -255.2218017578125, |
|
"loss": 0.3549, |
|
"rewards/accuracies": 0.8500000834465027, |
|
"rewards/chosen": -4.391298770904541, |
|
"rewards/margins": 1.9131309986114502, |
|
"rewards/rejected": -6.304429054260254, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6862186788154897, |
|
"grad_norm": 22.232023239135742, |
|
"learning_rate": 1.4035495444191344e-05, |
|
"logits/chosen": 1.219208002090454, |
|
"logits/rejected": 1.1519418954849243, |
|
"logps/chosen": -246.47842407226562, |
|
"logps/rejected": -261.6808776855469, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -5.468182563781738, |
|
"rewards/margins": 1.899754285812378, |
|
"rewards/rejected": -7.367936611175537, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6890660592255126, |
|
"grad_norm": 10.423013687133789, |
|
"learning_rate": 1.4035227790432803e-05, |
|
"logits/chosen": 1.3841204643249512, |
|
"logits/rejected": 1.303436279296875, |
|
"logps/chosen": -253.433349609375, |
|
"logps/rejected": -266.01544189453125, |
|
"loss": 0.4217, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -6.0178141593933105, |
|
"rewards/margins": 1.7622039318084717, |
|
"rewards/rejected": -7.7800188064575195, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6919134396355353, |
|
"grad_norm": 11.22570514678955, |
|
"learning_rate": 1.403496013667426e-05, |
|
"logits/chosen": 1.279226541519165, |
|
"logits/rejected": 1.2061641216278076, |
|
"logps/chosen": -245.2624969482422, |
|
"logps/rejected": -256.43939208984375, |
|
"loss": 0.4384, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -5.069748878479004, |
|
"rewards/margins": 1.824032187461853, |
|
"rewards/rejected": -6.8937811851501465, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6947608200455581, |
|
"grad_norm": 12.202534675598145, |
|
"learning_rate": 1.4034692482915717e-05, |
|
"logits/chosen": 1.742522954940796, |
|
"logits/rejected": 1.6569029092788696, |
|
"logps/chosen": -235.7698211669922, |
|
"logps/rejected": -249.0869140625, |
|
"loss": 0.5603, |
|
"rewards/accuracies": 0.7166666984558105, |
|
"rewards/chosen": -4.401494979858398, |
|
"rewards/margins": 1.5202449560165405, |
|
"rewards/rejected": -5.921740531921387, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6976082004555809, |
|
"grad_norm": 11.25021743774414, |
|
"learning_rate": 1.4034424829157175e-05, |
|
"logits/chosen": 1.2875077724456787, |
|
"logits/rejected": 1.2242827415466309, |
|
"logps/chosen": -235.0489501953125, |
|
"logps/rejected": -250.44076538085938, |
|
"loss": 0.4157, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -4.398201942443848, |
|
"rewards/margins": 2.000248432159424, |
|
"rewards/rejected": -6.398449897766113, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7004555808656037, |
|
"grad_norm": 10.962873458862305, |
|
"learning_rate": 1.4034157175398634e-05, |
|
"logits/chosen": 1.6589164733886719, |
|
"logits/rejected": 1.5607925653457642, |
|
"logps/chosen": -247.64437866210938, |
|
"logps/rejected": -264.1566467285156, |
|
"loss": 0.506, |
|
"rewards/accuracies": 0.7666667699813843, |
|
"rewards/chosen": -5.916208744049072, |
|
"rewards/margins": 1.4422000646591187, |
|
"rewards/rejected": -7.3584089279174805, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7033029612756264, |
|
"grad_norm": 6.1506500244140625, |
|
"learning_rate": 1.4033889521640092e-05, |
|
"logits/chosen": 1.1968879699707031, |
|
"logits/rejected": 1.1140633821487427, |
|
"logps/chosen": -254.21255493164062, |
|
"logps/rejected": -268.221435546875, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.287491798400879, |
|
"rewards/margins": 1.4170840978622437, |
|
"rewards/rejected": -7.704575538635254, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7061503416856492, |
|
"grad_norm": 7.118803024291992, |
|
"learning_rate": 1.403362186788155e-05, |
|
"logits/chosen": 1.3762328624725342, |
|
"logits/rejected": 1.2869064807891846, |
|
"logps/chosen": -240.8204803466797, |
|
"logps/rejected": -257.37249755859375, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -5.4389543533325195, |
|
"rewards/margins": 1.7129642963409424, |
|
"rewards/rejected": -7.151918888092041, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.708997722095672, |
|
"grad_norm": 8.637585639953613, |
|
"learning_rate": 1.4033354214123008e-05, |
|
"logits/chosen": 1.5181769132614136, |
|
"logits/rejected": 1.4586423635482788, |
|
"logps/chosen": -244.4365234375, |
|
"logps/rejected": -262.47332763671875, |
|
"loss": 0.4751, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -5.681598663330078, |
|
"rewards/margins": 1.891000747680664, |
|
"rewards/rejected": -7.5725998878479, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7118451025056948, |
|
"grad_norm": 12.983312606811523, |
|
"learning_rate": 1.4033086560364466e-05, |
|
"logits/chosen": 1.3992502689361572, |
|
"logits/rejected": 1.3234424591064453, |
|
"logps/chosen": -253.96762084960938, |
|
"logps/rejected": -270.48016357421875, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -6.435060977935791, |
|
"rewards/margins": 1.5852489471435547, |
|
"rewards/rejected": -8.02031135559082, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7146924829157175, |
|
"grad_norm": 13.200767517089844, |
|
"learning_rate": 1.4032818906605923e-05, |
|
"logits/chosen": 1.6667852401733398, |
|
"logits/rejected": 1.5995807647705078, |
|
"logps/chosen": -254.1513671875, |
|
"logps/rejected": -273.9045104980469, |
|
"loss": 0.5527, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -6.633721351623535, |
|
"rewards/margins": 1.3930259943008423, |
|
"rewards/rejected": -8.02674674987793, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7175398633257403, |
|
"grad_norm": 20.3078670501709, |
|
"learning_rate": 1.4032551252847381e-05, |
|
"logits/chosen": 1.3614373207092285, |
|
"logits/rejected": 1.3755300045013428, |
|
"logps/chosen": -247.287353515625, |
|
"logps/rejected": -260.49456787109375, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -5.98111629486084, |
|
"rewards/margins": 1.3161729574203491, |
|
"rewards/rejected": -7.297289848327637, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7203872437357631, |
|
"grad_norm": 4.286506175994873, |
|
"learning_rate": 1.4032283599088839e-05, |
|
"logits/chosen": 1.3778407573699951, |
|
"logits/rejected": 1.3152930736541748, |
|
"logps/chosen": -234.5768280029297, |
|
"logps/rejected": -255.61819458007812, |
|
"loss": 0.372, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.508410930633545, |
|
"rewards/margins": 1.7014334201812744, |
|
"rewards/rejected": -6.20984411239624, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7232346241457859, |
|
"grad_norm": 14.069003105163574, |
|
"learning_rate": 1.4032015945330297e-05, |
|
"logits/chosen": 1.3468763828277588, |
|
"logits/rejected": 1.233034372329712, |
|
"logps/chosen": -233.3365478515625, |
|
"logps/rejected": -254.7558135986328, |
|
"loss": 0.4187, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -4.505204677581787, |
|
"rewards/margins": 1.8832544088363647, |
|
"rewards/rejected": -6.388458728790283, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7260820045558086, |
|
"grad_norm": 11.429390907287598, |
|
"learning_rate": 1.4031748291571755e-05, |
|
"logits/chosen": 1.393408179283142, |
|
"logits/rejected": 1.2685819864273071, |
|
"logps/chosen": -232.78707885742188, |
|
"logps/rejected": -258.8204345703125, |
|
"loss": 0.3655, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.111529350280762, |
|
"rewards/margins": 1.9012877941131592, |
|
"rewards/rejected": -6.012816429138184, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7289293849658315, |
|
"grad_norm": 7.633700847625732, |
|
"learning_rate": 1.4031480637813214e-05, |
|
"logits/chosen": 1.7750869989395142, |
|
"logits/rejected": 1.6736621856689453, |
|
"logps/chosen": -239.56918334960938, |
|
"logps/rejected": -261.6385192871094, |
|
"loss": 0.2991, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -4.729853630065918, |
|
"rewards/margins": 2.528153657913208, |
|
"rewards/rejected": -7.258008003234863, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.7317767653758542, |
|
"grad_norm": 19.738534927368164, |
|
"learning_rate": 1.403121298405467e-05, |
|
"logits/chosen": 1.4052790403366089, |
|
"logits/rejected": 1.363965630531311, |
|
"logps/chosen": -255.84262084960938, |
|
"logps/rejected": -268.30609130859375, |
|
"loss": 0.4441, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.05529260635376, |
|
"rewards/margins": 1.61709725856781, |
|
"rewards/rejected": -7.672389030456543, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.734624145785877, |
|
"grad_norm": 3.8141791820526123, |
|
"learning_rate": 1.4030945330296127e-05, |
|
"logits/chosen": 1.2736724615097046, |
|
"logits/rejected": 1.1940056085586548, |
|
"logps/chosen": -252.11923217773438, |
|
"logps/rejected": -271.30157470703125, |
|
"loss": 0.4046, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.049666881561279, |
|
"rewards/margins": 1.935879111289978, |
|
"rewards/rejected": -7.9855451583862305, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7374715261958997, |
|
"grad_norm": 12.9832181930542, |
|
"learning_rate": 1.4030677676537585e-05, |
|
"logits/chosen": 1.148601770401001, |
|
"logits/rejected": 1.0851247310638428, |
|
"logps/chosen": -248.23617553710938, |
|
"logps/rejected": -267.77691650390625, |
|
"loss": 0.3661, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -5.982962608337402, |
|
"rewards/margins": 2.2053725719451904, |
|
"rewards/rejected": -8.188336372375488, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.7403189066059226, |
|
"grad_norm": 8.390840530395508, |
|
"learning_rate": 1.4030410022779043e-05, |
|
"logits/chosen": 1.5896836519241333, |
|
"logits/rejected": 1.5164399147033691, |
|
"logps/chosen": -248.79019165039062, |
|
"logps/rejected": -263.56243896484375, |
|
"loss": 0.4359, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -5.833072662353516, |
|
"rewards/margins": 1.9023926258087158, |
|
"rewards/rejected": -7.735465049743652, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7431662870159453, |
|
"grad_norm": 17.48469352722168, |
|
"learning_rate": 1.4030142369020501e-05, |
|
"logits/chosen": 1.5616130828857422, |
|
"logits/rejected": 1.5186275243759155, |
|
"logps/chosen": -242.703125, |
|
"logps/rejected": -257.0491027832031, |
|
"loss": 0.4011, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -4.932897567749023, |
|
"rewards/margins": 2.0841403007507324, |
|
"rewards/rejected": -7.017037868499756, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7460136674259681, |
|
"grad_norm": 7.127035140991211, |
|
"learning_rate": 1.402987471526196e-05, |
|
"logits/chosen": 1.4148176908493042, |
|
"logits/rejected": 1.3808656930923462, |
|
"logps/chosen": -248.82455444335938, |
|
"logps/rejected": -267.6011962890625, |
|
"loss": 0.5825, |
|
"rewards/accuracies": 0.7833333015441895, |
|
"rewards/chosen": -5.617693901062012, |
|
"rewards/margins": 1.4519156217575073, |
|
"rewards/rejected": -7.069609642028809, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7488610478359908, |
|
"grad_norm": 27.17725944519043, |
|
"learning_rate": 1.4029607061503418e-05, |
|
"logits/chosen": 1.1969980001449585, |
|
"logits/rejected": 1.1847108602523804, |
|
"logps/chosen": -261.9609375, |
|
"logps/rejected": -281.9357604980469, |
|
"loss": 0.4544, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.976052284240723, |
|
"rewards/margins": 1.6174719333648682, |
|
"rewards/rejected": -8.593523025512695, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7517084282460137, |
|
"grad_norm": 10.162541389465332, |
|
"learning_rate": 1.4029339407744876e-05, |
|
"logits/chosen": 1.1881685256958008, |
|
"logits/rejected": 1.162522792816162, |
|
"logps/chosen": -270.0924987792969, |
|
"logps/rejected": -284.1805725097656, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.783333420753479, |
|
"rewards/chosen": -7.348545074462891, |
|
"rewards/margins": 1.6283506155014038, |
|
"rewards/rejected": -8.976896286010742, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.7545558086560364, |
|
"grad_norm": 8.748689651489258, |
|
"learning_rate": 1.4029071753986332e-05, |
|
"logits/chosen": 1.2383089065551758, |
|
"logits/rejected": 1.1446388959884644, |
|
"logps/chosen": -264.23828125, |
|
"logps/rejected": -284.57122802734375, |
|
"loss": 0.332, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -7.279618740081787, |
|
"rewards/margins": 2.021749973297119, |
|
"rewards/rejected": -9.301369667053223, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7574031890660592, |
|
"grad_norm": 11.1958646774292, |
|
"learning_rate": 1.402880410022779e-05, |
|
"logits/chosen": 1.0028870105743408, |
|
"logits/rejected": 0.9154938459396362, |
|
"logps/chosen": -249.95993041992188, |
|
"logps/rejected": -271.75054931640625, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -6.213393211364746, |
|
"rewards/margins": 1.7268741130828857, |
|
"rewards/rejected": -7.940268039703369, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.760250569476082, |
|
"grad_norm": 15.898490905761719, |
|
"learning_rate": 1.4028536446469249e-05, |
|
"logits/chosen": 1.257958173751831, |
|
"logits/rejected": 1.2141045331954956, |
|
"logps/chosen": -255.44580078125, |
|
"logps/rejected": -270.67926025390625, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -5.937500476837158, |
|
"rewards/margins": 1.792832612991333, |
|
"rewards/rejected": -7.730332851409912, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7630979498861048, |
|
"grad_norm": 13.417143821716309, |
|
"learning_rate": 1.4028268792710707e-05, |
|
"logits/chosen": 1.6417725086212158, |
|
"logits/rejected": 1.537362813949585, |
|
"logps/chosen": -249.5520477294922, |
|
"logps/rejected": -270.64495849609375, |
|
"loss": 0.4474, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -5.665754318237305, |
|
"rewards/margins": 1.6779617071151733, |
|
"rewards/rejected": -7.343716621398926, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7659453302961275, |
|
"grad_norm": 19.788162231445312, |
|
"learning_rate": 1.4028001138952165e-05, |
|
"logits/chosen": 1.6244192123413086, |
|
"logits/rejected": 1.5817723274230957, |
|
"logps/chosen": -246.73605346679688, |
|
"logps/rejected": -262.739501953125, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -6.028580188751221, |
|
"rewards/margins": 1.7873833179473877, |
|
"rewards/rejected": -7.815962791442871, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7687927107061503, |
|
"grad_norm": 21.922740936279297, |
|
"learning_rate": 1.4027733485193623e-05, |
|
"logits/chosen": 1.7325636148452759, |
|
"logits/rejected": 1.6674734354019165, |
|
"logps/chosen": -242.2566375732422, |
|
"logps/rejected": -272.3511657714844, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -5.904580116271973, |
|
"rewards/margins": 2.298239231109619, |
|
"rewards/rejected": -8.20281982421875, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7716400911161732, |
|
"grad_norm": 18.379051208496094, |
|
"learning_rate": 1.4027465831435081e-05, |
|
"logits/chosen": 1.954079031944275, |
|
"logits/rejected": 1.889691710472107, |
|
"logps/chosen": -246.53463745117188, |
|
"logps/rejected": -268.9033203125, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -5.3877739906311035, |
|
"rewards/margins": 2.023282051086426, |
|
"rewards/rejected": -7.411055564880371, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7744874715261959, |
|
"grad_norm": 14.651932716369629, |
|
"learning_rate": 1.4027198177676538e-05, |
|
"logits/chosen": 1.7924703359603882, |
|
"logits/rejected": 1.7352300882339478, |
|
"logps/chosen": -241.3613739013672, |
|
"logps/rejected": -265.5358581542969, |
|
"loss": 0.3971, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -5.6403656005859375, |
|
"rewards/margins": 2.175809144973755, |
|
"rewards/rejected": -7.816174507141113, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7773348519362187, |
|
"grad_norm": 17.90581512451172, |
|
"learning_rate": 1.4026930523917996e-05, |
|
"logits/chosen": 1.369199514389038, |
|
"logits/rejected": 1.3058414459228516, |
|
"logps/chosen": -247.9556121826172, |
|
"logps/rejected": -272.4525146484375, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -6.066657066345215, |
|
"rewards/margins": 1.838025450706482, |
|
"rewards/rejected": -7.904683589935303, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7801822323462415, |
|
"grad_norm": 6.542720317840576, |
|
"learning_rate": 1.4026662870159454e-05, |
|
"logits/chosen": 1.6855316162109375, |
|
"logits/rejected": 1.6109323501586914, |
|
"logps/chosen": -248.1515655517578, |
|
"logps/rejected": -272.78765869140625, |
|
"loss": 0.3162, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -5.281750679016113, |
|
"rewards/margins": 2.642489194869995, |
|
"rewards/rejected": -7.924239158630371, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7830296127562643, |
|
"grad_norm": 8.27110481262207, |
|
"learning_rate": 1.4026395216400912e-05, |
|
"logits/chosen": 1.410160779953003, |
|
"logits/rejected": 1.3594660758972168, |
|
"logps/chosen": -248.23818969726562, |
|
"logps/rejected": -270.97955322265625, |
|
"loss": 0.3703, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -5.9429450035095215, |
|
"rewards/margins": 2.0173375606536865, |
|
"rewards/rejected": -7.960282802581787, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.785876993166287, |
|
"grad_norm": 12.099336624145508, |
|
"learning_rate": 1.402612756264237e-05, |
|
"logits/chosen": 1.7231197357177734, |
|
"logits/rejected": 1.6259253025054932, |
|
"logps/chosen": -254.39724731445312, |
|
"logps/rejected": -271.6213684082031, |
|
"loss": 0.2878, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -6.0387163162231445, |
|
"rewards/margins": 2.486721992492676, |
|
"rewards/rejected": -8.525439262390137, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7887243735763098, |
|
"grad_norm": 10.804024696350098, |
|
"learning_rate": 1.4025859908883829e-05, |
|
"logits/chosen": 1.5716185569763184, |
|
"logits/rejected": 1.486262559890747, |
|
"logps/chosen": -256.6431884765625, |
|
"logps/rejected": -277.954833984375, |
|
"loss": 0.303, |
|
"rewards/accuracies": 0.8166667819023132, |
|
"rewards/chosen": -6.014618873596191, |
|
"rewards/margins": 2.517322063446045, |
|
"rewards/rejected": -8.531940460205078, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7915717539863326, |
|
"grad_norm": 15.970914840698242, |
|
"learning_rate": 1.4025592255125287e-05, |
|
"logits/chosen": 1.3881986141204834, |
|
"logits/rejected": 1.3379590511322021, |
|
"logps/chosen": -251.85659790039062, |
|
"logps/rejected": -281.0345153808594, |
|
"loss": 0.3171, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -6.694669246673584, |
|
"rewards/margins": 2.450220823287964, |
|
"rewards/rejected": -9.144889831542969, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7944191343963554, |
|
"grad_norm": 12.897587776184082, |
|
"learning_rate": 1.4025324601366743e-05, |
|
"logits/chosen": 1.350629210472107, |
|
"logits/rejected": 1.3108307123184204, |
|
"logps/chosen": -259.5316162109375, |
|
"logps/rejected": -283.2249450683594, |
|
"loss": 0.2645, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.7262091636657715, |
|
"rewards/margins": 2.8170058727264404, |
|
"rewards/rejected": -9.543214797973633, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7972665148063781, |
|
"grad_norm": 17.332544326782227, |
|
"learning_rate": 1.40250569476082e-05, |
|
"logits/chosen": 1.5189533233642578, |
|
"logits/rejected": 1.469429612159729, |
|
"logps/chosen": -262.32928466796875, |
|
"logps/rejected": -275.24981689453125, |
|
"loss": 0.3829, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.6358160972595215, |
|
"rewards/margins": 1.8431167602539062, |
|
"rewards/rejected": -8.47893238067627, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8001138952164009, |
|
"grad_norm": 17.475202560424805, |
|
"learning_rate": 1.4024789293849658e-05, |
|
"logits/chosen": 1.844002366065979, |
|
"logits/rejected": 1.8084990978240967, |
|
"logps/chosen": -274.48699951171875, |
|
"logps/rejected": -298.3126525878906, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -8.724340438842773, |
|
"rewards/margins": 1.9993741512298584, |
|
"rewards/rejected": -10.723714828491211, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8029612756264237, |
|
"grad_norm": 22.87725257873535, |
|
"learning_rate": 1.4024521640091116e-05, |
|
"logits/chosen": 1.6398130655288696, |
|
"logits/rejected": 1.5759761333465576, |
|
"logps/chosen": -276.5049743652344, |
|
"logps/rejected": -288.7770080566406, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -8.648481369018555, |
|
"rewards/margins": 1.7932255268096924, |
|
"rewards/rejected": -10.441704750061035, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8058086560364465, |
|
"grad_norm": 10.597854614257812, |
|
"learning_rate": 1.4024253986332574e-05, |
|
"logits/chosen": 1.5820127725601196, |
|
"logits/rejected": 1.5301449298858643, |
|
"logps/chosen": -259.69732666015625, |
|
"logps/rejected": -274.4244079589844, |
|
"loss": 0.4166, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.665907859802246, |
|
"rewards/margins": 1.9291824102401733, |
|
"rewards/rejected": -8.59508991241455, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8086560364464692, |
|
"grad_norm": 3.9533028602600098, |
|
"learning_rate": 1.4023986332574032e-05, |
|
"logits/chosen": 1.4371238946914673, |
|
"logits/rejected": 1.4022338390350342, |
|
"logps/chosen": -239.5390625, |
|
"logps/rejected": -267.0621337890625, |
|
"loss": 0.4274, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -5.695757865905762, |
|
"rewards/margins": 2.3239686489105225, |
|
"rewards/rejected": -8.019726753234863, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8115034168564921, |
|
"grad_norm": 20.184728622436523, |
|
"learning_rate": 1.402371867881549e-05, |
|
"logits/chosen": 1.6135759353637695, |
|
"logits/rejected": 1.5178974866867065, |
|
"logps/chosen": -253.1510009765625, |
|
"logps/rejected": -278.24407958984375, |
|
"loss": 0.4337, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.1504926681518555, |
|
"rewards/margins": 2.674940586090088, |
|
"rewards/rejected": -8.825433731079102, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8143507972665148, |
|
"grad_norm": 16.252994537353516, |
|
"learning_rate": 1.4023451025056947e-05, |
|
"logits/chosen": 1.6190258264541626, |
|
"logits/rejected": 1.564290165901184, |
|
"logps/chosen": -245.3192901611328, |
|
"logps/rejected": -275.1893615722656, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.764278411865234, |
|
"rewards/margins": 2.888000011444092, |
|
"rewards/rejected": -8.652277946472168, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8171981776765376, |
|
"grad_norm": 15.702975273132324, |
|
"learning_rate": 1.4023183371298405e-05, |
|
"logits/chosen": 1.4291714429855347, |
|
"logits/rejected": 1.3949334621429443, |
|
"logps/chosen": -260.24554443359375, |
|
"logps/rejected": -288.02032470703125, |
|
"loss": 0.3742, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -7.282652378082275, |
|
"rewards/margins": 2.00462007522583, |
|
"rewards/rejected": -9.287271499633789, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8200455580865603, |
|
"grad_norm": 29.931583404541016, |
|
"learning_rate": 1.4022915717539863e-05, |
|
"logits/chosen": 1.3684179782867432, |
|
"logits/rejected": 1.3120687007904053, |
|
"logps/chosen": -275.12432861328125, |
|
"logps/rejected": -295.9511413574219, |
|
"loss": 0.5024, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.723713874816895, |
|
"rewards/margins": 2.193488359451294, |
|
"rewards/rejected": -10.917202949523926, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8228929384965832, |
|
"grad_norm": 8.566882133483887, |
|
"learning_rate": 1.4022648063781322e-05, |
|
"logits/chosen": 1.65011465549469, |
|
"logits/rejected": 1.5863733291625977, |
|
"logps/chosen": -262.7909851074219, |
|
"logps/rejected": -287.36016845703125, |
|
"loss": 0.4629, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.5483293533325195, |
|
"rewards/margins": 2.1988370418548584, |
|
"rewards/rejected": -9.74716567993164, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8257403189066059, |
|
"grad_norm": 9.146045684814453, |
|
"learning_rate": 1.402238041002278e-05, |
|
"logits/chosen": 1.4438389539718628, |
|
"logits/rejected": 1.3922678232192993, |
|
"logps/chosen": -256.80194091796875, |
|
"logps/rejected": -281.6923828125, |
|
"loss": 0.3474, |
|
"rewards/accuracies": 0.8666667938232422, |
|
"rewards/chosen": -7.041781425476074, |
|
"rewards/margins": 2.2522146701812744, |
|
"rewards/rejected": -9.293996810913086, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8285876993166287, |
|
"grad_norm": 14.651105880737305, |
|
"learning_rate": 1.4022112756264238e-05, |
|
"logits/chosen": 1.2979462146759033, |
|
"logits/rejected": 1.20625638961792, |
|
"logps/chosen": -262.59722900390625, |
|
"logps/rejected": -282.945068359375, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -7.369248390197754, |
|
"rewards/margins": 1.7789223194122314, |
|
"rewards/rejected": -9.14816951751709, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.8314350797266514, |
|
"grad_norm": 18.253070831298828, |
|
"learning_rate": 1.4021845102505696e-05, |
|
"logits/chosen": 1.4090951681137085, |
|
"logits/rejected": 1.3580656051635742, |
|
"logps/chosen": -260.5758056640625, |
|
"logps/rejected": -275.6468505859375, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.7333332896232605, |
|
"rewards/chosen": -7.077749729156494, |
|
"rewards/margins": 1.6264019012451172, |
|
"rewards/rejected": -8.704151153564453, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.8342824601366743, |
|
"grad_norm": 29.435651779174805, |
|
"learning_rate": 1.4021577448747153e-05, |
|
"logits/chosen": 1.1056894063949585, |
|
"logits/rejected": 1.0585362911224365, |
|
"logps/chosen": -262.055419921875, |
|
"logps/rejected": -278.28082275390625, |
|
"loss": 0.4375, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -7.14243221282959, |
|
"rewards/margins": 1.8324248790740967, |
|
"rewards/rejected": -8.974858283996582, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.837129840546697, |
|
"grad_norm": 23.39213752746582, |
|
"learning_rate": 1.402130979498861e-05, |
|
"logits/chosen": 1.1425232887268066, |
|
"logits/rejected": 1.1206319332122803, |
|
"logps/chosen": -254.543212890625, |
|
"logps/rejected": -272.4224548339844, |
|
"loss": 0.5169, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -6.688830375671387, |
|
"rewards/margins": 1.7970342636108398, |
|
"rewards/rejected": -8.485864639282227, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.8399772209567198, |
|
"grad_norm": 7.043151378631592, |
|
"learning_rate": 1.4021042141230069e-05, |
|
"logits/chosen": 1.5714797973632812, |
|
"logits/rejected": 1.5142377614974976, |
|
"logps/chosen": -242.92837524414062, |
|
"logps/rejected": -277.87359619140625, |
|
"loss": 0.4165, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -6.007413864135742, |
|
"rewards/margins": 2.4879133701324463, |
|
"rewards/rejected": -8.495327949523926, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.8428246013667426, |
|
"grad_norm": 12.184271812438965, |
|
"learning_rate": 1.4020774487471527e-05, |
|
"logits/chosen": 1.125959038734436, |
|
"logits/rejected": 1.0558584928512573, |
|
"logps/chosen": -243.2732696533203, |
|
"logps/rejected": -271.07122802734375, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.250051021575928, |
|
"rewards/margins": 2.6489367485046387, |
|
"rewards/rejected": -7.898987770080566, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.8456719817767654, |
|
"grad_norm": 3.8234286308288574, |
|
"learning_rate": 1.4020506833712985e-05, |
|
"logits/chosen": 1.368369460105896, |
|
"logits/rejected": 1.295506477355957, |
|
"logps/chosen": -246.85592651367188, |
|
"logps/rejected": -274.9765319824219, |
|
"loss": 0.2654, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -5.611214637756348, |
|
"rewards/margins": 2.9931886196136475, |
|
"rewards/rejected": -8.604402542114258, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.8485193621867881, |
|
"grad_norm": 28.678070068359375, |
|
"learning_rate": 1.4020239179954443e-05, |
|
"logits/chosen": 1.4959561824798584, |
|
"logits/rejected": 1.4261696338653564, |
|
"logps/chosen": -254.52072143554688, |
|
"logps/rejected": -277.2051696777344, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -6.094107627868652, |
|
"rewards/margins": 2.3713717460632324, |
|
"rewards/rejected": -8.465478897094727, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8513667425968109, |
|
"grad_norm": 30.86003303527832, |
|
"learning_rate": 1.4019971526195902e-05, |
|
"logits/chosen": 1.6645902395248413, |
|
"logits/rejected": 1.5708162784576416, |
|
"logps/chosen": -251.2325897216797, |
|
"logps/rejected": -284.61077880859375, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.4030866622924805, |
|
"rewards/margins": 3.128662586212158, |
|
"rewards/rejected": -9.53174877166748, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8542141230068337, |
|
"grad_norm": 24.597213745117188, |
|
"learning_rate": 1.4019703872437358e-05, |
|
"logits/chosen": 1.1054723262786865, |
|
"logits/rejected": 1.0511767864227295, |
|
"logps/chosen": -255.37075805664062, |
|
"logps/rejected": -279.79388427734375, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -6.036796569824219, |
|
"rewards/margins": 2.4329230785369873, |
|
"rewards/rejected": -8.469719886779785, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8570615034168565, |
|
"grad_norm": 7.576244354248047, |
|
"learning_rate": 1.4019436218678816e-05, |
|
"logits/chosen": 0.9394910931587219, |
|
"logits/rejected": 0.9130109548568726, |
|
"logps/chosen": -260.2972106933594, |
|
"logps/rejected": -278.38916015625, |
|
"loss": 0.4125, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -7.257266998291016, |
|
"rewards/margins": 2.3638997077941895, |
|
"rewards/rejected": -9.621164321899414, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.8599088838268792, |
|
"grad_norm": 19.788061141967773, |
|
"learning_rate": 1.4019168564920273e-05, |
|
"logits/chosen": 0.8776735067367554, |
|
"logits/rejected": 0.8553932905197144, |
|
"logps/chosen": -258.1982421875, |
|
"logps/rejected": -288.4192810058594, |
|
"loss": 0.2575, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -6.4132399559021, |
|
"rewards/margins": 2.6626341342926025, |
|
"rewards/rejected": -9.075874328613281, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.8627562642369021, |
|
"grad_norm": 7.41077995300293, |
|
"learning_rate": 1.4018900911161731e-05, |
|
"logits/chosen": 0.8213122487068176, |
|
"logits/rejected": 0.7751299142837524, |
|
"logps/chosen": -255.14517211914062, |
|
"logps/rejected": -280.5233459472656, |
|
"loss": 0.3752, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -6.201568603515625, |
|
"rewards/margins": 2.61332368850708, |
|
"rewards/rejected": -8.814892768859863, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.8656036446469249, |
|
"grad_norm": 5.560190200805664, |
|
"learning_rate": 1.4018633257403189e-05, |
|
"logits/chosen": 1.249352216720581, |
|
"logits/rejected": 1.222125768661499, |
|
"logps/chosen": -249.1427764892578, |
|
"logps/rejected": -272.9322814941406, |
|
"loss": 0.492, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -5.985461711883545, |
|
"rewards/margins": 2.357642650604248, |
|
"rewards/rejected": -8.343104362487793, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8684510250569476, |
|
"grad_norm": 17.846477508544922, |
|
"learning_rate": 1.4018365603644647e-05, |
|
"logits/chosen": 1.1001662015914917, |
|
"logits/rejected": 1.073899745941162, |
|
"logps/chosen": -235.15127563476562, |
|
"logps/rejected": -256.9810485839844, |
|
"loss": 0.5462, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -4.736093521118164, |
|
"rewards/margins": 1.6902631521224976, |
|
"rewards/rejected": -6.426356315612793, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8712984054669703, |
|
"grad_norm": 6.344593048095703, |
|
"learning_rate": 1.4018097949886105e-05, |
|
"logits/chosen": 1.6326649188995361, |
|
"logits/rejected": 1.630692481994629, |
|
"logps/chosen": -231.1848602294922, |
|
"logps/rejected": -251.69509887695312, |
|
"loss": 0.4002, |
|
"rewards/accuracies": 0.7500000596046448, |
|
"rewards/chosen": -4.022892951965332, |
|
"rewards/margins": 1.9389598369598389, |
|
"rewards/rejected": -5.961852073669434, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8741457858769932, |
|
"grad_norm": 25.729211807250977, |
|
"learning_rate": 1.4017830296127562e-05, |
|
"logits/chosen": 1.8420072793960571, |
|
"logits/rejected": 1.8046996593475342, |
|
"logps/chosen": -239.9873504638672, |
|
"logps/rejected": -251.55722045898438, |
|
"loss": 0.6297, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -4.679580211639404, |
|
"rewards/margins": 1.3005344867706299, |
|
"rewards/rejected": -5.980114936828613, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.876993166287016, |
|
"grad_norm": 17.203083038330078, |
|
"learning_rate": 1.401756264236902e-05, |
|
"logits/chosen": 1.9162012338638306, |
|
"logits/rejected": 1.8495725393295288, |
|
"logps/chosen": -236.2726593017578, |
|
"logps/rejected": -247.9042510986328, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -4.582438945770264, |
|
"rewards/margins": 1.3650325536727905, |
|
"rewards/rejected": -5.947472095489502, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8798405466970387, |
|
"grad_norm": 14.923961639404297, |
|
"learning_rate": 1.4017294988610478e-05, |
|
"logits/chosen": 1.6813533306121826, |
|
"logits/rejected": 1.647003412246704, |
|
"logps/chosen": -234.11679077148438, |
|
"logps/rejected": -252.88388061523438, |
|
"loss": 0.4419, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -4.13940954208374, |
|
"rewards/margins": 1.7298119068145752, |
|
"rewards/rejected": -5.8692216873168945, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8826879271070615, |
|
"grad_norm": 11.808030128479004, |
|
"learning_rate": 1.4017027334851936e-05, |
|
"logits/chosen": 1.5109126567840576, |
|
"logits/rejected": 1.4859440326690674, |
|
"logps/chosen": -233.6483612060547, |
|
"logps/rejected": -249.63638305664062, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -4.1950249671936035, |
|
"rewards/margins": 1.3217439651489258, |
|
"rewards/rejected": -5.516768455505371, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8855353075170843, |
|
"grad_norm": 4.284244060516357, |
|
"learning_rate": 1.4016759681093395e-05, |
|
"logits/chosen": 2.059269905090332, |
|
"logits/rejected": 1.9955742359161377, |
|
"logps/chosen": -233.57199096679688, |
|
"logps/rejected": -248.676025390625, |
|
"loss": 0.4276, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -3.8850066661834717, |
|
"rewards/margins": 1.9384835958480835, |
|
"rewards/rejected": -5.823491096496582, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.8883826879271071, |
|
"grad_norm": 32.62685012817383, |
|
"learning_rate": 1.4016492027334853e-05, |
|
"logits/chosen": 1.6952826976776123, |
|
"logits/rejected": 1.7041078805923462, |
|
"logps/chosen": -234.2545928955078, |
|
"logps/rejected": -243.2709197998047, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -4.209284782409668, |
|
"rewards/margins": 1.3029229640960693, |
|
"rewards/rejected": -5.512207984924316, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.8912300683371298, |
|
"grad_norm": 17.5522518157959, |
|
"learning_rate": 1.4016224373576311e-05, |
|
"logits/chosen": 1.4055092334747314, |
|
"logits/rejected": 1.3894739151000977, |
|
"logps/chosen": -235.7975311279297, |
|
"logps/rejected": -243.79611206054688, |
|
"loss": 0.4752, |
|
"rewards/accuracies": 0.7833333015441895, |
|
"rewards/chosen": -4.342627048492432, |
|
"rewards/margins": 1.4617254734039307, |
|
"rewards/rejected": -5.804352283477783, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.8940774487471527, |
|
"grad_norm": 11.302553176879883, |
|
"learning_rate": 1.4015956719817767e-05, |
|
"logits/chosen": 1.6838788986206055, |
|
"logits/rejected": 1.6712299585342407, |
|
"logps/chosen": -235.022705078125, |
|
"logps/rejected": -242.8223114013672, |
|
"loss": 0.4063, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -4.4199113845825195, |
|
"rewards/margins": 1.4390454292297363, |
|
"rewards/rejected": -5.858956813812256, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8969248291571754, |
|
"grad_norm": 11.453242301940918, |
|
"learning_rate": 1.4015689066059226e-05, |
|
"logits/chosen": 1.2508509159088135, |
|
"logits/rejected": 1.2264854907989502, |
|
"logps/chosen": -233.52792358398438, |
|
"logps/rejected": -245.04104614257812, |
|
"loss": 0.4138, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -4.280221462249756, |
|
"rewards/margins": 1.6082589626312256, |
|
"rewards/rejected": -5.888480186462402, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8997722095671982, |
|
"grad_norm": 5.725339412689209, |
|
"learning_rate": 1.4015421412300684e-05, |
|
"logits/chosen": 1.3065173625946045, |
|
"logits/rejected": 1.2575079202651978, |
|
"logps/chosen": -235.4761505126953, |
|
"logps/rejected": -260.0916442871094, |
|
"loss": 0.3539, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -4.3087477684021, |
|
"rewards/margins": 2.0462512969970703, |
|
"rewards/rejected": -6.35499906539917, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9026195899772209, |
|
"grad_norm": 14.741144180297852, |
|
"learning_rate": 1.4015153758542142e-05, |
|
"logits/chosen": 1.2061371803283691, |
|
"logits/rejected": 1.1723283529281616, |
|
"logps/chosen": -246.95114135742188, |
|
"logps/rejected": -266.38836669921875, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -5.428233623504639, |
|
"rewards/margins": 1.7254750728607178, |
|
"rewards/rejected": -7.153708457946777, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9054669703872438, |
|
"grad_norm": 22.822391510009766, |
|
"learning_rate": 1.40148861047836e-05, |
|
"logits/chosen": 0.7415103912353516, |
|
"logits/rejected": 0.7030202746391296, |
|
"logps/chosen": -256.55096435546875, |
|
"logps/rejected": -277.3672180175781, |
|
"loss": 0.3735, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.38254976272583, |
|
"rewards/margins": 2.4899661540985107, |
|
"rewards/rejected": -8.872515678405762, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9083143507972665, |
|
"grad_norm": 23.086688995361328, |
|
"learning_rate": 1.4014618451025058e-05, |
|
"logits/chosen": 1.1446452140808105, |
|
"logits/rejected": 1.0936695337295532, |
|
"logps/chosen": -264.65997314453125, |
|
"logps/rejected": -285.80548095703125, |
|
"loss": 0.3886, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -7.392777919769287, |
|
"rewards/margins": 2.2488951683044434, |
|
"rewards/rejected": -9.64167308807373, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9111617312072893, |
|
"grad_norm": 12.26983642578125, |
|
"learning_rate": 1.4014350797266517e-05, |
|
"logits/chosen": 0.9253866076469421, |
|
"logits/rejected": 0.867773175239563, |
|
"logps/chosen": -269.03515625, |
|
"logps/rejected": -294.5494079589844, |
|
"loss": 0.3786, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.011452674865723, |
|
"rewards/margins": 2.745143175125122, |
|
"rewards/rejected": -10.756595611572266, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.914009111617312, |
|
"grad_norm": 29.206724166870117, |
|
"learning_rate": 1.4014083143507973e-05, |
|
"logits/chosen": 1.3603354692459106, |
|
"logits/rejected": 1.3081294298171997, |
|
"logps/chosen": -271.06903076171875, |
|
"logps/rejected": -285.3981018066406, |
|
"loss": 0.5639, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -7.847748756408691, |
|
"rewards/margins": 1.758512258529663, |
|
"rewards/rejected": -9.606261253356934, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.9168564920273349, |
|
"grad_norm": 18.595735549926758, |
|
"learning_rate": 1.4013815489749431e-05, |
|
"logits/chosen": 1.1783987283706665, |
|
"logits/rejected": 1.1358586549758911, |
|
"logps/chosen": -274.8568420410156, |
|
"logps/rejected": -301.0245056152344, |
|
"loss": 0.3253, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -8.224080085754395, |
|
"rewards/margins": 2.375084638595581, |
|
"rewards/rejected": -10.599164962768555, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9197038724373576, |
|
"grad_norm": 2.1214218139648438, |
|
"learning_rate": 1.401354783599089e-05, |
|
"logits/chosen": 0.9433174133300781, |
|
"logits/rejected": 0.8605527877807617, |
|
"logps/chosen": -273.72625732421875, |
|
"logps/rejected": -299.55023193359375, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -8.827226638793945, |
|
"rewards/margins": 2.468843460083008, |
|
"rewards/rejected": -11.296069145202637, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.9225512528473804, |
|
"grad_norm": 21.682979583740234, |
|
"learning_rate": 1.4013280182232348e-05, |
|
"logits/chosen": 1.137378215789795, |
|
"logits/rejected": 1.0911321640014648, |
|
"logps/chosen": -276.5653076171875, |
|
"logps/rejected": -296.9649963378906, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -8.806356430053711, |
|
"rewards/margins": 1.9599056243896484, |
|
"rewards/rejected": -10.766261100769043, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.9253986332574032, |
|
"grad_norm": 16.58523941040039, |
|
"learning_rate": 1.4013012528473804e-05, |
|
"logits/chosen": 1.2076373100280762, |
|
"logits/rejected": 1.1502797603607178, |
|
"logps/chosen": -262.76336669921875, |
|
"logps/rejected": -297.0911560058594, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -7.569868564605713, |
|
"rewards/margins": 2.8257970809936523, |
|
"rewards/rejected": -10.39566421508789, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.928246013667426, |
|
"grad_norm": 32.41596984863281, |
|
"learning_rate": 1.4012744874715262e-05, |
|
"logits/chosen": 1.0952074527740479, |
|
"logits/rejected": 1.0772335529327393, |
|
"logps/chosen": -271.6856994628906, |
|
"logps/rejected": -288.0881042480469, |
|
"loss": 0.5652, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -7.678606986999512, |
|
"rewards/margins": 1.8118677139282227, |
|
"rewards/rejected": -9.49047565460205, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.9310933940774487, |
|
"grad_norm": 16.00614356994629, |
|
"learning_rate": 1.401247722095672e-05, |
|
"logits/chosen": 1.1436659097671509, |
|
"logits/rejected": 1.1272941827774048, |
|
"logps/chosen": -257.2648010253906, |
|
"logps/rejected": -267.32452392578125, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -6.478804588317871, |
|
"rewards/margins": 1.3715341091156006, |
|
"rewards/rejected": -7.850338935852051, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.9339407744874715, |
|
"grad_norm": 12.072650909423828, |
|
"learning_rate": 1.4012209567198177e-05, |
|
"logits/chosen": 0.7003241777420044, |
|
"logits/rejected": 0.671663761138916, |
|
"logps/chosen": -251.978515625, |
|
"logps/rejected": -272.39129638671875, |
|
"loss": 0.3737, |
|
"rewards/accuracies": 0.8166667819023132, |
|
"rewards/chosen": -6.3597917556762695, |
|
"rewards/margins": 2.238091230392456, |
|
"rewards/rejected": -8.597883224487305, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.9367881548974943, |
|
"grad_norm": 17.540746688842773, |
|
"learning_rate": 1.4011941913439635e-05, |
|
"logits/chosen": 1.0943725109100342, |
|
"logits/rejected": 1.0946639776229858, |
|
"logps/chosen": -257.8005676269531, |
|
"logps/rejected": -275.43048095703125, |
|
"loss": 0.7236, |
|
"rewards/accuracies": 0.6666666269302368, |
|
"rewards/chosen": -6.777687072753906, |
|
"rewards/margins": 1.4592044353485107, |
|
"rewards/rejected": -8.236891746520996, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.9396355353075171, |
|
"grad_norm": 20.523656845092773, |
|
"learning_rate": 1.4011674259681093e-05, |
|
"logits/chosen": 0.46389874815940857, |
|
"logits/rejected": 0.4536392092704773, |
|
"logps/chosen": -268.06927490234375, |
|
"logps/rejected": -283.0080871582031, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.7666666507720947, |
|
"rewards/chosen": -7.472816467285156, |
|
"rewards/margins": 1.5584744215011597, |
|
"rewards/rejected": -9.031290054321289, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9424829157175398, |
|
"grad_norm": 13.506325721740723, |
|
"learning_rate": 1.4011406605922551e-05, |
|
"logits/chosen": 0.7252389192581177, |
|
"logits/rejected": 0.7051321864128113, |
|
"logps/chosen": -266.4188537597656, |
|
"logps/rejected": -288.6436767578125, |
|
"loss": 0.3219, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -7.288155555725098, |
|
"rewards/margins": 2.1877682209014893, |
|
"rewards/rejected": -9.475923538208008, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.9453302961275627, |
|
"grad_norm": 14.530557632446289, |
|
"learning_rate": 1.401113895216401e-05, |
|
"logits/chosen": 0.7469380497932434, |
|
"logits/rejected": 0.7100605964660645, |
|
"logps/chosen": -264.8739318847656, |
|
"logps/rejected": -279.9246520996094, |
|
"loss": 0.3682, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -7.130241394042969, |
|
"rewards/margins": 1.9568824768066406, |
|
"rewards/rejected": -9.08712387084961, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.9481776765375854, |
|
"grad_norm": 3.6131389141082764, |
|
"learning_rate": 1.4010871298405468e-05, |
|
"logits/chosen": 0.8936527967453003, |
|
"logits/rejected": 0.8348100781440735, |
|
"logps/chosen": -261.61505126953125, |
|
"logps/rejected": -276.36517333984375, |
|
"loss": 0.4312, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -6.888121128082275, |
|
"rewards/margins": 1.6904165744781494, |
|
"rewards/rejected": -8.578537940979004, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.9510250569476082, |
|
"grad_norm": 14.113249778747559, |
|
"learning_rate": 1.4010603644646926e-05, |
|
"logits/chosen": 0.9257510900497437, |
|
"logits/rejected": 0.8715246319770813, |
|
"logps/chosen": -259.8716735839844, |
|
"logps/rejected": -280.5459899902344, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -6.530680179595947, |
|
"rewards/margins": 2.3227107524871826, |
|
"rewards/rejected": -8.85339069366455, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.9538724373576309, |
|
"grad_norm": 23.52164649963379, |
|
"learning_rate": 1.4010335990888382e-05, |
|
"logits/chosen": 0.7195979356765747, |
|
"logits/rejected": 0.6487305164337158, |
|
"logps/chosen": -269.1336975097656, |
|
"logps/rejected": -293.1927185058594, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -7.874746799468994, |
|
"rewards/margins": 2.290498971939087, |
|
"rewards/rejected": -10.165246963500977, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9567198177676538, |
|
"grad_norm": 21.7740478515625, |
|
"learning_rate": 1.401006833712984e-05, |
|
"logits/chosen": 0.9675396084785461, |
|
"logits/rejected": 0.9502711296081543, |
|
"logps/chosen": -280.28265380859375, |
|
"logps/rejected": -295.63922119140625, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.7333333492279053, |
|
"rewards/chosen": -8.650227546691895, |
|
"rewards/margins": 1.562798261642456, |
|
"rewards/rejected": -10.21302604675293, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9595671981776766, |
|
"grad_norm": 7.417437553405762, |
|
"learning_rate": 1.4009800683371299e-05, |
|
"logits/chosen": 0.8215571641921997, |
|
"logits/rejected": 0.8030030131340027, |
|
"logps/chosen": -280.65167236328125, |
|
"logps/rejected": -294.6379699707031, |
|
"loss": 0.5076, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -8.616076469421387, |
|
"rewards/margins": 1.911773443222046, |
|
"rewards/rejected": -10.527849197387695, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9624145785876993, |
|
"grad_norm": 6.399313449859619, |
|
"learning_rate": 1.4009533029612757e-05, |
|
"logits/chosen": 0.8853170275688171, |
|
"logits/rejected": 0.8363176584243774, |
|
"logps/chosen": -271.77557373046875, |
|
"logps/rejected": -293.91229248046875, |
|
"loss": 0.2771, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -8.413153648376465, |
|
"rewards/margins": 2.4288392066955566, |
|
"rewards/rejected": -10.84199333190918, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.965261958997722, |
|
"grad_norm": 21.55265998840332, |
|
"learning_rate": 1.4009265375854215e-05, |
|
"logits/chosen": 0.755032479763031, |
|
"logits/rejected": 0.6583060622215271, |
|
"logps/chosen": -271.9349365234375, |
|
"logps/rejected": -301.05926513671875, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -8.547085762023926, |
|
"rewards/margins": 2.608051300048828, |
|
"rewards/rejected": -11.155137062072754, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.9681093394077449, |
|
"grad_norm": 22.12690544128418, |
|
"learning_rate": 1.4008997722095673e-05, |
|
"logits/chosen": 0.648233950138092, |
|
"logits/rejected": 0.6009319424629211, |
|
"logps/chosen": -268.60980224609375, |
|
"logps/rejected": -287.47772216796875, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.8166666030883789, |
|
"rewards/chosen": -7.8952531814575195, |
|
"rewards/margins": 2.053194522857666, |
|
"rewards/rejected": -9.948448181152344, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9709567198177677, |
|
"grad_norm": 5.293265342712402, |
|
"learning_rate": 1.4008730068337131e-05, |
|
"logits/chosen": 0.7652915120124817, |
|
"logits/rejected": 0.7024304866790771, |
|
"logps/chosen": -272.99755859375, |
|
"logps/rejected": -295.06463623046875, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -7.992232322692871, |
|
"rewards/margins": 2.4706037044525146, |
|
"rewards/rejected": -10.462837219238281, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9738041002277904, |
|
"grad_norm": 19.21979331970215, |
|
"learning_rate": 1.4008462414578588e-05, |
|
"logits/chosen": 0.7430532574653625, |
|
"logits/rejected": 0.6697141528129578, |
|
"logps/chosen": -268.928955078125, |
|
"logps/rejected": -291.8720703125, |
|
"loss": 0.316, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.033040046691895, |
|
"rewards/margins": 2.5782742500305176, |
|
"rewards/rejected": -10.61131477355957, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9766514806378133, |
|
"grad_norm": 13.320822715759277, |
|
"learning_rate": 1.4008194760820046e-05, |
|
"logits/chosen": 0.8317564129829407, |
|
"logits/rejected": 0.7908271551132202, |
|
"logps/chosen": -268.9102783203125, |
|
"logps/rejected": -295.58349609375, |
|
"loss": 0.4192, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -7.700282096862793, |
|
"rewards/margins": 2.5946054458618164, |
|
"rewards/rejected": -10.294888496398926, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.979498861047836, |
|
"grad_norm": 9.567367553710938, |
|
"learning_rate": 1.4007927107061504e-05, |
|
"logits/chosen": 0.8482456207275391, |
|
"logits/rejected": 0.8254700899124146, |
|
"logps/chosen": -258.6345520019531, |
|
"logps/rejected": -284.7102355957031, |
|
"loss": 0.3652, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -6.666827201843262, |
|
"rewards/margins": 2.667280673980713, |
|
"rewards/rejected": -9.334108352661133, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.9823462414578588, |
|
"grad_norm": 10.789649963378906, |
|
"learning_rate": 1.4007659453302962e-05, |
|
"logits/chosen": 0.5343358516693115, |
|
"logits/rejected": 0.5035391449928284, |
|
"logps/chosen": -258.1282043457031, |
|
"logps/rejected": -283.01202392578125, |
|
"loss": 0.2902, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -6.793849945068359, |
|
"rewards/margins": 2.455238103866577, |
|
"rewards/rejected": -9.249088287353516, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9851936218678815, |
|
"grad_norm": 14.10362720489502, |
|
"learning_rate": 1.400739179954442e-05, |
|
"logits/chosen": 0.3702552020549774, |
|
"logits/rejected": 0.32574790716171265, |
|
"logps/chosen": -262.18408203125, |
|
"logps/rejected": -284.9990234375, |
|
"loss": 0.457, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -7.336909294128418, |
|
"rewards/margins": 2.0833096504211426, |
|
"rewards/rejected": -9.420219421386719, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9880410022779044, |
|
"grad_norm": 5.914283275604248, |
|
"learning_rate": 1.4007124145785877e-05, |
|
"logits/chosen": 0.540917158126831, |
|
"logits/rejected": 0.5174443125724792, |
|
"logps/chosen": -251.36801147460938, |
|
"logps/rejected": -273.43450927734375, |
|
"loss": 0.2641, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.19476318359375, |
|
"rewards/margins": 2.6106886863708496, |
|
"rewards/rejected": -8.805452346801758, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9908883826879271, |
|
"grad_norm": 7.198950290679932, |
|
"learning_rate": 1.4006856492027335e-05, |
|
"logits/chosen": 0.8166311383247375, |
|
"logits/rejected": 0.7666522264480591, |
|
"logps/chosen": -246.75436401367188, |
|
"logps/rejected": -270.91619873046875, |
|
"loss": 0.2759, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.385769844055176, |
|
"rewards/margins": 2.8532614707946777, |
|
"rewards/rejected": -8.239030838012695, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9937357630979499, |
|
"grad_norm": 16.072847366333008, |
|
"learning_rate": 1.4006588838268792e-05, |
|
"logits/chosen": 0.6863161325454712, |
|
"logits/rejected": 0.6740552186965942, |
|
"logps/chosen": -248.19387817382812, |
|
"logps/rejected": -265.17889404296875, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.6666666865348816, |
|
"rewards/chosen": -5.686077117919922, |
|
"rewards/margins": 1.7341415882110596, |
|
"rewards/rejected": -7.420218467712402, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9965831435079726, |
|
"grad_norm": 13.368264198303223, |
|
"learning_rate": 1.400632118451025e-05, |
|
"logits/chosen": 0.6482622623443604, |
|
"logits/rejected": 0.5943307876586914, |
|
"logps/chosen": -249.43032836914062, |
|
"logps/rejected": -268.16607666015625, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -5.8397626876831055, |
|
"rewards/margins": 2.000570058822632, |
|
"rewards/rejected": -7.840332984924316, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9994305239179955, |
|
"grad_norm": 16.661462783813477, |
|
"learning_rate": 1.4006053530751708e-05, |
|
"logits/chosen": 0.16263927519321442, |
|
"logits/rejected": 0.1554526388645172, |
|
"logps/chosen": -251.5836639404297, |
|
"logps/rejected": -271.8070068359375, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -6.0699052810668945, |
|
"rewards/margins": 1.8712621927261353, |
|
"rewards/rejected": -7.94116735458374, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.0022779043280183, |
|
"grad_norm": 10.85046100616455, |
|
"learning_rate": 1.4005785876993166e-05, |
|
"logits/chosen": 0.3554074764251709, |
|
"logits/rejected": 0.34798485040664673, |
|
"logps/chosen": -253.5457000732422, |
|
"logps/rejected": -276.7463684082031, |
|
"loss": 0.394, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -6.179994583129883, |
|
"rewards/margins": 2.402238130569458, |
|
"rewards/rejected": -8.582232475280762, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.005125284738041, |
|
"grad_norm": 21.960153579711914, |
|
"learning_rate": 1.4005518223234624e-05, |
|
"logits/chosen": 0.39422959089279175, |
|
"logits/rejected": 0.35846349596977234, |
|
"logps/chosen": -263.34832763671875, |
|
"logps/rejected": -285.46929931640625, |
|
"loss": 0.4013, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -7.29735803604126, |
|
"rewards/margins": 2.383136510848999, |
|
"rewards/rejected": -9.680493354797363, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.0079726651480638, |
|
"grad_norm": 6.245369911193848, |
|
"learning_rate": 1.4005250569476083e-05, |
|
"logits/chosen": 0.5632292628288269, |
|
"logits/rejected": 0.502747654914856, |
|
"logps/chosen": -268.88616943359375, |
|
"logps/rejected": -290.48797607421875, |
|
"loss": 0.3698, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.674142360687256, |
|
"rewards/margins": 2.4379868507385254, |
|
"rewards/rejected": -10.112129211425781, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.0108200455580865, |
|
"grad_norm": 11.277658462524414, |
|
"learning_rate": 1.400498291571754e-05, |
|
"logits/chosen": 0.5037197470664978, |
|
"logits/rejected": 0.47341617941856384, |
|
"logps/chosen": -269.2908630371094, |
|
"logps/rejected": -290.01025390625, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -7.865833282470703, |
|
"rewards/margins": 2.0145423412323, |
|
"rewards/rejected": -9.880374908447266, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.0136674259681093, |
|
"grad_norm": 17.50825309753418, |
|
"learning_rate": 1.4004715261958999e-05, |
|
"logits/chosen": 0.2622000575065613, |
|
"logits/rejected": 0.22665706276893616, |
|
"logps/chosen": -268.0179443359375, |
|
"logps/rejected": -284.2073059082031, |
|
"loss": 0.3836, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -7.505197048187256, |
|
"rewards/margins": 2.5877254009246826, |
|
"rewards/rejected": -10.092923164367676, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.0165148063781322, |
|
"grad_norm": 11.33314323425293, |
|
"learning_rate": 1.4004447608200455e-05, |
|
"logits/chosen": 0.3246827721595764, |
|
"logits/rejected": 0.3127135634422302, |
|
"logps/chosen": -256.9268798828125, |
|
"logps/rejected": -285.8166809082031, |
|
"loss": 0.2469, |
|
"rewards/accuracies": 0.9333332777023315, |
|
"rewards/chosen": -6.845952033996582, |
|
"rewards/margins": 2.841376543045044, |
|
"rewards/rejected": -9.687329292297363, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.0193621867881548, |
|
"grad_norm": 15.446540832519531, |
|
"learning_rate": 1.4004179954441914e-05, |
|
"logits/chosen": 0.36501818895339966, |
|
"logits/rejected": 0.31509679555892944, |
|
"logps/chosen": -260.56707763671875, |
|
"logps/rejected": -287.8186950683594, |
|
"loss": 0.2903, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -6.984988212585449, |
|
"rewards/margins": 2.249208927154541, |
|
"rewards/rejected": -9.234196662902832, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.0222095671981777, |
|
"grad_norm": 13.437565803527832, |
|
"learning_rate": 1.4003912300683372e-05, |
|
"logits/chosen": 0.45073944330215454, |
|
"logits/rejected": 0.402204692363739, |
|
"logps/chosen": -257.2052307128906, |
|
"logps/rejected": -281.25775146484375, |
|
"loss": 0.4182, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -6.852511405944824, |
|
"rewards/margins": 2.6151347160339355, |
|
"rewards/rejected": -9.467647552490234, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.0250569476082005, |
|
"grad_norm": 8.935168266296387, |
|
"learning_rate": 1.400364464692483e-05, |
|
"logits/chosen": 0.6178187131881714, |
|
"logits/rejected": 0.5572710633277893, |
|
"logps/chosen": -266.0174560546875, |
|
"logps/rejected": -285.91973876953125, |
|
"loss": 0.3928, |
|
"rewards/accuracies": 0.7666667103767395, |
|
"rewards/chosen": -7.804986000061035, |
|
"rewards/margins": 2.040278196334839, |
|
"rewards/rejected": -9.845263481140137, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.0279043280182232, |
|
"grad_norm": 15.844207763671875, |
|
"learning_rate": 1.4003376993166288e-05, |
|
"logits/chosen": 0.5763198137283325, |
|
"logits/rejected": 0.530194103717804, |
|
"logps/chosen": -275.58245849609375, |
|
"logps/rejected": -299.5401916503906, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -8.525606155395508, |
|
"rewards/margins": 2.2367708683013916, |
|
"rewards/rejected": -10.76237678527832, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.030751708428246, |
|
"grad_norm": 15.936345100402832, |
|
"learning_rate": 1.4003109339407746e-05, |
|
"logits/chosen": 0.6139250993728638, |
|
"logits/rejected": 0.5907430648803711, |
|
"logps/chosen": -277.6994323730469, |
|
"logps/rejected": -305.0992126464844, |
|
"loss": 0.3596, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -8.412062644958496, |
|
"rewards/margins": 3.1548120975494385, |
|
"rewards/rejected": -11.566877365112305, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.033599088838269, |
|
"grad_norm": 19.53025245666504, |
|
"learning_rate": 1.4002841685649205e-05, |
|
"logits/chosen": 0.5422394871711731, |
|
"logits/rejected": 0.5309593081474304, |
|
"logps/chosen": -286.34417724609375, |
|
"logps/rejected": -308.42584228515625, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -9.668391227722168, |
|
"rewards/margins": 2.4024975299835205, |
|
"rewards/rejected": -12.07088851928711, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.0364464692482915, |
|
"grad_norm": 16.09668731689453, |
|
"learning_rate": 1.4002574031890661e-05, |
|
"logits/chosen": 0.7884154915809631, |
|
"logits/rejected": 0.7050036787986755, |
|
"logps/chosen": -279.5062561035156, |
|
"logps/rejected": -309.83856201171875, |
|
"loss": 0.2377, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -8.75977897644043, |
|
"rewards/margins": 3.1992735862731934, |
|
"rewards/rejected": -11.959052085876465, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.0392938496583144, |
|
"grad_norm": 18.160032272338867, |
|
"learning_rate": 1.400230637813212e-05, |
|
"logits/chosen": 0.794320285320282, |
|
"logits/rejected": 0.7702603340148926, |
|
"logps/chosen": -282.390869140625, |
|
"logps/rejected": -307.16943359375, |
|
"loss": 0.6078, |
|
"rewards/accuracies": 0.7833333611488342, |
|
"rewards/chosen": -8.865792274475098, |
|
"rewards/margins": 2.653172016143799, |
|
"rewards/rejected": -11.518964767456055, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.042141230068337, |
|
"grad_norm": 17.57752227783203, |
|
"learning_rate": 1.4002038724373577e-05, |
|
"logits/chosen": 0.8936277627944946, |
|
"logits/rejected": 0.8679503202438354, |
|
"logps/chosen": -264.1590881347656, |
|
"logps/rejected": -295.1584167480469, |
|
"loss": 0.3022, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -7.652402400970459, |
|
"rewards/margins": 2.8865489959716797, |
|
"rewards/rejected": -10.53895092010498, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.04498861047836, |
|
"grad_norm": 10.988320350646973, |
|
"learning_rate": 1.4001771070615036e-05, |
|
"logits/chosen": 0.6470170021057129, |
|
"logits/rejected": 0.5980864763259888, |
|
"logps/chosen": -278.0357971191406, |
|
"logps/rejected": -301.3656005859375, |
|
"loss": 0.2475, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -8.397979736328125, |
|
"rewards/margins": 2.759563446044922, |
|
"rewards/rejected": -11.157544136047363, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.0478359908883828, |
|
"grad_norm": 21.041053771972656, |
|
"learning_rate": 1.4001503416856494e-05, |
|
"logits/chosen": 0.8440157771110535, |
|
"logits/rejected": 0.8416398167610168, |
|
"logps/chosen": -280.1607666015625, |
|
"logps/rejected": -308.46673583984375, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.8166666030883789, |
|
"rewards/chosen": -9.083349227905273, |
|
"rewards/margins": 2.9152331352233887, |
|
"rewards/rejected": -11.998581886291504, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.0506833712984054, |
|
"grad_norm": 34.18033981323242, |
|
"learning_rate": 1.400123576309795e-05, |
|
"logits/chosen": 1.1155614852905273, |
|
"logits/rejected": 1.037345051765442, |
|
"logps/chosen": -291.4377136230469, |
|
"logps/rejected": -315.4261169433594, |
|
"loss": 0.2893, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -9.966141700744629, |
|
"rewards/margins": 2.7352890968322754, |
|
"rewards/rejected": -12.701430320739746, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.0535307517084282, |
|
"grad_norm": 22.951887130737305, |
|
"learning_rate": 1.4000968109339408e-05, |
|
"logits/chosen": 0.8897800445556641, |
|
"logits/rejected": 0.8015605211257935, |
|
"logps/chosen": -282.42718505859375, |
|
"logps/rejected": -311.1539001464844, |
|
"loss": 0.3469, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -9.409584045410156, |
|
"rewards/margins": 2.546072483062744, |
|
"rewards/rejected": -11.955656051635742, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.056378132118451, |
|
"grad_norm": 26.796382904052734, |
|
"learning_rate": 1.4000700455580865e-05, |
|
"logits/chosen": 0.6857692003250122, |
|
"logits/rejected": 0.6226261258125305, |
|
"logps/chosen": -282.7877197265625, |
|
"logps/rejected": -315.45367431640625, |
|
"loss": 0.294, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -9.390748977661133, |
|
"rewards/margins": 3.2603118419647217, |
|
"rewards/rejected": -12.6510591506958, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.0592255125284737, |
|
"grad_norm": 33.42181396484375, |
|
"learning_rate": 1.4000432801822323e-05, |
|
"logits/chosen": 0.4234946668148041, |
|
"logits/rejected": 0.3805975317955017, |
|
"logps/chosen": -296.66644287109375, |
|
"logps/rejected": -319.873291015625, |
|
"loss": 0.388, |
|
"rewards/accuracies": 0.8500000834465027, |
|
"rewards/chosen": -10.75451374053955, |
|
"rewards/margins": 2.3954834938049316, |
|
"rewards/rejected": -13.149996757507324, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.0620728929384966, |
|
"grad_norm": 8.873187065124512, |
|
"learning_rate": 1.4000165148063781e-05, |
|
"logits/chosen": 0.8379060626029968, |
|
"logits/rejected": 0.8032379150390625, |
|
"logps/chosen": -280.6669616699219, |
|
"logps/rejected": -309.33251953125, |
|
"loss": 0.3693, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -8.943597793579102, |
|
"rewards/margins": 2.9501333236694336, |
|
"rewards/rejected": -11.893732070922852, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0649202733485195, |
|
"grad_norm": 17.75446891784668, |
|
"learning_rate": 1.399989749430524e-05, |
|
"logits/chosen": 0.4959283769130707, |
|
"logits/rejected": 0.4724133014678955, |
|
"logps/chosen": -288.1912841796875, |
|
"logps/rejected": -308.560546875, |
|
"loss": 0.4123, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -9.21948528289795, |
|
"rewards/margins": 2.6541736125946045, |
|
"rewards/rejected": -11.873659133911133, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.067767653758542, |
|
"grad_norm": 21.715051651000977, |
|
"learning_rate": 1.3999629840546698e-05, |
|
"logits/chosen": 0.9915273785591125, |
|
"logits/rejected": 0.9026697278022766, |
|
"logps/chosen": -288.552490234375, |
|
"logps/rejected": -325.791015625, |
|
"loss": 0.2591, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -9.927160263061523, |
|
"rewards/margins": 3.521343946456909, |
|
"rewards/rejected": -13.448504447937012, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.070615034168565, |
|
"grad_norm": 8.18878173828125, |
|
"learning_rate": 1.3999362186788156e-05, |
|
"logits/chosen": 0.8200756907463074, |
|
"logits/rejected": 0.7880581617355347, |
|
"logps/chosen": -281.2442932128906, |
|
"logps/rejected": -309.26300048828125, |
|
"loss": 0.3599, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -9.674338340759277, |
|
"rewards/margins": 2.2603495121002197, |
|
"rewards/rejected": -11.934687614440918, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.0734624145785876, |
|
"grad_norm": 11.345477104187012, |
|
"learning_rate": 1.3999094533029614e-05, |
|
"logits/chosen": 0.6193078756332397, |
|
"logits/rejected": 0.5920727849006653, |
|
"logps/chosen": -297.87579345703125, |
|
"logps/rejected": -324.90875244140625, |
|
"loss": 0.3291, |
|
"rewards/accuracies": 0.8333333134651184, |
|
"rewards/chosen": -10.389954566955566, |
|
"rewards/margins": 3.0738964080810547, |
|
"rewards/rejected": -13.463849067687988, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.0763097949886105, |
|
"grad_norm": 12.467089653015137, |
|
"learning_rate": 1.399882687927107e-05, |
|
"logits/chosen": 0.6029736995697021, |
|
"logits/rejected": 0.5278339385986328, |
|
"logps/chosen": -287.04547119140625, |
|
"logps/rejected": -313.1669006347656, |
|
"loss": 0.2722, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -9.467456817626953, |
|
"rewards/margins": 3.074500322341919, |
|
"rewards/rejected": -12.54195785522461, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.0791571753986333, |
|
"grad_norm": 5.360784530639648, |
|
"learning_rate": 1.3998559225512529e-05, |
|
"logits/chosen": 0.5335519313812256, |
|
"logits/rejected": 0.5010834336280823, |
|
"logps/chosen": -273.48895263671875, |
|
"logps/rejected": -307.15576171875, |
|
"loss": 0.3501, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -8.458849906921387, |
|
"rewards/margins": 3.136683940887451, |
|
"rewards/rejected": -11.59553337097168, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.082004555808656, |
|
"grad_norm": 23.285932540893555, |
|
"learning_rate": 1.3998291571753987e-05, |
|
"logits/chosen": 0.4613843858242035, |
|
"logits/rejected": 0.4110085070133209, |
|
"logps/chosen": -275.67706298828125, |
|
"logps/rejected": -303.83782958984375, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.8333333730697632, |
|
"rewards/chosen": -8.685543060302734, |
|
"rewards/margins": 2.5913987159729004, |
|
"rewards/rejected": -11.276942253112793, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.0848519362186788, |
|
"grad_norm": 18.05284309387207, |
|
"learning_rate": 1.3998023917995445e-05, |
|
"logits/chosen": 0.7175520658493042, |
|
"logits/rejected": 0.6548722386360168, |
|
"logps/chosen": -279.5047912597656, |
|
"logps/rejected": -309.22479248046875, |
|
"loss": 0.3255, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -9.209101676940918, |
|
"rewards/margins": 3.0299510955810547, |
|
"rewards/rejected": -12.239053726196289, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.0876993166287017, |
|
"grad_norm": 15.506644248962402, |
|
"learning_rate": 1.3997756264236903e-05, |
|
"logits/chosen": 0.8500604629516602, |
|
"logits/rejected": 0.8025990724563599, |
|
"logps/chosen": -281.86053466796875, |
|
"logps/rejected": -311.606201171875, |
|
"loss": 0.3359, |
|
"rewards/accuracies": 0.8500000834465027, |
|
"rewards/chosen": -9.722951889038086, |
|
"rewards/margins": 2.3718514442443848, |
|
"rewards/rejected": -12.094802856445312, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0905466970387243, |
|
"grad_norm": 3.2531468868255615, |
|
"learning_rate": 1.3997488610478361e-05, |
|
"logits/chosen": 0.9165185689926147, |
|
"logits/rejected": 0.8282724618911743, |
|
"logps/chosen": -278.8956604003906, |
|
"logps/rejected": -312.00750732421875, |
|
"loss": 0.2895, |
|
"rewards/accuracies": 0.8833333849906921, |
|
"rewards/chosen": -8.979422569274902, |
|
"rewards/margins": 3.4525809288024902, |
|
"rewards/rejected": -12.432002067565918, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.0933940774487472, |
|
"grad_norm": 8.112292289733887, |
|
"learning_rate": 1.399722095671982e-05, |
|
"logits/chosen": 1.0575357675552368, |
|
"logits/rejected": 1.013039469718933, |
|
"logps/chosen": -280.9009094238281, |
|
"logps/rejected": -303.0403747558594, |
|
"loss": 0.3678, |
|
"rewards/accuracies": 0.7833333015441895, |
|
"rewards/chosen": -8.741242408752441, |
|
"rewards/margins": 2.594489574432373, |
|
"rewards/rejected": -11.335733413696289, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.09624145785877, |
|
"grad_norm": 3.0322318077087402, |
|
"learning_rate": 1.3996953302961276e-05, |
|
"logits/chosen": 0.8057734370231628, |
|
"logits/rejected": 0.7428101301193237, |
|
"logps/chosen": -293.65765380859375, |
|
"logps/rejected": -322.9544982910156, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -10.046621322631836, |
|
"rewards/margins": 2.509535312652588, |
|
"rewards/rejected": -12.556157112121582, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.0990888382687927, |
|
"grad_norm": 13.537039756774902, |
|
"learning_rate": 1.3996685649202734e-05, |
|
"logits/chosen": 0.8179152607917786, |
|
"logits/rejected": 0.7628077268600464, |
|
"logps/chosen": -309.3814392089844, |
|
"logps/rejected": -333.23236083984375, |
|
"loss": 0.3651, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -11.620147705078125, |
|
"rewards/margins": 2.818713426589966, |
|
"rewards/rejected": -14.438860893249512, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.1019362186788155, |
|
"grad_norm": 15.105693817138672, |
|
"learning_rate": 1.3996417995444192e-05, |
|
"logits/chosen": 0.8153516054153442, |
|
"logits/rejected": 0.7349362373352051, |
|
"logps/chosen": -316.74114990234375, |
|
"logps/rejected": -343.94329833984375, |
|
"loss": 0.2984, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -12.85276985168457, |
|
"rewards/margins": 2.6356797218322754, |
|
"rewards/rejected": -15.48845100402832, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.1047835990888384, |
|
"grad_norm": 9.755592346191406, |
|
"learning_rate": 1.399615034168565e-05, |
|
"logits/chosen": 0.6579657793045044, |
|
"logits/rejected": 0.5918071269989014, |
|
"logps/chosen": -309.8885498046875, |
|
"logps/rejected": -337.7580261230469, |
|
"loss": 0.385, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -11.838005065917969, |
|
"rewards/margins": 3.0567147731781006, |
|
"rewards/rejected": -14.894720077514648, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.107630979498861, |
|
"grad_norm": 19.806747436523438, |
|
"learning_rate": 1.3995882687927109e-05, |
|
"logits/chosen": 1.0924065113067627, |
|
"logits/rejected": 1.0375772714614868, |
|
"logps/chosen": -309.6856384277344, |
|
"logps/rejected": -335.0812683105469, |
|
"loss": 0.2217, |
|
"rewards/accuracies": 0.8833333253860474, |
|
"rewards/chosen": -11.95275592803955, |
|
"rewards/margins": 2.838541269302368, |
|
"rewards/rejected": -14.791296005249023, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.1104783599088839, |
|
"grad_norm": 20.042261123657227, |
|
"learning_rate": 1.3995615034168567e-05, |
|
"logits/chosen": 0.5404999256134033, |
|
"logits/rejected": 0.5026549100875854, |
|
"logps/chosen": -308.2242736816406, |
|
"logps/rejected": -331.86944580078125, |
|
"loss": 0.3761, |
|
"rewards/accuracies": 0.8500000834465027, |
|
"rewards/chosen": -12.19024658203125, |
|
"rewards/margins": 2.2361843585968018, |
|
"rewards/rejected": -14.426431655883789, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.1133257403189065, |
|
"grad_norm": 9.1209135055542, |
|
"learning_rate": 1.3995347380410023e-05, |
|
"logits/chosen": 1.2070353031158447, |
|
"logits/rejected": 1.1822826862335205, |
|
"logps/chosen": -312.82049560546875, |
|
"logps/rejected": -336.69305419921875, |
|
"loss": 0.3917, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -11.746156692504883, |
|
"rewards/margins": 3.025268793106079, |
|
"rewards/rejected": -14.77142333984375, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.1161731207289294, |
|
"grad_norm": 5.072232723236084, |
|
"learning_rate": 1.399507972665148e-05, |
|
"logits/chosen": 0.7343096137046814, |
|
"logits/rejected": 0.7266718149185181, |
|
"logps/chosen": -295.480712890625, |
|
"logps/rejected": -331.20391845703125, |
|
"loss": 0.2191, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -10.570296287536621, |
|
"rewards/margins": 3.5358786582946777, |
|
"rewards/rejected": -14.106175422668457, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.1190205011389522, |
|
"grad_norm": 15.483905792236328, |
|
"learning_rate": 1.3994812072892938e-05, |
|
"logits/chosen": 0.7368067502975464, |
|
"logits/rejected": 0.6594582796096802, |
|
"logps/chosen": -284.6335144042969, |
|
"logps/rejected": -315.6561584472656, |
|
"loss": 0.2343, |
|
"rewards/accuracies": 0.9166666865348816, |
|
"rewards/chosen": -10.035016059875488, |
|
"rewards/margins": 3.443873167037964, |
|
"rewards/rejected": -13.478889465332031, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.1218678815489749, |
|
"grad_norm": 20.447673797607422, |
|
"learning_rate": 1.3994544419134396e-05, |
|
"logits/chosen": 0.6332524418830872, |
|
"logits/rejected": 0.606482207775116, |
|
"logps/chosen": -290.54254150390625, |
|
"logps/rejected": -317.78076171875, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -10.31615161895752, |
|
"rewards/margins": 2.5428757667541504, |
|
"rewards/rejected": -12.859028816223145, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.1247152619589977, |
|
"grad_norm": 15.281331062316895, |
|
"learning_rate": 1.3994276765375854e-05, |
|
"logits/chosen": 1.1297801733016968, |
|
"logits/rejected": 1.0745735168457031, |
|
"logps/chosen": -300.40338134765625, |
|
"logps/rejected": -326.6527404785156, |
|
"loss": 0.4481, |
|
"rewards/accuracies": 0.8000000715255737, |
|
"rewards/chosen": -11.181158065795898, |
|
"rewards/margins": 2.283782482147217, |
|
"rewards/rejected": -13.464941024780273, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.1275626423690206, |
|
"grad_norm": 17.650279998779297, |
|
"learning_rate": 1.3994009111617312e-05, |
|
"logits/chosen": 0.7293352484703064, |
|
"logits/rejected": 0.7268023490905762, |
|
"logps/chosen": -323.0600280761719, |
|
"logps/rejected": -347.69049072265625, |
|
"loss": 0.5341, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -13.055007934570312, |
|
"rewards/margins": 2.4304747581481934, |
|
"rewards/rejected": -15.485481262207031, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.1304100227790432, |
|
"grad_norm": 13.10045337677002, |
|
"learning_rate": 1.399374145785877e-05, |
|
"logits/chosen": 0.9735004305839539, |
|
"logits/rejected": 0.9148595929145813, |
|
"logps/chosen": -318.93878173828125, |
|
"logps/rejected": -341.3691101074219, |
|
"loss": 0.4774, |
|
"rewards/accuracies": 0.73333340883255, |
|
"rewards/chosen": -12.5765380859375, |
|
"rewards/margins": 2.1414952278137207, |
|
"rewards/rejected": -14.718032836914062, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.133257403189066, |
|
"grad_norm": 2.9735374450683594, |
|
"learning_rate": 1.3993473804100229e-05, |
|
"logits/chosen": 0.705932080745697, |
|
"logits/rejected": 0.645165205001831, |
|
"logps/chosen": -325.68902587890625, |
|
"logps/rejected": -348.7044677734375, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.8166667222976685, |
|
"rewards/chosen": -13.175191879272461, |
|
"rewards/margins": 2.545431613922119, |
|
"rewards/rejected": -15.720623970031738, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.1361047835990887, |
|
"grad_norm": 21.49675750732422, |
|
"learning_rate": 1.3993206150341685e-05, |
|
"logits/chosen": 0.7589794397354126, |
|
"logits/rejected": 0.7352560758590698, |
|
"logps/chosen": -317.130859375, |
|
"logps/rejected": -333.79876708984375, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -12.715624809265137, |
|
"rewards/margins": 1.7535444498062134, |
|
"rewards/rejected": -14.469167709350586, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"grad_norm": 22.71150016784668, |
|
"learning_rate": 1.3992938496583143e-05, |
|
"logits/chosen": 0.7460082769393921, |
|
"logits/rejected": 0.7586153745651245, |
|
"logps/chosen": -302.3631896972656, |
|
"logps/rejected": -322.42266845703125, |
|
"loss": 0.3693, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -11.592964172363281, |
|
"rewards/margins": 2.2121787071228027, |
|
"rewards/rejected": -13.805142402648926, |
|
"step": 4000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 526800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 6, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|